lfs.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. # lfs.py -- Implementation of the LFS
  2. # Copyright (C) 2020 Jelmer Vernooij
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Git Large File Storage (LFS) support.
  22. This module provides support for Git LFS, which is a Git extension for
  23. versioning large files. It replaces large files with text pointers inside Git,
  24. while storing the file contents on a remote server.
  25. Key components:
  26. - LFS pointer file parsing and creation
  27. - LFS object storage and retrieval
  28. - HTTP client for LFS server communication
  29. - Integration with dulwich repositories
  30. """
  31. __all__ = [
  32. "FileLFSClient",
  33. "HTTPLFSClient",
  34. "LFSAction",
  35. "LFSBatchObject",
  36. "LFSBatchResponse",
  37. "LFSClient",
  38. "LFSError",
  39. "LFSErrorInfo",
  40. "LFSFilterDriver",
  41. "LFSPointer",
  42. "LFSStore",
  43. ]
  44. import hashlib
  45. import json
  46. import logging
  47. import os
  48. import tempfile
  49. from collections.abc import Iterable, Mapping
  50. from dataclasses import dataclass
  51. from typing import TYPE_CHECKING, Any, BinaryIO
  52. from urllib.parse import urljoin, urlparse
  53. from urllib.request import Request, urlopen
  54. logger = logging.getLogger(__name__)
  55. if TYPE_CHECKING:
  56. import urllib3
  57. from .config import Config
  58. from .repo import Repo
  59. @dataclass
  60. class LFSAction:
  61. """LFS action structure."""
  62. href: str
  63. header: dict[str, str] | None = None
  64. expires_at: str | None = None
  65. @dataclass
  66. class LFSErrorInfo:
  67. """LFS error structure."""
  68. code: int
  69. message: str
  70. @dataclass
  71. class LFSBatchObject:
  72. """LFS batch object structure."""
  73. oid: str
  74. size: int
  75. authenticated: bool | None = None
  76. actions: dict[str, LFSAction] | None = None
  77. error: LFSErrorInfo | None = None
  78. @dataclass
  79. class LFSBatchResponse:
  80. """LFS batch response structure."""
  81. transfer: str
  82. objects: list[LFSBatchObject]
  83. hash_algo: str | None = None
  84. class LFSStore:
  85. """Stores objects on disk, indexed by SHA256."""
  86. def __init__(self, path: str) -> None:
  87. """Initialize LFSStore."""
  88. self.path = path
  89. @classmethod
  90. def create(cls, lfs_dir: str) -> "LFSStore":
  91. """Create a new LFS store."""
  92. if not os.path.isdir(lfs_dir):
  93. os.mkdir(lfs_dir)
  94. tmp_dir = os.path.join(lfs_dir, "tmp")
  95. if not os.path.isdir(tmp_dir):
  96. os.mkdir(tmp_dir)
  97. objects_dir = os.path.join(lfs_dir, "objects")
  98. if not os.path.isdir(objects_dir):
  99. os.mkdir(objects_dir)
  100. return cls(lfs_dir)
  101. @classmethod
  102. def from_repo(cls, repo: "Repo", create: bool = False) -> "LFSStore":
  103. """Create LFS store from repository."""
  104. lfs_dir = os.path.join(repo.controldir(), "lfs")
  105. if create:
  106. return cls.create(lfs_dir)
  107. return cls(lfs_dir)
  108. @classmethod
  109. def from_controldir(cls, controldir: str, create: bool = False) -> "LFSStore":
  110. """Create LFS store from control directory."""
  111. lfs_dir = os.path.join(controldir, "lfs")
  112. if create:
  113. return cls.create(lfs_dir)
  114. return cls(lfs_dir)
  115. def _sha_path(self, sha: str) -> str:
  116. return os.path.join(self.path, "objects", sha[0:2], sha[2:4], sha)
  117. def open_object(self, sha: str) -> BinaryIO:
  118. """Open an object by sha."""
  119. try:
  120. return open(self._sha_path(sha), "rb")
  121. except FileNotFoundError as exc:
  122. raise KeyError(sha) from exc
  123. def write_object(self, chunks: Iterable[bytes]) -> str:
  124. """Write an object.
  125. Returns: object SHA
  126. """
  127. # First pass: compute SHA256 and collect data
  128. sha = hashlib.sha256()
  129. data_chunks = []
  130. for chunk in chunks:
  131. sha.update(chunk)
  132. data_chunks.append(chunk)
  133. sha_hex = sha.hexdigest()
  134. path = self._sha_path(sha_hex)
  135. # If object already exists, no need to write
  136. if os.path.exists(path):
  137. return sha_hex
  138. # Object doesn't exist, write it
  139. if not os.path.exists(os.path.dirname(path)):
  140. os.makedirs(os.path.dirname(path))
  141. tmpdir = os.path.join(self.path, "tmp")
  142. with tempfile.NamedTemporaryFile(dir=tmpdir, mode="wb", delete=False) as f:
  143. for chunk in data_chunks:
  144. f.write(chunk)
  145. f.flush()
  146. tmppath = f.name
  147. # Handle concurrent writes - if file already exists, just remove temp file
  148. if os.path.exists(path):
  149. os.remove(tmppath)
  150. else:
  151. os.rename(tmppath, path)
  152. return sha_hex
  153. class LFSPointer:
  154. """Represents an LFS pointer file."""
  155. def __init__(self, oid: str, size: int) -> None:
  156. """Initialize LFSPointer."""
  157. self.oid = oid
  158. self.size = size
  159. @classmethod
  160. def from_bytes(cls, data: bytes) -> "LFSPointer | None":
  161. """Parse LFS pointer from bytes.
  162. Returns None if data is not a valid LFS pointer.
  163. """
  164. try:
  165. text = data.decode("utf-8")
  166. except UnicodeDecodeError:
  167. return None
  168. # LFS pointer files have a specific format
  169. lines = text.strip().split("\n")
  170. if len(lines) < 3:
  171. return None
  172. # Must start with version
  173. if not lines[0].startswith("version https://git-lfs.github.com/spec/v1"):
  174. return None
  175. oid = None
  176. size = None
  177. for line in lines[1:]:
  178. if line.startswith("oid sha256:"):
  179. oid = line[11:].strip()
  180. elif line.startswith("size "):
  181. try:
  182. size = int(line[5:].strip())
  183. # Size must be non-negative
  184. if size < 0:
  185. return None
  186. except ValueError:
  187. return None
  188. if oid is None or size is None:
  189. return None
  190. return cls(oid, size)
  191. def to_bytes(self) -> bytes:
  192. """Convert LFS pointer to bytes."""
  193. return (
  194. f"version https://git-lfs.github.com/spec/v1\n"
  195. f"oid sha256:{self.oid}\n"
  196. f"size {self.size}\n"
  197. ).encode()
  198. def is_valid_oid(self) -> bool:
  199. """Check if the OID is valid SHA256."""
  200. if len(self.oid) != 64:
  201. return False
  202. try:
  203. int(self.oid, 16)
  204. return True
  205. except ValueError:
  206. return False
  207. class LFSFilterDriver:
  208. """LFS filter driver implementation."""
  209. def __init__(self, lfs_store: "LFSStore", config: "Config | None" = None) -> None:
  210. """Initialize LFSFilterDriver."""
  211. self.lfs_store = lfs_store
  212. self.config = config
  213. def clean(self, data: bytes) -> bytes:
  214. """Convert file content to LFS pointer (clean filter)."""
  215. # Check if data is already an LFS pointer
  216. pointer = LFSPointer.from_bytes(data)
  217. if pointer is not None:
  218. return data
  219. # Store the file content in LFS
  220. sha = self.lfs_store.write_object([data])
  221. # Create and return LFS pointer
  222. pointer = LFSPointer(sha, len(data))
  223. return pointer.to_bytes()
  224. def smudge(self, data: bytes, path: bytes = b"") -> bytes:
  225. """Convert LFS pointer to file content (smudge filter)."""
  226. # Try to parse as LFS pointer
  227. pointer = LFSPointer.from_bytes(data)
  228. if pointer is None:
  229. # Not an LFS pointer, return as-is
  230. return data
  231. # Validate the pointer
  232. if not pointer.is_valid_oid():
  233. return data
  234. try:
  235. # Read the actual content from LFS store
  236. with self.lfs_store.open_object(pointer.oid) as f:
  237. return f.read()
  238. except KeyError:
  239. # Object not found in LFS store, try to download it
  240. try:
  241. content = self._download_object(pointer)
  242. return content
  243. except LFSError as e:
  244. # Download failed, fall back to returning pointer
  245. logger.warning("LFS object download failed for %s: %s", pointer.oid, e)
  246. # Return pointer as-is when object is missing and download failed
  247. return data
  248. def _download_object(self, pointer: LFSPointer) -> bytes:
  249. """Download an LFS object from the server.
  250. Args:
  251. pointer: LFS pointer containing OID and size
  252. Returns:
  253. Downloaded content
  254. Raises:
  255. LFSError: If download fails for any reason
  256. """
  257. if self.config is None:
  258. raise LFSError("No configuration available for LFS download")
  259. # Create LFS client and download
  260. client = LFSClient.from_config(self.config)
  261. if client is None:
  262. raise LFSError("No LFS client available from configuration")
  263. content = client.download(pointer.oid, pointer.size)
  264. # Store the downloaded content in local LFS store
  265. stored_oid = self.lfs_store.write_object([content])
  266. # Verify the stored OID matches what we expected
  267. if stored_oid != pointer.oid:
  268. raise LFSError(
  269. f"Downloaded OID mismatch: expected {pointer.oid}, got {stored_oid}"
  270. )
  271. return content
  272. def cleanup(self) -> None:
  273. """Clean up any resources held by this filter driver."""
  274. # LFSFilterDriver doesn't hold any resources that need cleanup
  275. def reuse(self, config: "Config | None", filter_name: str) -> bool:
  276. """Check if this filter driver should be reused with the given configuration."""
  277. # LFSFilterDriver is stateless and lightweight, no need to cache
  278. return False
  279. def _get_lfs_user_agent(config: "Config | None") -> str:
  280. """Get User-Agent string for LFS requests, respecting git config."""
  281. try:
  282. if config:
  283. # Use configured user agent verbatim if set
  284. return config.get(b"http", b"useragent").decode()
  285. except KeyError:
  286. pass
  287. # Default LFS user agent (similar to git-lfs format)
  288. from . import __version__
  289. version_str = ".".join([str(x) for x in __version__])
  290. return f"git-lfs/dulwich/{version_str}"
  291. def _is_valid_lfs_url(url: str) -> bool:
  292. """Check if a URL is valid for LFS.
  293. Git LFS supports http://, https://, and file:// URLs.
  294. Args:
  295. url: URL to validate
  296. Returns:
  297. True if URL is a valid LFS URL, False otherwise
  298. """
  299. parsed = urlparse(url)
  300. # Must have a scheme
  301. if not parsed.scheme:
  302. return False
  303. # Only support http, https, and file schemes
  304. if parsed.scheme not in ("http", "https", "file"):
  305. return False
  306. # http/https require a hostname
  307. if parsed.scheme in ("http", "https"):
  308. return bool(parsed.netloc)
  309. # file:// URLs must have a path (netloc is typically empty)
  310. if parsed.scheme == "file":
  311. return bool(parsed.path)
  312. return False
  313. class LFSClient:
  314. """Base class for LFS client operations."""
  315. def __init__(self, url: str, config: "Config | None" = None) -> None:
  316. """Initialize LFS client.
  317. Args:
  318. url: LFS server URL (http://, https://, or file://)
  319. config: Optional git config for authentication/proxy settings
  320. """
  321. self._base_url = url.rstrip("/") + "/" # Ensure trailing slash for urljoin
  322. self.config = config
  323. @property
  324. def url(self) -> str:
  325. """Get the LFS server URL without trailing slash."""
  326. return self._base_url.rstrip("/")
  327. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  328. """Download an LFS object.
  329. Args:
  330. oid: Object ID (SHA256)
  331. size: Expected size
  332. ref: Optional ref name
  333. Returns:
  334. Object content
  335. """
  336. raise NotImplementedError
  337. def upload(
  338. self, oid: str, size: int, content: bytes, ref: str | None = None
  339. ) -> None:
  340. """Upload an LFS object.
  341. Args:
  342. oid: Object ID (SHA256)
  343. size: Object size
  344. content: Object content
  345. ref: Optional ref name
  346. """
  347. raise NotImplementedError
  348. @classmethod
  349. def from_config(cls, config: "Config") -> "LFSClient | None":
  350. """Create LFS client from git config.
  351. Returns the appropriate subclass (HTTPLFSClient or FileLFSClient)
  352. based on the URL scheme.
  353. """
  354. # Try to get LFS URL from config first
  355. try:
  356. url = config.get((b"lfs",), b"url").decode()
  357. except KeyError:
  358. pass
  359. else:
  360. # Validate explicitly configured URL - raise error if invalid
  361. if not _is_valid_lfs_url(url):
  362. raise ValueError(
  363. f"Invalid lfs.url in config: {url!r}. "
  364. "URL must be an absolute URL with scheme http://, https://, or file://."
  365. )
  366. # Return appropriate client based on scheme
  367. parsed = urlparse(url)
  368. if parsed.scheme in ("http", "https"):
  369. return HTTPLFSClient(url, config)
  370. elif parsed.scheme == "file":
  371. return FileLFSClient(url, config)
  372. else:
  373. # This shouldn't happen if _is_valid_lfs_url works correctly
  374. raise ValueError(f"Unsupported LFS URL scheme: {parsed.scheme}")
  375. # Fall back to deriving from remote URL (same as git-lfs)
  376. try:
  377. remote_url = config.get((b"remote", b"origin"), b"url").decode()
  378. except KeyError:
  379. pass
  380. else:
  381. # Convert SSH URLs to HTTPS if needed
  382. if remote_url.startswith("git@"):
  383. # Convert git@host:user/repo.git to https://host/user/repo.git
  384. if ":" in remote_url and "/" in remote_url:
  385. host_and_path = remote_url[4:] # Remove "git@"
  386. if ":" in host_and_path:
  387. host, path = host_and_path.split(":", 1)
  388. remote_url = f"https://{host}/{path}"
  389. # Ensure URL ends with .git for consistent LFS endpoint
  390. if not remote_url.endswith(".git"):
  391. remote_url = f"{remote_url}.git"
  392. # Standard LFS endpoint is remote_url + "/info/lfs"
  393. lfs_url = f"{remote_url}/info/lfs"
  394. # Return None if derived URL is invalid (LFS is optional)
  395. if not _is_valid_lfs_url(lfs_url):
  396. return None
  397. # Derived URLs are always http/https
  398. return HTTPLFSClient(lfs_url, config)
  399. return None
  400. class HTTPLFSClient(LFSClient):
  401. """LFS client for HTTP/HTTPS operations."""
  402. def __init__(self, url: str, config: "Config | None" = None) -> None:
  403. """Initialize HTTP LFS client.
  404. Args:
  405. url: LFS server URL (http:// or https://)
  406. config: Optional git config for authentication/proxy settings
  407. """
  408. super().__init__(url, config)
  409. self._pool_manager: urllib3.PoolManager | None = None
  410. def _get_pool_manager(self) -> "urllib3.PoolManager":
  411. """Get urllib3 pool manager with git config applied."""
  412. if self._pool_manager is None:
  413. from dulwich.client import default_urllib3_manager
  414. self._pool_manager = default_urllib3_manager(self.config)
  415. return self._pool_manager
  416. def _make_request(
  417. self,
  418. method: str,
  419. path: str,
  420. data: bytes | None = None,
  421. headers: dict[str, str] | None = None,
  422. ) -> bytes:
  423. """Make an HTTP request to the LFS server."""
  424. url = urljoin(self._base_url, path)
  425. req_headers = {
  426. "Accept": "application/vnd.git-lfs+json",
  427. "Content-Type": "application/vnd.git-lfs+json",
  428. "User-Agent": _get_lfs_user_agent(self.config),
  429. }
  430. if headers:
  431. req_headers.update(headers)
  432. # Use urllib3 pool manager with git config applied
  433. pool_manager = self._get_pool_manager()
  434. response = pool_manager.request(method, url, headers=req_headers, body=data)
  435. if response.status >= 400:
  436. raise ValueError(
  437. f"HTTP {response.status}: {response.data.decode('utf-8', errors='ignore')}"
  438. )
  439. return response.data
  440. def batch(
  441. self,
  442. operation: str,
  443. objects: list[dict[str, str | int]],
  444. ref: str | None = None,
  445. ) -> LFSBatchResponse:
  446. """Perform batch operation to get transfer URLs.
  447. Args:
  448. operation: "download" or "upload"
  449. objects: List of {"oid": str, "size": int} dicts
  450. ref: Optional ref name
  451. Returns:
  452. Batch response from server
  453. """
  454. data: dict[
  455. str, str | list[str] | list[dict[str, str | int]] | dict[str, str]
  456. ] = {
  457. "operation": operation,
  458. "transfers": ["basic"],
  459. "objects": objects,
  460. }
  461. if ref:
  462. data["ref"] = {"name": ref}
  463. response = self._make_request(
  464. "POST", "objects/batch", json.dumps(data).encode("utf-8")
  465. )
  466. if not response:
  467. raise ValueError("Empty response from LFS server")
  468. response_data = json.loads(response)
  469. return self._parse_batch_response(response_data)
  470. def _parse_batch_response(self, data: Mapping[str, Any]) -> LFSBatchResponse:
  471. """Parse JSON response into LFSBatchResponse dataclass."""
  472. objects = []
  473. for obj_data in data.get("objects", []):
  474. actions = None
  475. if "actions" in obj_data:
  476. actions = {}
  477. for action_name, action_data in obj_data["actions"].items():
  478. actions[action_name] = LFSAction(
  479. href=action_data["href"],
  480. header=action_data.get("header"),
  481. expires_at=action_data.get("expires_at"),
  482. )
  483. error = None
  484. if "error" in obj_data:
  485. error = LFSErrorInfo(
  486. code=obj_data["error"]["code"], message=obj_data["error"]["message"]
  487. )
  488. batch_obj = LFSBatchObject(
  489. oid=obj_data["oid"],
  490. size=obj_data["size"],
  491. authenticated=obj_data.get("authenticated"),
  492. actions=actions,
  493. error=error,
  494. )
  495. objects.append(batch_obj)
  496. return LFSBatchResponse(
  497. transfer=data.get("transfer", "basic"),
  498. objects=objects,
  499. hash_algo=data.get("hash_algo"),
  500. )
  501. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  502. """Download an LFS object.
  503. Args:
  504. oid: Object ID (SHA256)
  505. size: Expected size
  506. ref: Optional ref name
  507. Returns:
  508. Object content
  509. """
  510. # Get download URL via batch API
  511. batch_resp = self.batch("download", [{"oid": oid, "size": size}], ref)
  512. if not batch_resp.objects:
  513. raise LFSError(f"No objects returned for {oid}")
  514. obj = batch_resp.objects[0]
  515. if obj.error:
  516. raise LFSError(f"Server error for {oid}: {obj.error.message}")
  517. if not obj.actions or "download" not in obj.actions:
  518. raise LFSError(f"No download actions for {oid}")
  519. download_action = obj.actions["download"]
  520. download_url = download_action.href
  521. # Download the object using urllib3 with git config
  522. download_headers = {"User-Agent": _get_lfs_user_agent(self.config)}
  523. if download_action.header:
  524. download_headers.update(download_action.header)
  525. pool_manager = self._get_pool_manager()
  526. response = pool_manager.request("GET", download_url, headers=download_headers)
  527. content = response.data
  528. # Verify size
  529. if len(content) != size:
  530. raise LFSError(f"Downloaded size {len(content)} != expected {size}")
  531. # Verify SHA256
  532. actual_oid = hashlib.sha256(content).hexdigest()
  533. if actual_oid != oid:
  534. raise LFSError(f"Downloaded OID {actual_oid} != expected {oid}")
  535. return content
  536. def upload(
  537. self, oid: str, size: int, content: bytes, ref: str | None = None
  538. ) -> None:
  539. """Upload an LFS object.
  540. Args:
  541. oid: Object ID (SHA256)
  542. size: Object size
  543. content: Object content
  544. ref: Optional ref name
  545. """
  546. # Get upload URL via batch API
  547. batch_resp = self.batch("upload", [{"oid": oid, "size": size}], ref)
  548. if not batch_resp.objects:
  549. raise LFSError(f"No objects returned for {oid}")
  550. obj = batch_resp.objects[0]
  551. if obj.error:
  552. raise LFSError(f"Server error for {oid}: {obj.error.message}")
  553. # If no actions, object already exists
  554. if not obj.actions:
  555. return
  556. if "upload" not in obj.actions:
  557. raise LFSError(f"No upload action for {oid}")
  558. upload_action = obj.actions["upload"]
  559. upload_url = upload_action.href
  560. # Upload the object
  561. req = Request(upload_url, data=content, method="PUT")
  562. if upload_action.header:
  563. for name, value in upload_action.header.items():
  564. req.add_header(name, value)
  565. with urlopen(req) as response:
  566. if response.status >= 400:
  567. raise LFSError(f"Upload failed with status {response.status}")
  568. # Verify if needed
  569. if obj.actions and "verify" in obj.actions:
  570. verify_action = obj.actions["verify"]
  571. verify_data = json.dumps({"oid": oid, "size": size}).encode("utf-8")
  572. req = Request(verify_action.href, data=verify_data, method="POST")
  573. req.add_header("Content-Type", "application/vnd.git-lfs+json")
  574. if verify_action.header:
  575. for name, value in verify_action.header.items():
  576. req.add_header(name, value)
  577. with urlopen(req) as response:
  578. if response.status >= 400:
  579. raise LFSError(f"Verification failed with status {response.status}")
  580. class FileLFSClient(LFSClient):
  581. """LFS client for file:// URLs that accesses local filesystem."""
  582. def __init__(self, url: str, config: "Config | None" = None) -> None:
  583. """Initialize File LFS client.
  584. Args:
  585. url: LFS server URL (file://)
  586. config: Optional git config (unused for file:// URLs)
  587. """
  588. super().__init__(url, config)
  589. # Convert file:// URL to filesystem path
  590. from urllib.request import url2pathname
  591. parsed = urlparse(url)
  592. if parsed.scheme != "file":
  593. raise ValueError(f"FileLFSClient requires file:// URL, got {url!r}")
  594. # url2pathname handles the conversion properly across platforms
  595. path = url2pathname(parsed.path)
  596. self._local_store = LFSStore(path)
  597. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  598. """Download an LFS object from local filesystem.
  599. Args:
  600. oid: Object ID (SHA256)
  601. size: Expected size
  602. ref: Optional ref name (ignored for file:// URLs)
  603. Returns:
  604. Object content
  605. Raises:
  606. LFSError: If object not found or size mismatch
  607. """
  608. try:
  609. with self._local_store.open_object(oid) as f:
  610. content = f.read()
  611. except KeyError as exc:
  612. raise LFSError(f"Object not found: {oid}") from exc
  613. # Verify size
  614. if len(content) != size:
  615. raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
  616. # Verify SHA256
  617. actual_oid = hashlib.sha256(content).hexdigest()
  618. if actual_oid != oid:
  619. raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
  620. return content
  621. def upload(
  622. self, oid: str, size: int, content: bytes, ref: str | None = None
  623. ) -> None:
  624. """Upload an LFS object to local filesystem.
  625. Args:
  626. oid: Object ID (SHA256)
  627. size: Object size
  628. content: Object content
  629. ref: Optional ref name (ignored for file:// URLs)
  630. Raises:
  631. LFSError: If size or OID mismatch
  632. """
  633. # Verify size
  634. if len(content) != size:
  635. raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
  636. # Verify SHA256
  637. actual_oid = hashlib.sha256(content).hexdigest()
  638. if actual_oid != oid:
  639. raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
  640. # Store the object
  641. stored_oid = self._local_store.write_object([content])
  642. if stored_oid != oid:
  643. raise LFSError(f"Storage OID mismatch: expected {oid}, got {stored_oid}")
  644. class LFSError(Exception):
  645. """LFS-specific error."""