lfs.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795
  1. # lfs.py -- Implementation of the LFS
  2. # Copyright (C) 2020 Jelmer Vernooij
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Git Large File Storage (LFS) support.
  22. This module provides support for Git LFS, which is a Git extension for
  23. versioning large files. It replaces large files with text pointers inside Git,
  24. while storing the file contents on a remote server.
  25. Key components:
  26. - LFS pointer file parsing and creation
  27. - LFS object storage and retrieval
  28. - HTTP client for LFS server communication
  29. - Integration with dulwich repositories
  30. """
  31. import hashlib
  32. import json
  33. import logging
  34. import os
  35. import tempfile
  36. from collections.abc import Iterable, Mapping
  37. from dataclasses import dataclass
  38. from typing import TYPE_CHECKING, Any, BinaryIO, Optional
  39. from urllib.parse import urljoin, urlparse
  40. from urllib.request import Request, urlopen
  41. logger = logging.getLogger(__name__)
  42. if TYPE_CHECKING:
  43. import urllib3
  44. from .config import Config
  45. from .repo import Repo
  46. @dataclass
  47. class LFSAction:
  48. """LFS action structure."""
  49. href: str
  50. header: dict[str, str] | None = None
  51. expires_at: str | None = None
  52. @dataclass
  53. class LFSErrorInfo:
  54. """LFS error structure."""
  55. code: int
  56. message: str
  57. @dataclass
  58. class LFSBatchObject:
  59. """LFS batch object structure."""
  60. oid: str
  61. size: int
  62. authenticated: bool | None = None
  63. actions: dict[str, LFSAction] | None = None
  64. error: LFSErrorInfo | None = None
  65. @dataclass
  66. class LFSBatchResponse:
  67. """LFS batch response structure."""
  68. transfer: str
  69. objects: list[LFSBatchObject]
  70. hash_algo: str | None = None
  71. class LFSStore:
  72. """Stores objects on disk, indexed by SHA256."""
  73. def __init__(self, path: str) -> None:
  74. """Initialize LFSStore."""
  75. self.path = path
  76. @classmethod
  77. def create(cls, lfs_dir: str) -> "LFSStore":
  78. """Create a new LFS store."""
  79. if not os.path.isdir(lfs_dir):
  80. os.mkdir(lfs_dir)
  81. tmp_dir = os.path.join(lfs_dir, "tmp")
  82. if not os.path.isdir(tmp_dir):
  83. os.mkdir(tmp_dir)
  84. objects_dir = os.path.join(lfs_dir, "objects")
  85. if not os.path.isdir(objects_dir):
  86. os.mkdir(objects_dir)
  87. return cls(lfs_dir)
  88. @classmethod
  89. def from_repo(cls, repo: "Repo", create: bool = False) -> "LFSStore":
  90. """Create LFS store from repository."""
  91. lfs_dir = os.path.join(repo.controldir(), "lfs")
  92. if create:
  93. return cls.create(lfs_dir)
  94. return cls(lfs_dir)
  95. @classmethod
  96. def from_controldir(cls, controldir: str, create: bool = False) -> "LFSStore":
  97. """Create LFS store from control directory."""
  98. lfs_dir = os.path.join(controldir, "lfs")
  99. if create:
  100. return cls.create(lfs_dir)
  101. return cls(lfs_dir)
  102. def _sha_path(self, sha: str) -> str:
  103. return os.path.join(self.path, "objects", sha[0:2], sha[2:4], sha)
  104. def open_object(self, sha: str) -> BinaryIO:
  105. """Open an object by sha."""
  106. try:
  107. return open(self._sha_path(sha), "rb")
  108. except FileNotFoundError as exc:
  109. raise KeyError(sha) from exc
  110. def write_object(self, chunks: Iterable[bytes]) -> str:
  111. """Write an object.
  112. Returns: object SHA
  113. """
  114. # First pass: compute SHA256 and collect data
  115. sha = hashlib.sha256()
  116. data_chunks = []
  117. for chunk in chunks:
  118. sha.update(chunk)
  119. data_chunks.append(chunk)
  120. sha_hex = sha.hexdigest()
  121. path = self._sha_path(sha_hex)
  122. # If object already exists, no need to write
  123. if os.path.exists(path):
  124. return sha_hex
  125. # Object doesn't exist, write it
  126. if not os.path.exists(os.path.dirname(path)):
  127. os.makedirs(os.path.dirname(path))
  128. tmpdir = os.path.join(self.path, "tmp")
  129. with tempfile.NamedTemporaryFile(dir=tmpdir, mode="wb", delete=False) as f:
  130. for chunk in data_chunks:
  131. f.write(chunk)
  132. f.flush()
  133. tmppath = f.name
  134. # Handle concurrent writes - if file already exists, just remove temp file
  135. if os.path.exists(path):
  136. os.remove(tmppath)
  137. else:
  138. os.rename(tmppath, path)
  139. return sha_hex
  140. class LFSPointer:
  141. """Represents an LFS pointer file."""
  142. def __init__(self, oid: str, size: int) -> None:
  143. """Initialize LFSPointer."""
  144. self.oid = oid
  145. self.size = size
  146. @classmethod
  147. def from_bytes(cls, data: bytes) -> Optional["LFSPointer"]:
  148. """Parse LFS pointer from bytes.
  149. Returns None if data is not a valid LFS pointer.
  150. """
  151. try:
  152. text = data.decode("utf-8")
  153. except UnicodeDecodeError:
  154. return None
  155. # LFS pointer files have a specific format
  156. lines = text.strip().split("\n")
  157. if len(lines) < 3:
  158. return None
  159. # Must start with version
  160. if not lines[0].startswith("version https://git-lfs.github.com/spec/v1"):
  161. return None
  162. oid = None
  163. size = None
  164. for line in lines[1:]:
  165. if line.startswith("oid sha256:"):
  166. oid = line[11:].strip()
  167. elif line.startswith("size "):
  168. try:
  169. size = int(line[5:].strip())
  170. # Size must be non-negative
  171. if size < 0:
  172. return None
  173. except ValueError:
  174. return None
  175. if oid is None or size is None:
  176. return None
  177. return cls(oid, size)
  178. def to_bytes(self) -> bytes:
  179. """Convert LFS pointer to bytes."""
  180. return (
  181. f"version https://git-lfs.github.com/spec/v1\n"
  182. f"oid sha256:{self.oid}\n"
  183. f"size {self.size}\n"
  184. ).encode()
  185. def is_valid_oid(self) -> bool:
  186. """Check if the OID is valid SHA256."""
  187. if len(self.oid) != 64:
  188. return False
  189. try:
  190. int(self.oid, 16)
  191. return True
  192. except ValueError:
  193. return False
  194. class LFSFilterDriver:
  195. """LFS filter driver implementation."""
  196. def __init__(
  197. self, lfs_store: "LFSStore", config: Optional["Config"] = None
  198. ) -> None:
  199. """Initialize LFSFilterDriver."""
  200. self.lfs_store = lfs_store
  201. self.config = config
  202. def clean(self, data: bytes) -> bytes:
  203. """Convert file content to LFS pointer (clean filter)."""
  204. # Check if data is already an LFS pointer
  205. pointer = LFSPointer.from_bytes(data)
  206. if pointer is not None:
  207. return data
  208. # Store the file content in LFS
  209. sha = self.lfs_store.write_object([data])
  210. # Create and return LFS pointer
  211. pointer = LFSPointer(sha, len(data))
  212. return pointer.to_bytes()
  213. def smudge(self, data: bytes, path: bytes = b"") -> bytes:
  214. """Convert LFS pointer to file content (smudge filter)."""
  215. # Try to parse as LFS pointer
  216. pointer = LFSPointer.from_bytes(data)
  217. if pointer is None:
  218. # Not an LFS pointer, return as-is
  219. return data
  220. # Validate the pointer
  221. if not pointer.is_valid_oid():
  222. return data
  223. try:
  224. # Read the actual content from LFS store
  225. with self.lfs_store.open_object(pointer.oid) as f:
  226. return f.read()
  227. except KeyError:
  228. # Object not found in LFS store, try to download it
  229. try:
  230. content = self._download_object(pointer)
  231. return content
  232. except LFSError as e:
  233. # Download failed, fall back to returning pointer
  234. logger.warning("LFS object download failed for %s: %s", pointer.oid, e)
  235. # Return pointer as-is when object is missing and download failed
  236. return data
  237. def _download_object(self, pointer: LFSPointer) -> bytes:
  238. """Download an LFS object from the server.
  239. Args:
  240. pointer: LFS pointer containing OID and size
  241. Returns:
  242. Downloaded content
  243. Raises:
  244. LFSError: If download fails for any reason
  245. """
  246. if self.config is None:
  247. raise LFSError("No configuration available for LFS download")
  248. # Create LFS client and download
  249. client = LFSClient.from_config(self.config)
  250. if client is None:
  251. raise LFSError("No LFS client available from configuration")
  252. content = client.download(pointer.oid, pointer.size)
  253. # Store the downloaded content in local LFS store
  254. stored_oid = self.lfs_store.write_object([content])
  255. # Verify the stored OID matches what we expected
  256. if stored_oid != pointer.oid:
  257. raise LFSError(
  258. f"Downloaded OID mismatch: expected {pointer.oid}, got {stored_oid}"
  259. )
  260. return content
  261. def cleanup(self) -> None:
  262. """Clean up any resources held by this filter driver."""
  263. # LFSFilterDriver doesn't hold any resources that need cleanup
  264. def reuse(self, config: Optional["Config"], filter_name: str) -> bool:
  265. """Check if this filter driver should be reused with the given configuration."""
  266. # LFSFilterDriver is stateless and lightweight, no need to cache
  267. return False
  268. def _get_lfs_user_agent(config: Optional["Config"]) -> str:
  269. """Get User-Agent string for LFS requests, respecting git config."""
  270. try:
  271. if config:
  272. # Use configured user agent verbatim if set
  273. return config.get(b"http", b"useragent").decode()
  274. except KeyError:
  275. pass
  276. # Default LFS user agent (similar to git-lfs format)
  277. from . import __version__
  278. version_str = ".".join([str(x) for x in __version__])
  279. return f"git-lfs/dulwich/{version_str}"
  280. def _is_valid_lfs_url(url: str) -> bool:
  281. """Check if a URL is valid for LFS.
  282. Git LFS supports http://, https://, and file:// URLs.
  283. Args:
  284. url: URL to validate
  285. Returns:
  286. True if URL is a valid LFS URL, False otherwise
  287. """
  288. parsed = urlparse(url)
  289. # Must have a scheme
  290. if not parsed.scheme:
  291. return False
  292. # Only support http, https, and file schemes
  293. if parsed.scheme not in ("http", "https", "file"):
  294. return False
  295. # http/https require a hostname
  296. if parsed.scheme in ("http", "https"):
  297. return bool(parsed.netloc)
  298. # file:// URLs must have a path (netloc is typically empty)
  299. if parsed.scheme == "file":
  300. return bool(parsed.path)
  301. return False
  302. class LFSClient:
  303. """Base class for LFS client operations."""
  304. def __init__(self, url: str, config: Optional["Config"] = None) -> None:
  305. """Initialize LFS client.
  306. Args:
  307. url: LFS server URL (http://, https://, or file://)
  308. config: Optional git config for authentication/proxy settings
  309. """
  310. self._base_url = url.rstrip("/") + "/" # Ensure trailing slash for urljoin
  311. self.config = config
  312. @property
  313. def url(self) -> str:
  314. """Get the LFS server URL without trailing slash."""
  315. return self._base_url.rstrip("/")
  316. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  317. """Download an LFS object.
  318. Args:
  319. oid: Object ID (SHA256)
  320. size: Expected size
  321. ref: Optional ref name
  322. Returns:
  323. Object content
  324. """
  325. raise NotImplementedError
  326. def upload(
  327. self, oid: str, size: int, content: bytes, ref: str | None = None
  328. ) -> None:
  329. """Upload an LFS object.
  330. Args:
  331. oid: Object ID (SHA256)
  332. size: Object size
  333. content: Object content
  334. ref: Optional ref name
  335. """
  336. raise NotImplementedError
  337. @classmethod
  338. def from_config(cls, config: "Config") -> Optional["LFSClient"]:
  339. """Create LFS client from git config.
  340. Returns the appropriate subclass (HTTPLFSClient or FileLFSClient)
  341. based on the URL scheme.
  342. """
  343. # Try to get LFS URL from config first
  344. try:
  345. url = config.get((b"lfs",), b"url").decode()
  346. except KeyError:
  347. pass
  348. else:
  349. # Validate explicitly configured URL - raise error if invalid
  350. if not _is_valid_lfs_url(url):
  351. raise ValueError(
  352. f"Invalid lfs.url in config: {url!r}. "
  353. "URL must be an absolute URL with scheme http://, https://, or file://."
  354. )
  355. # Return appropriate client based on scheme
  356. parsed = urlparse(url)
  357. if parsed.scheme in ("http", "https"):
  358. return HTTPLFSClient(url, config)
  359. elif parsed.scheme == "file":
  360. return FileLFSClient(url, config)
  361. else:
  362. # This shouldn't happen if _is_valid_lfs_url works correctly
  363. raise ValueError(f"Unsupported LFS URL scheme: {parsed.scheme}")
  364. # Fall back to deriving from remote URL (same as git-lfs)
  365. try:
  366. remote_url = config.get((b"remote", b"origin"), b"url").decode()
  367. except KeyError:
  368. pass
  369. else:
  370. # Convert SSH URLs to HTTPS if needed
  371. if remote_url.startswith("git@"):
  372. # Convert git@host:user/repo.git to https://host/user/repo.git
  373. if ":" in remote_url and "/" in remote_url:
  374. host_and_path = remote_url[4:] # Remove "git@"
  375. if ":" in host_and_path:
  376. host, path = host_and_path.split(":", 1)
  377. remote_url = f"https://{host}/{path}"
  378. # Ensure URL ends with .git for consistent LFS endpoint
  379. if not remote_url.endswith(".git"):
  380. remote_url = f"{remote_url}.git"
  381. # Standard LFS endpoint is remote_url + "/info/lfs"
  382. lfs_url = f"{remote_url}/info/lfs"
  383. # Return None if derived URL is invalid (LFS is optional)
  384. if not _is_valid_lfs_url(lfs_url):
  385. return None
  386. # Derived URLs are always http/https
  387. return HTTPLFSClient(lfs_url, config)
  388. return None
  389. class HTTPLFSClient(LFSClient):
  390. """LFS client for HTTP/HTTPS operations."""
  391. def __init__(self, url: str, config: Optional["Config"] = None) -> None:
  392. """Initialize HTTP LFS client.
  393. Args:
  394. url: LFS server URL (http:// or https://)
  395. config: Optional git config for authentication/proxy settings
  396. """
  397. super().__init__(url, config)
  398. self._pool_manager: urllib3.PoolManager | None = None
  399. def _get_pool_manager(self) -> "urllib3.PoolManager":
  400. """Get urllib3 pool manager with git config applied."""
  401. if self._pool_manager is None:
  402. from dulwich.client import default_urllib3_manager
  403. self._pool_manager = default_urllib3_manager(self.config)
  404. return self._pool_manager
  405. def _make_request(
  406. self,
  407. method: str,
  408. path: str,
  409. data: bytes | None = None,
  410. headers: dict[str, str] | None = None,
  411. ) -> bytes:
  412. """Make an HTTP request to the LFS server."""
  413. url = urljoin(self._base_url, path)
  414. req_headers = {
  415. "Accept": "application/vnd.git-lfs+json",
  416. "Content-Type": "application/vnd.git-lfs+json",
  417. "User-Agent": _get_lfs_user_agent(self.config),
  418. }
  419. if headers:
  420. req_headers.update(headers)
  421. # Use urllib3 pool manager with git config applied
  422. pool_manager = self._get_pool_manager()
  423. response = pool_manager.request(method, url, headers=req_headers, body=data)
  424. if response.status >= 400:
  425. raise ValueError(
  426. f"HTTP {response.status}: {response.data.decode('utf-8', errors='ignore')}"
  427. )
  428. return response.data
  429. def batch(
  430. self,
  431. operation: str,
  432. objects: list[dict[str, str | int]],
  433. ref: str | None = None,
  434. ) -> LFSBatchResponse:
  435. """Perform batch operation to get transfer URLs.
  436. Args:
  437. operation: "download" or "upload"
  438. objects: List of {"oid": str, "size": int} dicts
  439. ref: Optional ref name
  440. Returns:
  441. Batch response from server
  442. """
  443. data: dict[
  444. str, str | list[str] | list[dict[str, str | int]] | dict[str, str]
  445. ] = {
  446. "operation": operation,
  447. "transfers": ["basic"],
  448. "objects": objects,
  449. }
  450. if ref:
  451. data["ref"] = {"name": ref}
  452. response = self._make_request(
  453. "POST", "objects/batch", json.dumps(data).encode("utf-8")
  454. )
  455. if not response:
  456. raise ValueError("Empty response from LFS server")
  457. response_data = json.loads(response)
  458. return self._parse_batch_response(response_data)
  459. def _parse_batch_response(self, data: Mapping[str, Any]) -> LFSBatchResponse:
  460. """Parse JSON response into LFSBatchResponse dataclass."""
  461. objects = []
  462. for obj_data in data.get("objects", []):
  463. actions = None
  464. if "actions" in obj_data:
  465. actions = {}
  466. for action_name, action_data in obj_data["actions"].items():
  467. actions[action_name] = LFSAction(
  468. href=action_data["href"],
  469. header=action_data.get("header"),
  470. expires_at=action_data.get("expires_at"),
  471. )
  472. error = None
  473. if "error" in obj_data:
  474. error = LFSErrorInfo(
  475. code=obj_data["error"]["code"], message=obj_data["error"]["message"]
  476. )
  477. batch_obj = LFSBatchObject(
  478. oid=obj_data["oid"],
  479. size=obj_data["size"],
  480. authenticated=obj_data.get("authenticated"),
  481. actions=actions,
  482. error=error,
  483. )
  484. objects.append(batch_obj)
  485. return LFSBatchResponse(
  486. transfer=data.get("transfer", "basic"),
  487. objects=objects,
  488. hash_algo=data.get("hash_algo"),
  489. )
  490. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  491. """Download an LFS object.
  492. Args:
  493. oid: Object ID (SHA256)
  494. size: Expected size
  495. ref: Optional ref name
  496. Returns:
  497. Object content
  498. """
  499. # Get download URL via batch API
  500. batch_resp = self.batch("download", [{"oid": oid, "size": size}], ref)
  501. if not batch_resp.objects:
  502. raise LFSError(f"No objects returned for {oid}")
  503. obj = batch_resp.objects[0]
  504. if obj.error:
  505. raise LFSError(f"Server error for {oid}: {obj.error.message}")
  506. if not obj.actions or "download" not in obj.actions:
  507. raise LFSError(f"No download actions for {oid}")
  508. download_action = obj.actions["download"]
  509. download_url = download_action.href
  510. # Download the object using urllib3 with git config
  511. download_headers = {"User-Agent": _get_lfs_user_agent(self.config)}
  512. if download_action.header:
  513. download_headers.update(download_action.header)
  514. pool_manager = self._get_pool_manager()
  515. response = pool_manager.request("GET", download_url, headers=download_headers)
  516. content = response.data
  517. # Verify size
  518. if len(content) != size:
  519. raise LFSError(f"Downloaded size {len(content)} != expected {size}")
  520. # Verify SHA256
  521. actual_oid = hashlib.sha256(content).hexdigest()
  522. if actual_oid != oid:
  523. raise LFSError(f"Downloaded OID {actual_oid} != expected {oid}")
  524. return content
  525. def upload(
  526. self, oid: str, size: int, content: bytes, ref: str | None = None
  527. ) -> None:
  528. """Upload an LFS object.
  529. Args:
  530. oid: Object ID (SHA256)
  531. size: Object size
  532. content: Object content
  533. ref: Optional ref name
  534. """
  535. # Get upload URL via batch API
  536. batch_resp = self.batch("upload", [{"oid": oid, "size": size}], ref)
  537. if not batch_resp.objects:
  538. raise LFSError(f"No objects returned for {oid}")
  539. obj = batch_resp.objects[0]
  540. if obj.error:
  541. raise LFSError(f"Server error for {oid}: {obj.error.message}")
  542. # If no actions, object already exists
  543. if not obj.actions:
  544. return
  545. if "upload" not in obj.actions:
  546. raise LFSError(f"No upload action for {oid}")
  547. upload_action = obj.actions["upload"]
  548. upload_url = upload_action.href
  549. # Upload the object
  550. req = Request(upload_url, data=content, method="PUT")
  551. if upload_action.header:
  552. for name, value in upload_action.header.items():
  553. req.add_header(name, value)
  554. with urlopen(req) as response:
  555. if response.status >= 400:
  556. raise LFSError(f"Upload failed with status {response.status}")
  557. # Verify if needed
  558. if obj.actions and "verify" in obj.actions:
  559. verify_action = obj.actions["verify"]
  560. verify_data = json.dumps({"oid": oid, "size": size}).encode("utf-8")
  561. req = Request(verify_action.href, data=verify_data, method="POST")
  562. req.add_header("Content-Type", "application/vnd.git-lfs+json")
  563. if verify_action.header:
  564. for name, value in verify_action.header.items():
  565. req.add_header(name, value)
  566. with urlopen(req) as response:
  567. if response.status >= 400:
  568. raise LFSError(f"Verification failed with status {response.status}")
  569. class FileLFSClient(LFSClient):
  570. """LFS client for file:// URLs that accesses local filesystem."""
  571. def __init__(self, url: str, config: Optional["Config"] = None) -> None:
  572. """Initialize File LFS client.
  573. Args:
  574. url: LFS server URL (file://)
  575. config: Optional git config (unused for file:// URLs)
  576. """
  577. super().__init__(url, config)
  578. # Convert file:// URL to filesystem path
  579. from urllib.request import url2pathname
  580. parsed = urlparse(url)
  581. if parsed.scheme != "file":
  582. raise ValueError(f"FileLFSClient requires file:// URL, got {url!r}")
  583. # url2pathname handles the conversion properly across platforms
  584. path = url2pathname(parsed.path)
  585. self._local_store = LFSStore(path)
  586. def download(self, oid: str, size: int, ref: str | None = None) -> bytes:
  587. """Download an LFS object from local filesystem.
  588. Args:
  589. oid: Object ID (SHA256)
  590. size: Expected size
  591. ref: Optional ref name (ignored for file:// URLs)
  592. Returns:
  593. Object content
  594. Raises:
  595. LFSError: If object not found or size mismatch
  596. """
  597. try:
  598. with self._local_store.open_object(oid) as f:
  599. content = f.read()
  600. except KeyError as exc:
  601. raise LFSError(f"Object not found: {oid}") from exc
  602. # Verify size
  603. if len(content) != size:
  604. raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
  605. # Verify SHA256
  606. actual_oid = hashlib.sha256(content).hexdigest()
  607. if actual_oid != oid:
  608. raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
  609. return content
  610. def upload(
  611. self, oid: str, size: int, content: bytes, ref: str | None = None
  612. ) -> None:
  613. """Upload an LFS object to local filesystem.
  614. Args:
  615. oid: Object ID (SHA256)
  616. size: Object size
  617. content: Object content
  618. ref: Optional ref name (ignored for file:// URLs)
  619. Raises:
  620. LFSError: If size or OID mismatch
  621. """
  622. # Verify size
  623. if len(content) != size:
  624. raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
  625. # Verify SHA256
  626. actual_oid = hashlib.sha256(content).hexdigest()
  627. if actual_oid != oid:
  628. raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
  629. # Store the object
  630. stored_oid = self._local_store.write_object([content])
  631. if stored_oid != oid:
  632. raise LFSError(f"Storage OID mismatch: expected {oid}, got {stored_oid}")
  633. class LFSError(Exception):
  634. """LFS-specific error."""