ソースを参照

And support for local files in LFS (#1953)

Jelmer Vernooij 3 ヶ月 前
コミット
ac79684cdc
3 ファイル変更385 行追加15 行削除
  1. 5 0
      NEWS
  2. 186 12
      dulwich/lfs.py
  3. 194 3
      tests/test_lfs.py

+ 5 - 0
NEWS

@@ -11,6 +11,11 @@
  * Fix passing ssh_command, password, and key_filename parameters to the SSH
    vendor. Regression from 0.24.2.  (Jelmer Vernooij, #1945)
 
+ * Fix LFS URL validation to prevent DNS resolution errors when ``lfs.url`` is
+   configured with an invalid value. Implement full support for ``file://`` URLs
+   to access local LFS repositories, matching git-lfs behavior.
+   (Jelmer Vernooij, #1951)
+
 0.24.6	2025-10-19
 
  * Fix import failure when ``sys.stdin`` is ``None``. The ``dulwich.server``

+ 186 - 12
dulwich/lfs.py

@@ -350,30 +350,111 @@ def _get_lfs_user_agent(config: Optional["Config"]) -> str:
     return f"git-lfs/dulwich/{version_str}"
 
 
+def _is_valid_lfs_url(url: str) -> bool:
+    """Check if a URL is valid for LFS.
+
+    Git LFS supports http://, https://, and file:// URLs.
+
+    Args:
+        url: URL to validate
+
+    Returns:
+        True if URL is a valid LFS URL, False otherwise
+    """
+    parsed = urlparse(url)
+
+    # Must have a scheme
+    if not parsed.scheme:
+        return False
+
+    # Only support http, https, and file schemes
+    if parsed.scheme not in ("http", "https", "file"):
+        return False
+
+    # http/https require a hostname
+    if parsed.scheme in ("http", "https"):
+        return bool(parsed.netloc)
+
+    # file:// URLs must have a path (netloc is typically empty)
+    if parsed.scheme == "file":
+        return bool(parsed.path)
+
+    return False
+
+
 class LFSClient:
-    """LFS client for network operations."""
+    """Base class for LFS client operations."""
 
     def __init__(self, url: str, config: Optional["Config"] = None) -> None:
         """Initialize LFS client.
 
         Args:
-            url: LFS server URL
+            url: LFS server URL (http://, https://, or file://)
             config: Optional git config for authentication/proxy settings
         """
         self._base_url = url.rstrip("/") + "/"  # Ensure trailing slash for urljoin
         self.config = config
-        self._pool_manager: Optional[urllib3.PoolManager] = None
+
+    @property
+    def url(self) -> str:
+        """Get the LFS server URL without trailing slash."""
+        return self._base_url.rstrip("/")
+
+    def download(self, oid: str, size: int, ref: Optional[str] = None) -> bytes:
+        """Download an LFS object.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Expected size
+            ref: Optional ref name
+
+        Returns:
+            Object content
+        """
+        raise NotImplementedError
+
+    def upload(
+        self, oid: str, size: int, content: bytes, ref: Optional[str] = None
+    ) -> None:
+        """Upload an LFS object.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Object size
+            content: Object content
+            ref: Optional ref name
+        """
+        raise NotImplementedError
 
     @classmethod
     def from_config(cls, config: "Config") -> Optional["LFSClient"]:
-        """Create LFS client from git config."""
+        """Create LFS client from git config.
+
+        Returns the appropriate subclass (HTTPLFSClient or FileLFSClient)
+        based on the URL scheme.
+        """
         # Try to get LFS URL from config first
         try:
             url = config.get((b"lfs",), b"url").decode()
         except KeyError:
             pass
         else:
-            return cls(url, config)
+            # Validate explicitly configured URL - raise error if invalid
+            if not _is_valid_lfs_url(url):
+                raise ValueError(
+                    f"Invalid lfs.url in config: {url!r}. "
+                    "URL must be an absolute URL with scheme http://, https://, or file://."
+                )
+
+            # Return appropriate client based on scheme
+            parsed = urlparse(url)
+            if parsed.scheme in ("http", "https"):
+                return HTTPLFSClient(url, config)
+            elif parsed.scheme == "file":
+                return FileLFSClient(url, config)
+            else:
+                # This shouldn't happen if _is_valid_lfs_url works correctly
+                raise ValueError(f"Unsupported LFS URL scheme: {parsed.scheme}")
 
         # Fall back to deriving from remote URL (same as git-lfs)
         try:
@@ -397,18 +478,28 @@ class LFSClient:
             # Standard LFS endpoint is remote_url + "/info/lfs"
             lfs_url = f"{remote_url}/info/lfs"
 
-            parsed = urlparse(lfs_url)
-            if not parsed.scheme or not parsed.netloc:
+            # Return None if derived URL is invalid (LFS is optional)
+            if not _is_valid_lfs_url(lfs_url):
                 return None
 
-            return LFSClient(lfs_url, config)
+            # Derived URLs are always http/https
+            return HTTPLFSClient(lfs_url, config)
 
         return None
 
-    @property
-    def url(self) -> str:
-        """Get the LFS server URL without trailing slash."""
-        return self._base_url.rstrip("/")
+
+class HTTPLFSClient(LFSClient):
+    """LFS client for HTTP/HTTPS operations."""
+
+    def __init__(self, url: str, config: Optional["Config"] = None) -> None:
+        """Initialize HTTP LFS client.
+
+        Args:
+            url: LFS server URL (http:// or https://)
+            config: Optional git config for authentication/proxy settings
+        """
+        super().__init__(url, config)
+        self._pool_manager: Optional[urllib3.PoolManager] = None
 
     def _get_pool_manager(self) -> "urllib3.PoolManager":
         """Get urllib3 pool manager with git config applied."""
@@ -617,5 +708,88 @@ class LFSClient:
                     raise LFSError(f"Verification failed with status {response.status}")
 
 
+class FileLFSClient(LFSClient):
+    """LFS client for file:// URLs that accesses local filesystem."""
+
+    def __init__(self, url: str, config: Optional["Config"] = None) -> None:
+        """Initialize File LFS client.
+
+        Args:
+            url: LFS server URL (file://)
+            config: Optional git config (unused for file:// URLs)
+        """
+        super().__init__(url, config)
+
+        # Convert file:// URL to filesystem path
+        from urllib.request import url2pathname
+
+        parsed = urlparse(url)
+        if parsed.scheme != "file":
+            raise ValueError(f"FileLFSClient requires file:// URL, got {url!r}")
+
+        # url2pathname handles the conversion properly across platforms
+        path = url2pathname(parsed.path)
+        self._local_store = LFSStore(path)
+
+    def download(self, oid: str, size: int, ref: Optional[str] = None) -> bytes:
+        """Download an LFS object from local filesystem.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Expected size
+            ref: Optional ref name (ignored for file:// URLs)
+
+        Returns:
+            Object content
+
+        Raises:
+            LFSError: If object not found or size mismatch
+        """
+        try:
+            with self._local_store.open_object(oid) as f:
+                content = f.read()
+        except KeyError as exc:
+            raise LFSError(f"Object not found: {oid}") from exc
+
+        # Verify size
+        if len(content) != size:
+            raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
+
+        # Verify SHA256
+        actual_oid = hashlib.sha256(content).hexdigest()
+        if actual_oid != oid:
+            raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
+
+        return content
+
+    def upload(
+        self, oid: str, size: int, content: bytes, ref: Optional[str] = None
+    ) -> None:
+        """Upload an LFS object to local filesystem.
+
+        Args:
+            oid: Object ID (SHA256)
+            size: Object size
+            content: Object content
+            ref: Optional ref name (ignored for file:// URLs)
+
+        Raises:
+            LFSError: If size or OID mismatch
+        """
+        # Verify size
+        if len(content) != size:
+            raise LFSError(f"Size mismatch: expected {size}, got {len(content)}")
+
+        # Verify SHA256
+        actual_oid = hashlib.sha256(content).hexdigest()
+        if actual_oid != oid:
+            raise LFSError(f"OID mismatch: expected {oid}, got {actual_oid}")
+
+        # Store the object
+        stored_oid = self._local_store.write_object([content])
+        if stored_oid != oid:
+            raise LFSError(f"Storage OID mismatch: expected {oid}, got {stored_oid}")
+
+
 class LFSError(Exception):
     """LFS-specific error."""

+ 194 - 3
tests/test_lfs.py

@@ -25,6 +25,7 @@ import json
 import os
 import shutil
 import tempfile
+from pathlib import Path
 
 from dulwich import porcelain
 from dulwich.lfs import LFSFilterDriver, LFSPointer, LFSStore
@@ -991,7 +992,7 @@ class LFSClientTests(TestCase):
         super().setUp()
         import threading
 
-        from dulwich.lfs import LFSClient
+        from dulwich.lfs import HTTPLFSClient
         from dulwich.lfs_server import run_lfs_server
 
         # Create temporary directory for LFS storage
@@ -1011,8 +1012,8 @@ class LFSClientTests(TestCase):
 
         self.addCleanup(cleanup_server)
 
-        # Create LFS client pointing to our test server
-        self.client = LFSClient(self.server_url)
+        # Create HTTP LFS client pointing to our test server
+        self.client = HTTPLFSClient(self.server_url)
 
     def test_client_url_normalization(self) -> None:
         """Test that client URL is normalized correctly."""
@@ -1116,3 +1117,193 @@ class LFSClientTests(TestCase):
             self.client.upload("wrong_oid", 5, b"hello")
         # Server should reject due to OID mismatch
         self.assertIn("OID mismatch", str(cm.exception))
+
+    def test_from_config_validates_lfs_url(self) -> None:
+        """Test that from_config validates lfs.url and raises error for invalid URLs."""
+        from dulwich.config import ConfigFile
+        from dulwich.lfs import LFSClient
+
+        # Test with invalid lfs.url - no scheme/host
+        config = ConfigFile()
+        config.set((b"lfs",), b"url", b"objects")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+        self.assertIn("objects", str(cm.exception))
+
+        # Test with another malformed URL - no scheme
+        config.set((b"lfs",), b"url", b"//example.com/path")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with relative path - should be rejected (not supported by git-lfs)
+        config.set((b"lfs",), b"url", b"../lfs")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with relative path starting with ./
+        config.set((b"lfs",), b"url", b"./lfs")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with unsupported scheme - git://
+        config.set((b"lfs",), b"url", b"git://example.com/repo.git")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with unsupported scheme - ssh://
+        config.set((b"lfs",), b"url", b"ssh://git@example.com/repo.git")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with http:// but no hostname
+        config.set((b"lfs",), b"url", b"http://")
+        with self.assertRaises(ValueError) as cm:
+            LFSClient.from_config(config)
+        self.assertIn("Invalid lfs.url", str(cm.exception))
+
+        # Test with valid https URL - should succeed
+        config.set((b"lfs",), b"url", b"https://example.com/repo.git/info/lfs")
+        client = LFSClient.from_config(config)
+        self.assertIsNotNone(client)
+        assert client is not None  # for mypy
+        self.assertEqual(client.url, "https://example.com/repo.git/info/lfs")
+
+        # Test with valid http URL - should succeed
+        config.set((b"lfs",), b"url", b"http://localhost:8080/lfs")
+        client = LFSClient.from_config(config)
+        self.assertIsNotNone(client)
+        assert client is not None  # for mypy
+        self.assertEqual(client.url, "http://localhost:8080/lfs")
+
+        # Test with valid file:// URL - should succeed
+        config.set((b"lfs",), b"url", b"file:///path/to/lfs")
+        client = LFSClient.from_config(config)
+        self.assertIsNotNone(client)
+        assert client is not None  # for mypy
+        self.assertEqual(client.url, "file:///path/to/lfs")
+
+        # Test with no lfs.url but valid remote - should derive URL
+        config2 = ConfigFile()
+        config2.set(
+            (b"remote", b"origin"), b"url", b"https://example.com/user/repo.git"
+        )
+        client2 = LFSClient.from_config(config2)
+        self.assertIsNotNone(client2)
+        assert client2 is not None  # for mypy
+        self.assertEqual(client2.url, "https://example.com/user/repo.git/info/lfs")
+
+
+class FileLFSClientTests(TestCase):
+    """Tests for FileLFSClient with file:// URLs."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        # Create temporary directory for LFS storage
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+
+        # Create LFS store and populate with test data
+        from dulwich.lfs import FileLFSClient, LFSStore
+
+        self.lfs_store = LFSStore.create(self.test_dir)
+        self.test_content = b"Test file content for FileLFSClient"
+        self.test_oid = self.lfs_store.write_object([self.test_content])
+
+        # Create FileLFSClient pointing to the test directory
+        # Use Path.as_uri() to create proper file:// URLs on all platforms
+        file_url = Path(self.test_dir).as_uri()
+        self.client = FileLFSClient(file_url)
+
+    def test_download_existing_object(self) -> None:
+        """Test downloading an existing object from file:// URL."""
+        content = self.client.download(self.test_oid, len(self.test_content))
+        self.assertEqual(content, self.test_content)
+
+    def test_download_missing_object(self) -> None:
+        """Test downloading a non-existent object raises LFSError."""
+        from dulwich.lfs import LFSError
+
+        fake_oid = "0" * 64
+        with self.assertRaises(LFSError) as cm:
+            self.client.download(fake_oid, 100)
+        self.assertIn("Object not found", str(cm.exception))
+
+    def test_download_size_mismatch(self) -> None:
+        """Test download with wrong size raises LFSError."""
+        from dulwich.lfs import LFSError
+
+        with self.assertRaises(LFSError) as cm:
+            self.client.download(self.test_oid, 999)  # Wrong size
+        self.assertIn("Size mismatch", str(cm.exception))
+
+    def test_upload_new_object(self) -> None:
+        """Test uploading a new object to file:// URL."""
+        import hashlib
+
+        new_content = b"New content to upload"
+        new_oid = hashlib.sha256(new_content).hexdigest()
+
+        # Upload
+        self.client.upload(new_oid, len(new_content), new_content)
+
+        # Verify it was stored
+        with self.lfs_store.open_object(new_oid) as f:
+            stored_content = f.read()
+        self.assertEqual(stored_content, new_content)
+
+    def test_upload_size_mismatch(self) -> None:
+        """Test upload with mismatched size raises LFSError."""
+        from dulwich.lfs import LFSError
+
+        content = b"test"
+        oid = "0" * 64
+
+        with self.assertRaises(LFSError) as cm:
+            self.client.upload(oid, 999, content)  # Wrong size
+        self.assertIn("Size mismatch", str(cm.exception))
+
+    def test_upload_oid_mismatch(self) -> None:
+        """Test upload with mismatched OID raises LFSError."""
+        from dulwich.lfs import LFSError
+
+        content = b"test"
+        wrong_oid = "0" * 64  # Won't match actual SHA256
+
+        with self.assertRaises(LFSError) as cm:
+            self.client.upload(wrong_oid, len(content), content)
+        self.assertIn("OID mismatch", str(cm.exception))
+
+    def test_from_config_creates_file_client(self) -> None:
+        """Test that from_config creates FileLFSClient for file:// URLs."""
+        from dulwich.config import ConfigFile
+        from dulwich.lfs import FileLFSClient, LFSClient
+
+        config = ConfigFile()
+        file_url = Path(self.test_dir).as_uri()
+        config.set((b"lfs",), b"url", file_url.encode())
+
+        client = LFSClient.from_config(config)
+        self.assertIsInstance(client, FileLFSClient)
+        assert client is not None  # for mypy
+        self.assertEqual(client.url, file_url)
+
+    def test_round_trip(self) -> None:
+        """Test uploading and then downloading an object."""
+        import hashlib
+
+        content = b"Round trip test content"
+        oid = hashlib.sha256(content).hexdigest()
+
+        # Upload
+        self.client.upload(oid, len(content), content)
+
+        # Download
+        downloaded = self.client.download(oid, len(content))
+
+        self.assertEqual(downloaded, content)