Sfoglia il codice sorgente

Add automatic object format detection and conversion for network clones

Jelmer Vernooij 1 mese fa
parent
commit
ac8ff0a3b8
3 file modificati con 73 aggiunte e 37 eliminazioni
  1. 8 14
      dulwich/client.py
  2. 43 7
      dulwich/repo.py
  3. 22 16
      tests/test_client.py

+ 8 - 14
dulwich/client.py

@@ -408,7 +408,7 @@ def extract_object_format_from_capabilities(
     for capability in capabilities:
         k, v = parse_capability(capability)
         if k == b"object-format" and v is not None:
-            return v.decode("ascii")
+            return v.decode("ascii").strip()
     return None
 
 
@@ -1105,9 +1105,10 @@ class GitClient:
             os.mkdir(target_path)
 
         try:
-            # For network clones, create repository with default SHA-1 format
-            # If remote uses SHA-256, fetch() will raise GitProtocolError
-            # Subclasses (e.g., LocalGitClient) override to detect format first
+            # For network clones, create repository with default SHA-1 format initially.
+            # If remote uses a different format, fetch() will auto-change the repo's format
+            # (since repo is empty at this point).
+            # Subclasses (e.g., LocalGitClient) override to detect format first for efficiency.
             target = None
             if not bare:
                 target = Repo.init(target_path)
@@ -1148,11 +1149,6 @@ class GitClient:
                 protocol_version=protocol_version,
             )
 
-            # Note: For network clones, if remote uses a different object format than
-            # the default SHA-1, fetch() will raise GitProtocolError. Subclasses like
-            # LocalGitClient override clone() to detect format before creating the repo.
-            # TODO: Fix network clones to detect object format before creating target repo.
-
             if origin is not None:
                 _import_remote_refs(
                     target.refs, origin, result.refs, message=ref_message
@@ -1270,15 +1266,13 @@ class GitClient:
                 shallow_exclude=shallow_exclude,
             )
 
-            # Validate object format compatibility
+            # Fix object format if needed
             if (
                 result.object_format
                 and result.object_format != target.object_format.name
             ):
-                raise GitProtocolError(
-                    f"Object format mismatch: remote uses {result.object_format}, "
-                    f"local repository uses {target.object_format.name}"
-                )
+                # Change the target repo's format if it's empty
+                target._change_object_format(result.object_format)
         except BaseException:
             abort()
             raise

+ 43 - 7
dulwich/repo.py

@@ -656,6 +656,46 @@ class BaseRepo:
         """
         raise NotImplementedError(self.open_index)
 
+    def _change_object_format(self, object_format_name: str) -> None:
+        """Change the object format of this repository.
+
+        This can only be done if the object store is empty (no objects written yet).
+
+        Args:
+          object_format_name: Name of the new object format (e.g., "sha1", "sha256")
+
+        Raises:
+          AssertionError: If the object store is not empty
+        """
+        # Check if object store has any objects
+        for _ in self.object_store:
+            raise AssertionError(
+                "Cannot change object format: repository already contains objects"
+            )
+
+        # Update the object format
+        from .object_format import get_object_format
+
+        new_format = get_object_format(object_format_name)
+        self.object_format = new_format
+        self.object_store.object_format = new_format
+
+        # Update config file
+        config = self.get_config()
+
+        if object_format_name == "sha1":
+            # For SHA-1, explicitly remove objectformat extension if present
+            try:
+                config.remove("extensions", "objectformat")
+            except KeyError:
+                pass
+        else:
+            # For non-SHA-1 formats, set repositoryformatversion to 1 and objectformat extension
+            config.set("core", "repositoryformatversion", "1")
+            config.set("extensions", "objectformat", object_format_name)
+
+        config.write_to_path()
+
     def fetch(
         self,
         target: "BaseRepo",
@@ -674,14 +714,10 @@ class BaseRepo:
           depth: Optional shallow fetch depth
         Returns: The local refs
         """
-        # Validate object format compatibility
+        # Fix object format if needed
         if self.object_format != target.object_format:
-            from .errors import GitProtocolError
-
-            raise GitProtocolError(
-                f"Object format mismatch: source uses {self.object_format.name}, "
-                f"target uses {target.object_format.name}"
-            )
+            # Change the target repo's format if it's empty
+            target._change_object_format(self.object_format.name)
 
         if determine_wants is None:
             determine_wants = target.object_store.determine_wants_all

+ 22 - 16
tests/test_client.py

@@ -1247,8 +1247,8 @@ class LocalGitClientTests(TestCase):
         self.assertIn(b"HEAD", result.symrefs)
 
     def test_fetch_object_format_mismatch_sha256_to_sha1(self) -> None:
-        """Test that fetching from SHA-256 to SHA-1 repository fails."""
-        from dulwich.errors import GitProtocolError
+        """Test that fetching from SHA-256 to non-empty SHA-1 repository fails."""
+        from dulwich.objects import Blob
 
         client = LocalGitClient()
 
@@ -1258,23 +1258,26 @@ class LocalGitClientTests(TestCase):
         sha256_repo = Repo.init(sha256_path, object_format="sha256")
         self.addCleanup(sha256_repo.close)
 
-        # Create SHA-1 target repository
+        # Create SHA-1 target repository with an object (so it can't be auto-changed)
         sha1_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, sha1_path)
         sha1_repo = Repo.init(sha1_path, object_format="sha1")
         self.addCleanup(sha1_repo.close)
 
-        # Attempt to fetch should raise GitProtocolError
-        with self.assertRaises(GitProtocolError) as cm:
+        # Add an object to make the repo non-empty
+        blob = Blob.from_string(b"test content")
+        sha1_repo.object_store.add_object(blob)
+
+        # Attempt to fetch should raise AssertionError (repo not empty)
+        with self.assertRaises(AssertionError) as cm:
             client.fetch(sha256_path, sha1_repo)
 
-        self.assertIn("Object format mismatch", str(cm.exception))
-        self.assertIn("sha256", str(cm.exception))
-        self.assertIn("sha1", str(cm.exception))
+        self.assertIn("Cannot change object format", str(cm.exception))
+        self.assertIn("already contains objects", str(cm.exception))
 
     def test_fetch_object_format_mismatch_sha1_to_sha256(self) -> None:
-        """Test that fetching from SHA-1 to SHA-256 repository fails."""
-        from dulwich.errors import GitProtocolError
+        """Test that fetching from SHA-1 to non-empty SHA-256 repository fails."""
+        from dulwich.objects import Blob
 
         client = LocalGitClient()
 
@@ -1284,19 +1287,22 @@ class LocalGitClientTests(TestCase):
         sha1_repo = Repo.init(sha1_path, object_format="sha1")
         self.addCleanup(sha1_repo.close)
 
-        # Create SHA-256 target repository
+        # Create SHA-256 target repository with an object (so it can't be auto-changed)
         sha256_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, sha256_path)
         sha256_repo = Repo.init(sha256_path, object_format="sha256")
         self.addCleanup(sha256_repo.close)
 
-        # Attempt to fetch should raise GitProtocolError
-        with self.assertRaises(GitProtocolError) as cm:
+        # Add an object to make the repo non-empty
+        blob = Blob.from_string(b"test content")
+        sha256_repo.object_store.add_object(blob)
+
+        # Attempt to fetch should raise AssertionError (repo not empty)
+        with self.assertRaises(AssertionError) as cm:
             client.fetch(sha1_path, sha256_repo)
 
-        self.assertIn("Object format mismatch", str(cm.exception))
-        self.assertIn("sha1", str(cm.exception))
-        self.assertIn("sha256", str(cm.exception))
+        self.assertIn("Cannot change object format", str(cm.exception))
+        self.assertIn("already contains objects", str(cm.exception))
 
     def test_fetch_object_format_same(self) -> None:
         """Test that fetching between repositories with same object format works."""