Przeglądaj źródła

Fix KeyError when pulling from a shallow clone (#1628)

Handle missing commits gracefully in graph traversal operations for
shallow repositories. When operating on shallow clones, commits
referenced as parents may not exist in the repository. This change:

Fixes #813
Jelmer Vernooij 1 miesiąc temu
rodzic
commit
fffb1e5a0d
3 zmienionych plików z 251 dodań i 8 usunięć
  1. 4 0
      NEWS
  2. 64 8
      dulwich/graph.py
  3. 183 0
      tests/test_graph.py

+ 4 - 0
NEWS

@@ -4,6 +4,10 @@
    and make this the default.
    (Jelmer Vernooij, #835)
 
+ * Fix KeyError when pulling from a shallow clone. Handle missing commits
+   gracefully in graph traversal operations for shallow repositories.
+   (Jelmer Vernooij, #813)
+
  * Return symrefs from ls_refs. (Jelmer Vernooij, #863)
 
  * Support short commit hashes in ``porcelain.reset()``.

+ 64 - 8
dulwich/graph.py

@@ -62,6 +62,7 @@ def _find_lcas(
     c2s: list[ObjectID],
     lookup_stamp: Callable[[ObjectID], int],
     min_stamp: int = 0,
+    shallows: Optional[set[ObjectID]] = None,
 ) -> list[ObjectID]:
     cands = []
     cstates = {}
@@ -83,11 +84,26 @@ def _find_lcas(
     # note possibility of c1 being one of c2s should be handled
     wlst: WorkList[bytes] = WorkList()
     cstates[c1] = _ANC_OF_1
-    wlst.add((lookup_stamp(c1), c1))
+    try:
+        wlst.add((lookup_stamp(c1), c1))
+    except KeyError:
+        # If c1 doesn't exist and we have shallow commits, it might be a missing parent
+        if shallows is None or not shallows:
+            raise
+        # For missing commits in shallow repos, use a minimal timestamp
+        wlst.add((0, c1))
+
     for c2 in c2s:
         cflags = cstates.get(c2, 0)
         cstates[c2] = cflags | _ANC_OF_2
-        wlst.add((lookup_stamp(c2), c2))
+        try:
+            wlst.add((lookup_stamp(c2), c2))
+        except KeyError:
+            # If c2 doesn't exist and we have shallow commits, it might be a missing parent
+            if shallows is None or not shallows:
+                raise
+            # For missing commits in shallow repos, use a minimal timestamp
+            wlst.add((0, c2))
 
     # loop while at least one working list commit is still viable (not marked as _DNC)
     # adding any parents to the list in a breadth first manner
@@ -107,7 +123,15 @@ def _find_lcas(
             # mark any parents of this node _DNC as all parents
             # would be one generation further removed common ancestors
             cflags = cflags | _DNC
-        parents = lookup_parents(cmt)
+        try:
+            parents = lookup_parents(cmt)
+        except KeyError:
+            # If we can't get parents in a shallow repo, skip this node
+            # This is safer than pretending it has no parents
+            if shallows is not None and shallows:
+                continue
+            raise
+
         if parents:
             for pcmt in parents:
                 pflags = cstates.get(pcmt, 0)
@@ -115,7 +139,13 @@ def _find_lcas(
                 # do not add it to the working list again
                 if (pflags & cflags) == cflags:
                     continue
-                pdt = lookup_stamp(pcmt)
+                try:
+                    pdt = lookup_stamp(pcmt)
+                except KeyError:
+                    # Parent doesn't exist - if we're in a shallow repo, skip it
+                    if shallows is not None and shallows:
+                        continue
+                    raise
                 if pdt < min_stamp:
                     continue
                 cstates[pcmt] = pflags | cflags
@@ -165,7 +195,9 @@ def find_merge_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[Object
     c2s = commit_ids[1:]
     if c1 in c2s:
         return [c1]
-    lcas = _find_lcas(lookup_parents, c1, c2s, lookup_stamp)
+    lcas = _find_lcas(
+        lookup_parents, c1, c2s, lookup_stamp, shallows=parents_provider.shallows
+    )
     return lcas
 
 
@@ -202,7 +234,13 @@ def find_octopus_base(repo: "BaseRepo", commit_ids: list[ObjectID]) -> list[Obje
     for cmt in others:
         next_lcas = []
         for ca in lcas:
-            res = _find_lcas(lookup_parents, cmt, [ca], lookup_stamp)
+            res = _find_lcas(
+                lookup_parents,
+                cmt,
+                [ca],
+                lookup_stamp,
+                shallows=parents_provider.shallows,
+            )
             next_lcas.extend(res)
         lcas = next_lcas[:]
     return lcas
@@ -235,6 +273,24 @@ def can_fast_forward(repo: "BaseRepo", c1: bytes, c2: bytes) -> bool:
         return True
 
     # Algorithm: Find the common ancestor
-    min_stamp = lookup_stamp(c1)
-    lcas = _find_lcas(lookup_parents, c1, [c2], lookup_stamp, min_stamp=min_stamp)
+    try:
+        min_stamp = lookup_stamp(c1)
+    except KeyError:
+        # If c1 doesn't exist in the object store, we can't determine fast-forward
+        # This can happen in shallow clones where c1 is a missing parent
+        # Check if any shallow commits have c1 as a parent
+        if parents_provider.shallows:
+            # We're in a shallow repository and c1 doesn't exist
+            # We can't determine if fast-forward is possible
+            return False
+        raise
+
+    lcas = _find_lcas(
+        lookup_parents,
+        c1,
+        [c2],
+        lookup_stamp,
+        min_stamp=min_stamp,
+        shallows=parents_provider.shallows,
+    )
     return lcas == [c1]

+ 183 - 0
tests/test_graph.py

@@ -303,6 +303,189 @@ class CanFastForwardTests(TestCase):
         self.assertFalse(can_fast_forward(r, c2a.id, c2b.id))
         self.assertFalse(can_fast_forward(r, c2b.id, c2a.id))
 
+    def test_shallow_repository(self) -> None:
+        r = MemoryRepo()
+        # Create a shallow repository structure:
+        # base (missing) -> c1 -> c2
+        # We only have c1 and c2, base is missing (shallow boundary at c1)
+
+        # Create a fake base commit ID (won't exist in repo)
+        base_sha = b"1" * 20  # Valid SHA format but doesn't exist (20 bytes)
+
+        c1 = make_commit(parents=[base_sha], commit_time=2000)
+        c2 = make_commit(parents=[c1.id], commit_time=3000)
+
+        # Only add c1 and c2 to the repo (base is missing)
+        r.object_store.add_objects([(c1, None), (c2, None)])
+
+        # Mark c1 as shallow using the proper API
+        r.update_shallow([c1.id], [])
+
+        # Should be able to fast-forward from c1 to c2
+        self.assertTrue(can_fast_forward(r, c1.id, c2.id))
+
+        # Should return False when trying to check against missing parent
+        # (not raise KeyError)
+        self.assertFalse(can_fast_forward(r, base_sha, c1.id))
+        self.assertFalse(can_fast_forward(r, base_sha, c2.id))
+
+
+class FindLCAsTests(TestCase):
+    """Tests for _find_lcas function with shallow repository support."""
+
+    def test_find_lcas_normal(self) -> None:
+        """Test _find_lcas works normally without shallow commits."""
+        # Set up a simple repository structure:
+        #   base
+        #   /  \
+        #  c1  c2
+        commits = {
+            b"base": (1000, []),
+            b"c1": (2000, [b"base"]),
+            b"c2": (3000, [b"base"]),
+        }
+
+        def lookup_parents(sha):
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            return commits[sha][0]
+
+        # Find LCA of c1 and c2, should be base
+        lcas = _find_lcas(lookup_parents, b"c1", [b"c2"], lookup_stamp)
+        self.assertEqual(lcas, [b"base"])
+
+    def test_find_lcas_with_shallow_missing_c1(self) -> None:
+        """Test _find_lcas when c1 doesn't exist in shallow clone."""
+        # Only have c2 and base, c1 is missing (shallow boundary)
+        commits = {
+            b"base": (1000, []),
+            b"c2": (3000, [b"base"]),
+        }
+        shallows = {b"c2"}  # c2 is at shallow boundary
+
+        def lookup_parents(sha):
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][0]
+
+        # c1 doesn't exist, but we have shallow commits
+        lcas = _find_lcas(
+            lookup_parents, b"c1", [b"c2"], lookup_stamp, shallows=shallows
+        )
+        # Should handle gracefully
+        self.assertEqual(lcas, [])
+
+    def test_find_lcas_with_shallow_missing_parent(self) -> None:
+        """Test _find_lcas when parent commits are missing in shallow clone."""
+        # Have c1 and c2, but base is missing
+        commits = {
+            b"c1": (2000, [b"base"]),  # base doesn't exist
+            b"c2": (3000, [b"base"]),  # base doesn't exist
+        }
+        shallows = {b"c1", b"c2"}  # Both at shallow boundary
+
+        def lookup_parents(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][0]
+
+        # Should handle missing parent gracefully
+        lcas = _find_lcas(
+            lookup_parents, b"c1", [b"c2"], lookup_stamp, shallows=shallows
+        )
+        # Can't find LCA because parents are missing
+        self.assertEqual(lcas, [])
+
+    def test_find_lcas_with_shallow_partial_history(self) -> None:
+        """Test _find_lcas with partial history in shallow clone."""
+        # Complex structure where some history is missing:
+        #      base (missing)
+        #      /  \
+        #    c1    c2
+        #     |     |
+        #    c3    c4
+        commits = {
+            b"c1": (2000, [b"base"]),  # base missing
+            b"c2": (2500, [b"base"]),  # base missing
+            b"c3": (3000, [b"c1"]),
+            b"c4": (3500, [b"c2"]),
+        }
+        shallows = {b"c1", b"c2"}  # c1 and c2 are at shallow boundary
+
+        def lookup_parents(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][0]
+
+        # Find LCA of c3 and c4
+        lcas = _find_lcas(
+            lookup_parents, b"c3", [b"c4"], lookup_stamp, shallows=shallows
+        )
+        # Can't determine LCA because base is missing
+        self.assertEqual(lcas, [])
+
+    def test_find_lcas_without_shallows_raises_keyerror(self) -> None:
+        """Test _find_lcas raises KeyError when commit missing without shallows."""
+        commits = {
+            b"c2": (3000, [b"base"]),
+        }
+
+        def lookup_parents(sha):
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][0]
+
+        # Without shallows parameter, should raise KeyError
+        with self.assertRaises(KeyError):
+            _find_lcas(lookup_parents, b"c1", [b"c2"], lookup_stamp)
+
+    def test_find_lcas_octopus_with_shallow(self) -> None:
+        """Test _find_lcas with multiple commits in shallow clone."""
+        # Structure:
+        #    base (missing)
+        #   / | \
+        #  c1 c2 c3
+        commits = {
+            b"c1": (2000, [b"base"]),
+            b"c2": (2100, [b"base"]),
+            b"c3": (2200, [b"base"]),
+        }
+        shallows = {b"c1", b"c2", b"c3"}
+
+        def lookup_parents(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][1]
+
+        def lookup_stamp(sha):
+            if sha not in commits:
+                raise KeyError(sha)
+            return commits[sha][0]
+
+        # Find LCA of c1 with c2 and c3
+        lcas = _find_lcas(
+            lookup_parents, b"c1", [b"c2", b"c3"], lookup_stamp, shallows=shallows
+        )
+        # Can't find LCA because base is missing
+        self.assertEqual(lcas, [])
+
 
 class WorkListTest(TestCase):
     def test_WorkList(self) -> None: