Ver Fonte

Add support for Git revision syntax operators ~ and ^ in objectspec

This adds support for parsing Git revision specifications like HEAD~1
and HEAD^2 in dulwich.objectspec.parse_object():

- ~N: Follow first parent N times (N-th generation ancestor); bare ~ is ~1
- ^N: Select N-th parent of a commit (1-indexed); bare ^ is ^1
- ^0: Return the commit itself
- Supports combinations like HEAD~2^1

These operators are essential for navigating Git history and are
commonly used in Git commands. The implementation processes operators
from left to right as they appear in the revision specification.
Jelmer Vernooij há 1 mês atrás
pai
commit
79487d36ca
3 ficheiros alterados com 114 adições e 1 exclusões
  1. 4 0
      NEWS
  2. 51 1
      dulwich/objectspec.py
  3. 59 0
      tests/test_objectspec.py

+ 4 - 0
NEWS

@@ -6,6 +6,10 @@
  * Add support for merge drivers.
    (Jelmer Vernooij)
 
+ * Add support for Git revision syntax operators ``~`` and ``^`` in
+   ``dulwich.objectspec.parse_object``, e.g. ``HEAD~1``, ``HEAD^2``.
+   (Jelmer Vernooij)
+
  * Add support for ``GIT_CONFIG_GLOBAL`` and ``GIT_CONFIG_SYSTEM``
    environment variables to override global and system configuration
    paths. (Jelmer Vernooij, #1193)

+ 51 - 1
dulwich/objectspec.py

@@ -48,7 +48,57 @@ def parse_object(repo: "Repo", objectish: Union[bytes, str]) -> "ShaFile":
       KeyError: If the object can not be found
     """
     objectish = to_bytes(objectish)
-    return repo[objectish]
+
+    # Find where the operators start
+    base_end = 0
+    for i, c in enumerate(objectish):
+        if c in (ord(b"~"), ord(b"^")):
+            base_end = i
+            break
+    else:
+        # No operators found
+        return repo[objectish]
+
+    if base_end == 0:
+        raise ValueError("Empty base object before ~ or ^")
+
+    # Get the base commit
+    base_ref = objectish[:base_end]
+    current = parse_commit(repo, base_ref)
+
+    # Process operators from left to right
+    i = base_end
+    while i < len(objectish):
+        op = chr(objectish[i])
+        i += 1
+
+        # Parse the number after the operator (default is 1)
+        num_str = ""
+        while i < len(objectish) and chr(objectish[i]).isdigit():
+            num_str += chr(objectish[i])
+            i += 1
+        num = int(num_str) if num_str else 1
+
+        if op == "~":
+            # ~N means N-th generation ancestor (follow first parent N times)
+            for _ in range(num):
+                if not current.parents:
+                    raise ValueError(f"Commit {current.id.decode('ascii', 'replace')} has no parents")
+                current = repo[current.parents[0]]
+        elif op == "^":
+            # ^N means N-th parent (1-indexed)
+            if num == 0:
+                # ^0 means the commit itself
+                continue
+            if num > len(current.parents):
+                raise ValueError(f"Commit {current.id.decode('ascii', 'replace')} does not have parent #{num}")
+            current = repo[current.parents[num - 1]]
+        else:
+            # Any other byte in operator position is trailing garbage after a
+            # previous operator (e.g. "HEAD~1x"), so reject the whole spec
+            raise ValueError(f"Unexpected character in revision spec: {op!r}")
+
+    return current
 
 
 def parse_tree(repo: "Repo", treeish: Union[bytes, str, Tree, Commit, Tag]) -> "Tree":

+ 59 - 0
tests/test_objectspec.py

@@ -53,6 +53,65 @@ class ParseObjectTests(TestCase):
         r.object_store.add_object(b)
         self.assertEqual(b, parse_object(r, b.id))
 
+    def test_parent_caret(self) -> None:
+        r = MemoryRepo()
+        c1, c2, c3 = build_commit_graph(r.object_store, [[1], [2, 1], [3, 1, 2]])
+        # c3's parents are [c1, c2]
+        self.assertEqual(c1, parse_object(r, c3.id + b"^1"))
+        self.assertEqual(c1, parse_object(r, c3.id + b"^"))  # ^ defaults to ^1
+        self.assertEqual(c2, parse_object(r, c3.id + b"^2"))
+
+    def test_parent_tilde(self) -> None:
+        r = MemoryRepo()
+        c1, c2, c3 = build_commit_graph(r.object_store, [[1], [2, 1], [3, 2]])
+        self.assertEqual(c2, parse_object(r, c3.id + b"~"))
+        self.assertEqual(c2, parse_object(r, c3.id + b"~1"))
+        self.assertEqual(c1, parse_object(r, c3.id + b"~2"))
+
+    def test_combined_operators(self) -> None:
+        r = MemoryRepo()
+        c1, c2, c3, c4 = build_commit_graph(
+            r.object_store, [[1], [2, 1], [3, 1, 2], [4, 3]]
+        )
+        # c4~1^2 means: go back 1 generation from c4 (to c3), then take its 2nd parent
+        # c3's parents are [c1, c2], so ^2 is c2
+        self.assertEqual(c2, parse_object(r, c4.id + b"~1^2"))
+        self.assertEqual(c1, parse_object(r, c4.id + b"~^"))
+
+    def test_with_ref(self) -> None:
+        r = MemoryRepo()
+        c1, c2, c3 = build_commit_graph(r.object_store, [[1], [2, 1], [3, 2]])
+        r.refs[b"refs/heads/master"] = c3.id
+        self.assertEqual(c2, parse_object(r, b"master~"))
+        self.assertEqual(c1, parse_object(r, b"master~2"))
+
+    def test_caret_zero(self) -> None:
+        r = MemoryRepo()
+        c1, c2 = build_commit_graph(r.object_store, [[1], [2, 1]])
+        # ^0 means the commit itself
+        self.assertEqual(c2, parse_object(r, c2.id + b"^0"))
+        self.assertEqual(c1, parse_object(r, c2.id + b"~^0"))
+
+    def test_missing_parent(self) -> None:
+        r = MemoryRepo()
+        c1, c2 = build_commit_graph(r.object_store, [[1], [2, 1]])
+        # c2 only has 1 parent, so ^2 should fail
+        self.assertRaises(ValueError, parse_object, r, c2.id + b"^2")
+        # c1 has no parents, so ~ should fail
+        self.assertRaises(ValueError, parse_object, r, c1.id + b"~")
+
+    def test_empty_base(self) -> None:
+        r = MemoryRepo()
+        self.assertRaises(ValueError, parse_object, r, b"~1")
+        self.assertRaises(ValueError, parse_object, r, b"^1")
+
+    def test_non_commit_with_operators(self) -> None:
+        r = MemoryRepo()
+        b = Blob.from_string(b"Blah")
+        r.object_store.add_object(b)
+        # Can't apply ~ or ^ to a blob
+        self.assertRaises(ValueError, parse_object, r, b.id + b"~1")
+
 
 class ParseCommitRangeTests(TestCase):
     """Test parse_commit_range."""