2
0
Эх сурвалжийг харах

Support more complicated revision syntax (#1704)

E.g. ^1, ~1, rev:path, ^{}
Jelmer Vernooij 1 сар өмнө
parent
commit
485b3db96c
3 өөрчлөгдсөн 298 нэмэгдсэн , 2 устгасан
  1. 5 0
      NEWS
  2. 99 1
      dulwich/objectspec.py
  3. 194 1
      tests/test_objectspec.py

+ 5 - 0
NEWS

@@ -9,6 +9,11 @@
  * Add support for merge drivers.
    (Jelmer Vernooij)
 
+ * Add support for Git revision syntax operators ``~``, ``^``, ``^{}``,
+   ``@{N}``, and ``:path`` in ``dulwich.objectspec.parse_object``,
+   e.g. ``HEAD~1``, ``HEAD^2``, ``v1.0^{}``, ``HEAD@{1}``, ``HEAD:README``.
+   (Jelmer Vernooij)
+
  * Add support for ``GIT_CONFIG_GLOBAL`` and ``GIT_CONFIG_SYSTEM``
    environment variables to override global and system configuration
    paths. (Jelmer Vernooij, #1193)

+ 99 - 1
dulwich/objectspec.py

@@ -36,6 +36,33 @@ def to_bytes(text: Union[str, bytes]) -> bytes:
     return text  # type: ignore
 
 
+def _resolve_object(repo: "Repo", ref: bytes) -> "ShaFile":
+    """Look up ref as a repo key, then a parsed ref, then an object-store key."""
+    try:
+        return repo[ref]
+    except KeyError:
+        pass
+    try:
+        return repo[parse_ref(repo, ref)]
+    except KeyError:
+        pass
+    try:
+        return repo.object_store[ref]
+    except (KeyError, ValueError):
+        raise KeyError(ref)  # every strategy failed; report the requested ref
+
+
+def _parse_number_suffix(suffix: bytes) -> tuple[int, bytes]:
+    """Split leading ASCII digits off suffix; the number defaults to 1 if absent."""
+    digits = 0
+    while suffix[digits : digits + 1].isdigit():
+        digits += 1
+    if digits == 0:
+        # No explicit count (e.g. the bare "~" in "HEAD~"): default to 1
+        return 1, suffix
+    return int(suffix[:digits]), suffix[digits:]
+
+
 def parse_object(repo: "Repo", objectish: Union[bytes, str]) -> "ShaFile":
     """Parse a string referring to an object.
 
@@ -47,7 +74,78 @@ def parse_object(repo: "Repo", objectish: Union[bytes, str]) -> "ShaFile":
       KeyError: If the object can not be found
     """
     objectish = to_bytes(objectish)
-    return repo[objectish]
+
+    # Handle :<path> - lookup path in tree
+    if b":" in objectish:
+        rev, path = objectish.split(b":", 1)
+        if not rev:
+            raise NotImplementedError("Index path lookup (:path) not yet supported")
+        tree = parse_tree(repo, rev)
+        _mode, sha = tree.lookup_path(repo.object_store.__getitem__, path)
+        return repo[sha]
+
+    # Handle @{N} - reflog lookup
+    if b"@{" in objectish:
+        base, rest = objectish.split(b"@{", 1)
+        if not rest.endswith(b"}"):
+            raise ValueError("Invalid @{} syntax")
+        spec = rest[:-1]
+        if not spec.isdigit():
+            raise NotImplementedError(f"Only @{{N}} supported, not @{{{spec!r}}}")
+
+        ref = base if base else b"HEAD"
+        entries = list(repo.read_reflog(ref))
+        entries.reverse()  # Git uses reverse chronological order
+        index = int(spec)
+        if index >= len(entries):
+            raise ValueError(f"Reflog for {ref!r} has only {len(entries)} entries")
+        return repo[entries[index].new_sha]
+
+    # Handle ~, ^ and ^{} operators. They apply left to right, so split at the
+    # leftmost one: taking "~" first would turn "HEAD^2~1" into the base
+    # "HEAD^2", which cannot be resolved.
+    positions = [p for p in (objectish.find(b"~"), objectish.find(b"^")) if p >= 0]
+    if positions:
+        pos = min(positions)
+        base, sep = objectish[:pos], objectish[pos : pos + 1]
+        suffix = objectish[pos + 1 :]
+        if not base:
+            raise ValueError(f"Empty base before {sep!r}")
+
+        obj = _resolve_object(repo, base)
+        if sep == b"^" and suffix.startswith(b"{}"):
+            # ^{} - peel annotated tags until a non-tag object is reached
+            suffix = suffix[2:]
+            while isinstance(obj, Tag):
+                _obj_type, obj_sha = obj.object
+                obj = repo[obj_sha]
+        else:
+            # ~N / ^N - a missing N defaults to 1
+            num, suffix = _parse_number_suffix(suffix)
+            commit = obj if isinstance(obj, Commit) else parse_commit(repo, obj.id)
+
+            if sep == b"~":
+                # Follow first parent N times
+                for _ in range(num):
+                    if not commit.parents:
+                        raise ValueError(
+                            f"Commit {commit.id.decode('ascii', 'replace')} has no parents"
+                        )
+                    commit = repo[commit.parents[0]]
+            elif num > len(commit.parents):
+                raise ValueError(
+                    f"Commit {commit.id.decode('ascii', 'replace')} does not have parent #{num}"
+                )
+            elif num:
+                # ^N is the N-th parent; ^0 stays on the commit itself
+                commit = repo[commit.parents[num - 1]]
+            obj = commit
+
+        # Process remaining operators recursively
+        return parse_object(repo, obj.id + suffix) if suffix else obj
+
+    # No operators, just return the object
+    return _resolve_object(repo, objectish)
 
 
 def parse_tree(repo: "Repo", treeish: Union[bytes, str, Tree, Commit, Tag]) -> "Tree":

+ 194 - 1
tests/test_objectspec.py

@@ -23,7 +23,7 @@
 
 # TODO: Round-trip parse-serialize-parse and serialize-parse-serialize tests.
 
-from dulwich.objects import Blob, Commit, Tag
+from dulwich.objects import Blob, Commit, Tag, Tree
 from dulwich.objectspec import (
     parse_commit,
     parse_commit_range,
@@ -53,6 +53,199 @@ class ParseObjectTests(TestCase):
         r.object_store.add_object(b)
         self.assertEqual(b, parse_object(r, b.id))
 
+    def test_parent_caret(self) -> None:
+        """``^N`` selects the N-th parent; a bare ``^`` means ``^1``."""
+        repo = MemoryRepo()
+        graph = [[1], [2, 1], [3, 1, 2]]  # commit 3 merges parents [1, 2]
+        first, second, merge = build_commit_graph(repo.object_store, graph)
+        for spec, want in ((b"^1", first), (b"^", first), (b"^2", second)):
+            self.assertEqual(want, parse_object(repo, merge.id + spec))
+
+    def test_parent_tilde(self) -> None:
+        """``~N`` walks N first-parent steps; a bare ``~`` means ``~1``."""
+        repo = MemoryRepo()
+        root, middle, tip = build_commit_graph(repo.object_store, [[1], [2, 1], [3, 2]])
+        for spec, want in ((b"~", middle), (b"~1", middle), (b"~2", root)):
+            self.assertEqual(want, parse_object(repo, tip.id + spec))
+
+    def test_combined_operators(self) -> None:
+        """Chained operators are applied left to right."""
+        repo = MemoryRepo()
+        graph = [[1], [2, 1], [3, 1, 2], [4, 3]]
+        c1, c2, _c3, c4 = build_commit_graph(repo.object_store, graph)
+        # c4~1 is c3, whose parents are [c1, c2], so a following ^2 picks c2
+        self.assertEqual(c2, parse_object(repo, c4.id + b"~1^2"))
+        # With both counts omitted, "~^" reads as "~1^1" and lands on c1
+        self.assertEqual(c1, parse_object(repo, c4.id + b"~^"))
+
+    def test_with_ref(self) -> None:
+        repo = MemoryRepo()
+        root, middle, tip = build_commit_graph(repo.object_store, [[1], [2, 1], [3, 2]])
+        repo.refs[b"refs/heads/master"] = tip.id  # addressed below as plain "master"
+        for spec, want in ((b"master~", middle), (b"master~2", root)):
+            self.assertEqual(want, parse_object(repo, spec))
+
+    def test_caret_zero(self) -> None:
+        repo = MemoryRepo()
+        parent, child = build_commit_graph(repo.object_store, [[1], [2, 1]])
+        # A zero count selects the commit the operator is applied to
+        for spec, want in ((b"^0", child), (b"~^0", parent)):
+            self.assertEqual(want, parse_object(repo, child.id + spec))
+
+    def test_missing_parent(self) -> None:
+        repo = MemoryRepo()
+        root, child = build_commit_graph(repo.object_store, [[1], [2, 1]])
+        with self.assertRaises(ValueError):  # child has a single parent
+            parse_object(repo, child.id + b"^2")
+        with self.assertRaises(ValueError):  # root has no parent at all
+            parse_object(repo, root.id + b"~")
+
+    def test_empty_base(self) -> None:
+        repo = MemoryRepo()
+        for spec in (b"~1", b"^1"):  # an operator with nothing in front of it
+            self.assertRaises(ValueError, parse_object, repo, spec)
+
+    def test_non_commit_with_operators(self) -> None:
+        repo = MemoryRepo()
+        blob = Blob.from_string(b"Blah")
+        repo.object_store.add_object(blob)
+        with self.assertRaises(ValueError):  # only the ~ form is exercised on a blob
+            parse_object(repo, blob.id + b"~1")
+
+    def test_tag_dereference(self) -> None:
+        """``^{}`` on an annotated tag yields the tagged commit."""
+        repo = MemoryRepo()
+        (target,) = build_commit_graph(repo.object_store, [[1]])
+        annotated = Tag()
+        annotated.name = b"v1.0"
+        annotated.message = b"Test tag"
+        annotated.tag_time = 1234567890
+        annotated.tag_timezone = 0
+        annotated.object = (Commit, target.id)
+        annotated.tagger = b"Test Tagger <test@example.com>"
+        repo.object_store.add_object(annotated)
+        peeled = parse_object(repo, annotated.id + b"^{}")
+        self.assertEqual(target, peeled)
+
+    def test_nested_tag_dereference(self) -> None:
+        """``^{}`` keeps peeling when a tag points at another tag."""
+        repo = MemoryRepo()
+        (target,) = build_commit_graph(repo.object_store, [[1]])
+
+        # Inner tag: points straight at the commit
+        inner = Tag()
+        inner.name = b"v1.0"
+        inner.message = b"Test tag"
+        inner.tag_time = 1234567890
+        inner.tag_timezone = 0
+        inner.object = (Commit, target.id)
+        inner.tagger = b"Test Tagger <test@example.com>"
+        repo.object_store.add_object(inner)
+
+        # Outer tag: points at the inner tag rather than at a commit
+        outer = Tag()
+        outer.name = b"v1.0-release"
+        outer.message = b"Release tag"
+        outer.tag_time = 1234567900
+        outer.tag_timezone = 0
+        outer.object = (Tag, inner.id)
+        outer.tagger = b"Test Tagger <test@example.com>"
+        repo.object_store.add_object(outer)
+        self.assertEqual(target, parse_object(repo, outer.id + b"^{}"))
+
+    def test_path_in_tree(self) -> None:
+        """``<rev>:<path>`` returns the object stored at that path."""
+        repo = MemoryRepo()
+        blob = Blob.from_string(b"Test content")
+
+        # Build a single commit whose tree holds the blob as test.txt
+        trees = {1: [(b"test.txt", blob)]}
+        (commit,) = build_commit_graph(repo.object_store, [[1]], trees=trees)
+        repo.refs[b"HEAD"] = commit.id
+
+        found = parse_object(repo, b"HEAD:test.txt")
+        self.assertEqual(b"Test content", found.data)
+
+    def test_path_in_tree_nested(self) -> None:
+        """``<rev>:<dir>/<file>`` descends through a subtree."""
+        repo = MemoryRepo()
+        store = repo.object_store
+        nested_blob = Blob.from_string(b"Content 1")
+        readme_blob = Blob.from_string(b"Content 2")
+
+        # build_commit_graph is not used here: the trees are assembled by hand
+        # so that one of them can be nested inside the other.
+        subtree = Tree()
+        subtree.add(b"file.txt", 0o100644, nested_blob.id)
+        store.add_object(nested_blob)
+        store.add_object(subtree)
+
+        # Root tree: a README next to the "subdir" directory entry
+        root_tree = Tree()
+        root_tree.add(b"README", 0o100644, readme_blob.id)
+        root_tree.add(b"subdir", 0o040000, subtree.id)
+        store.add_object(readme_blob)
+        store.add_object(root_tree)
+
+        # Commit that uses the root tree
+        commit = Commit()
+        commit.tree = root_tree.id
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.author_time = commit.commit_time = 1234567890
+        commit.author_timezone = commit.commit_timezone = 0
+        commit.message = b"Test commit"
+        store.add_object(commit)
+
+        found = parse_object(repo, commit.id + b":subdir/file.txt")
+        self.assertEqual(b"Content 1", found.data)
+
+    def test_reflog_lookup(self) -> None:
+        # Reflog testing uses a real on-disk bare repo created in a temp dir
+        import tempfile
+
+        from dulwich.repo import Repo
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            r = Repo.init_bare(tmpdir)
+            c1, c2, c3 = build_commit_graph(r.object_store, [[1], [2, 1], [3, 2]])
+
+            # Append three HEAD reflog entries through the private _write_reflog
+            # helper, oldest first: (None -> c1), (c1 -> c2), (c2 -> c3)
+            r._write_reflog(
+                b"HEAD",
+                None,
+                c1.id,
+                b"Test User <test@example.com>",
+                1234567890,
+                0,
+                b"commit: Initial commit",
+            )
+            r._write_reflog(
+                b"HEAD",
+                c1.id,
+                c2.id,
+                b"Test User <test@example.com>",
+                1234567891,
+                0,
+                b"commit: Second commit",
+            )
+            r._write_reflog(
+                b"HEAD",
+                c2.id,
+                c3.id,
+                b"Test User <test@example.com>",
+                1234567892,
+                0,
+                b"commit: Third commit",
+            )
+
+            # HEAD@{0} is the newest entry, i.e. the last one written (c3)
+            self.assertEqual(c3, parse_object(r, b"HEAD@{0}"))
+            # HEAD@{1} is one entry back from the newest (c2)
+            self.assertEqual(c2, parse_object(r, b"HEAD@{1}"))
+            # HEAD@{2} is two entries back, here the oldest entry (c1)
+            self.assertEqual(c1, parse_object(r, b"HEAD@{2}"))
+
 
 class ParseCommitRangeTests(TestCase):
     """Test parse_commit_range."""