Просмотр исходного кода

Add support for core.commitGraph configuration setting

This implements Git's core.commitGraph configuration setting which controls
whether commit-graph files are used for performance optimization. When enabled
(default), commit graph files are used to speed up parent lookups in operations
like find_shallow(), get_depth(), and commit ancestry traversal.

Also adds write_commit_graph() porcelain function to generate commit graphs.
Jelmer Vernooij 1 месяц назад
Родитель
Commit
f7cf85bab9
6 измененных файлов с 452 добавлено и 10 удалено
  1. 4 0
      NEWS
  2. 53 10
      dulwich/object_store.py
  3. 8 0
      dulwich/pack.py
  4. 16 0
      dulwich/porcelain.py
  5. 261 0
      tests/test_object_store.py
  6. 110 0
      tests/test_porcelain.py

+ 4 - 0
NEWS

@@ -1,5 +1,9 @@
 0.23.3	UNRELEASED
 
+ * Add support for ``core.commitGraph`` configuration setting to control
+   whether commit-graph files are used for performance optimization.
+   (Jelmer Vernooij)
+
  * Add ``reflog`` command in porcelain. (Jelmer Vernooij)
 
 0.23.2	2025-07-07

+ 53 - 10
dulwich/object_store.py

@@ -108,10 +108,19 @@ def find_shallow(store, heads, depth):
         these sets may overlap if a commit is reachable along multiple paths.
     """
     parents = {}
+    commit_graph = store.get_commit_graph()
 
     def get_parents(sha):
         result = parents.get(sha, None)
         if not result:
+            # Try to use commit graph first if available
+            if commit_graph:
+                graph_parents = commit_graph.get_parents(sha)
+                if graph_parents is not None:
+                    result = graph_parents
+                    parents[sha] = result
+                    return result
+            # Fall back to loading the object
             result = store[sha].parents
             parents[sha] = result
         return result
@@ -159,16 +168,26 @@ def get_depth(
         return 0
     current_depth = 1
     queue = [(head, current_depth)]
+    commit_graph = store.get_commit_graph()
+
     while queue and (max_depth is None or current_depth < max_depth):
         e, depth = queue.pop(0)
         current_depth = max(current_depth, depth)
-        cmt = store[e]
-        if isinstance(cmt, Tag):
-            _cls, sha = cmt.object
-            cmt = store[sha]
-        queue.extend(
-            (parent, depth + 1) for parent in get_parents(cmt) if parent in store
-        )
+
+        # Try to use commit graph for parent lookup if available
+        parents = None
+        if commit_graph:
+            parents = commit_graph.get_parents(e)
+
+        if parents is None:
+            # Fall back to loading the object
+            cmt = store[e]
+            if isinstance(cmt, Tag):
+                _cls, sha = cmt.object
+                cmt = store[sha]
+            parents = get_parents(cmt)
+
+        queue.extend((parent, depth + 1) for parent in parents if parent in store)
     return current_depth
 
 
@@ -914,6 +933,7 @@ class DiskObjectStore(PackBasedObjectStore):
 
         # Commit graph support - lazy loaded
         self._commit_graph = None
+        self._use_commit_graph = True  # Default to true
 
     def __repr__(self) -> str:
         return f"<{self.__class__.__name__}({self.path!r})>"
@@ -942,9 +962,15 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
         except KeyError:
             pack_index_version = None
-        return cls(
+
+        # Read core.commitGraph setting
+        use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
+
+        instance = cls(
             path, loose_compression_level, pack_compression_level, pack_index_version
         )
+        instance._use_commit_graph = use_commit_graph
+        return instance
 
     @property
     def alternates(self):
@@ -1310,6 +1336,9 @@ class DiskObjectStore(PackBasedObjectStore):
         Returns:
           CommitGraph object if available, None otherwise
         """
+        if not self._use_commit_graph:
+            return None
+
         if self._commit_graph is None:
             from .commit_graph import read_commit_graph
 
@@ -2146,6 +2175,10 @@ def _collect_ancestors(
     commits = set()
     queue = []
     queue.extend(heads)
+
+    # Try to use commit graph if available
+    commit_graph = store.get_commit_graph()
+
     while queue:
         e = queue.pop(0)
         if e in common:
@@ -2154,8 +2187,18 @@ def _collect_ancestors(
             commits.add(e)
             if e in shallow:
                 continue
-            cmt = store[e]
-            queue.extend(get_parents(cmt))
+
+            # Try to use commit graph for parent lookup
+            parents = None
+            if commit_graph:
+                parents = commit_graph.get_parents(e)
+
+            if parents is None:
+                # Fall back to loading the object
+                cmt = store[e]
+                parents = get_parents(cmt)
+
+            queue.extend(parents)
     return (commits, bases)
 
 

+ 8 - 0
dulwich/pack.py

@@ -129,6 +129,14 @@ class ObjectContainer(Protocol):
     def __getitem__(self, sha1: bytes) -> ShaFile:
         """Retrieve an object."""
 
+    def get_commit_graph(self):
+        """Get the commit graph for this object store.
+
+        Returns:
+          CommitGraph object if available, None otherwise
+        """
+        return None
+
 
 class PackedObjectContainer(ObjectContainer):
     def get_unpacked_object(

+ 16 - 0
dulwich/porcelain.py

@@ -62,6 +62,7 @@ Currently implemented:
  * tag{_create,_delete,_list}
  * upload_pack
  * update_server_info
+ * write_commit_graph
  * status
  * symbolic_ref
 
@@ -414,6 +415,21 @@ def update_server_info(repo=".") -> None:
         server_update_server_info(r)
 
 
+def write_commit_graph(repo=".", reachable=True) -> None:
+    """Write a commit graph file for a repository.
+
+    Args:
+      repo: path to the repository or a Repo object
+      reachable: if True, include all commits reachable from refs.
+                 if False, only include direct ref targets.
+    """
+    with open_repo_closing(repo) as r:
+        # Get all refs
+        refs = list(r.refs.as_dict().values())
+        if refs:
+            r.object_store.write_commit_graph(refs, reachable=reachable)
+
+
 def symbolic_ref(repo, ref_name, force=False) -> None:
     """Set git symbolic ref into HEAD.
 

+ 261 - 0
tests/test_object_store.py

@@ -505,6 +505,267 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         # Verify the orphaned file was cleaned up
         self.assertFalse(os.path.exists(tmp_pack_path))
 
+    def test_commit_graph_enabled_by_default(self) -> None:
+        """Test that commit graph is enabled by default."""
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be enabled by default
+        self.assertTrue(store._use_commit_graph)
+
+    def test_commit_graph_disabled_by_config(self) -> None:
+        """Test that commit graph can be disabled via config."""
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        config[(b"core",)] = {b"commitGraph": b"false"}
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be disabled
+        self.assertFalse(store._use_commit_graph)
+
+        # get_commit_graph should return None when disabled
+        self.assertIsNone(store.get_commit_graph())
+
+    def test_commit_graph_enabled_by_config(self) -> None:
+        """Test that commit graph can be explicitly enabled via config."""
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        config[(b"core",)] = {b"commitGraph": b"true"}
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be enabled
+        self.assertTrue(store._use_commit_graph)
+
+    def test_commit_graph_usage_in_find_shallow(self) -> None:
+        """Test that find_shallow uses commit graph when available."""
+        import time
+
+        from dulwich.object_store import find_shallow
+        from dulwich.objects import Commit
+
+        # Create a simple commit chain: c1 -> c2 -> c3
+        ts = int(time.time())
+        c1 = make_object(
+            Commit,
+            message=b"commit 1",
+            tree=b"1" * 40,
+            parents=[],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts,
+            commit_timezone=0,
+            author_time=ts,
+            author_timezone=0,
+        )
+
+        c2 = make_object(
+            Commit,
+            message=b"commit 2",
+            tree=b"2" * 40,
+            parents=[c1.id],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts + 1,
+            commit_timezone=0,
+            author_time=ts + 1,
+            author_timezone=0,
+        )
+
+        c3 = make_object(
+            Commit,
+            message=b"commit 3",
+            tree=b"3" * 40,
+            parents=[c2.id],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts + 2,
+            commit_timezone=0,
+            author_time=ts + 2,
+            author_timezone=0,
+        )
+
+        self.store.add_objects([(c1, None), (c2, None), (c3, None)])
+
+        # Write a commit graph
+        self.store.write_commit_graph([c1.id, c2.id, c3.id])
+
+        # Verify commit graph was written
+        commit_graph = self.store.get_commit_graph()
+        self.assertIsNotNone(commit_graph)
+        self.assertEqual(3, len(commit_graph))
+
+        # Test find_shallow with depth
+        # With depth 2 starting from c3:
+        # - c3 is at depth 1, inside the requested depth (not shallow)
+        # - c2 is at depth 2, the depth boundary, so it is marked as shallow
+        # - c1 lies beyond the boundary and is expected in neither set
+        shallow, not_shallow = find_shallow(self.store, [c3.id], 2)
+
+        # c2 should be marked as shallow since it's at the depth boundary
+        self.assertEqual({c2.id}, shallow)
+        self.assertEqual({c3.id}, not_shallow)
+
+    def test_commit_graph_end_to_end(self) -> None:
+        """Test end-to-end commit graph generation and usage."""
+        import os
+        import time
+
+        from dulwich.object_store import get_depth
+        from dulwich.objects import Blob, Commit, Tree
+
+        # Create a more complex commit history:
+        #   c1 -- c2 -- c4
+        #     \        /
+        #      \-- c3 -/
+
+        ts = int(time.time())
+
+        # Create some blobs and trees for the commits
+        blob1 = make_object(Blob, data=b"content 1")
+        blob2 = make_object(Blob, data=b"content 2")
+        blob3 = make_object(Blob, data=b"content 3")
+        blob4 = make_object(Blob, data=b"content 4")
+
+        tree1 = make_object(Tree)
+        tree1[b"file1.txt"] = (0o100644, blob1.id)
+
+        tree2 = make_object(Tree)
+        tree2[b"file1.txt"] = (0o100644, blob1.id)
+        tree2[b"file2.txt"] = (0o100644, blob2.id)
+
+        tree3 = make_object(Tree)
+        tree3[b"file1.txt"] = (0o100644, blob1.id)
+        tree3[b"file3.txt"] = (0o100644, blob3.id)
+
+        tree4 = make_object(Tree)
+        tree4[b"file1.txt"] = (0o100644, blob1.id)
+        tree4[b"file2.txt"] = (0o100644, blob2.id)
+        tree4[b"file3.txt"] = (0o100644, blob3.id)
+        tree4[b"file4.txt"] = (0o100644, blob4.id)
+
+        # Add all objects to store
+        self.store.add_objects(
+            [
+                (blob1, None),
+                (blob2, None),
+                (blob3, None),
+                (blob4, None),
+                (tree1, None),
+                (tree2, None),
+                (tree3, None),
+                (tree4, None),
+            ]
+        )
+
+        # Create commits
+        c1 = make_object(
+            Commit,
+            message=b"Initial commit",
+            tree=tree1.id,
+            parents=[],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts,
+            commit_timezone=0,
+            author_time=ts,
+            author_timezone=0,
+        )
+
+        c2 = make_object(
+            Commit,
+            message=b"Second commit",
+            tree=tree2.id,
+            parents=[c1.id],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts + 10,
+            commit_timezone=0,
+            author_time=ts + 10,
+            author_timezone=0,
+        )
+
+        c3 = make_object(
+            Commit,
+            message=b"Branch commit",
+            tree=tree3.id,
+            parents=[c1.id],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts + 20,
+            commit_timezone=0,
+            author_time=ts + 20,
+            author_timezone=0,
+        )
+
+        c4 = make_object(
+            Commit,
+            message=b"Merge commit",
+            tree=tree4.id,
+            parents=[c2.id, c3.id],
+            author=b"Test Author <test@example.com>",
+            committer=b"Test Committer <test@example.com>",
+            commit_time=ts + 30,
+            commit_timezone=0,
+            author_time=ts + 30,
+            author_timezone=0,
+        )
+
+        self.store.add_objects([(c1, None), (c2, None), (c3, None), (c4, None)])
+
+        # First, verify operations work without commit graph
+        # Check depth calculation
+        depth_before = get_depth(self.store, c4.id)
+        self.assertEqual(3, depth_before)  # c4 -> c2/c3 -> c1
+
+        # Generate commit graph
+        self.store.write_commit_graph([c1.id, c2.id, c3.id, c4.id])
+
+        # Verify commit graph file was created
+        graph_path = os.path.join(self.store.path, "info", "commit-graph")
+        self.assertTrue(os.path.exists(graph_path))
+
+        # Load and verify commit graph
+        commit_graph = self.store.get_commit_graph()
+        self.assertIsNotNone(commit_graph)
+        self.assertEqual(4, len(commit_graph))
+
+        # Verify commit graph contains correct parent information
+        c1_entry = commit_graph.get_entry_by_oid(c1.id)
+        self.assertIsNotNone(c1_entry)
+        self.assertEqual([], c1_entry.parents)
+
+        c2_entry = commit_graph.get_entry_by_oid(c2.id)
+        self.assertIsNotNone(c2_entry)
+        self.assertEqual([c1.id], c2_entry.parents)
+
+        c3_entry = commit_graph.get_entry_by_oid(c3.id)
+        self.assertIsNotNone(c3_entry)
+        self.assertEqual([c1.id], c3_entry.parents)
+
+        c4_entry = commit_graph.get_entry_by_oid(c4.id)
+        self.assertIsNotNone(c4_entry)
+        self.assertEqual([c2.id, c3.id], c4_entry.parents)
+
+        # Test that operations now use the commit graph
+        # Check depth calculation again - should use commit graph
+        depth_after = get_depth(self.store, c4.id)
+        self.assertEqual(3, depth_after)
+
+        # Test with commit graph disabled
+        self.store._use_commit_graph = False
+        self.assertIsNone(self.store.get_commit_graph())
+
+        # Operations should still work without commit graph
+        depth_disabled = get_depth(self.store, c4.id)
+        self.assertEqual(3, depth_disabled)
+
 
 class TreeLookupPathTests(TestCase):
     def setUp(self) -> None:

+ 110 - 0
tests/test_porcelain.py

@@ -7557,3 +7557,113 @@ class ReflogTest(PorcelainTestCase):
         # Should have seen at least HEAD and refs/heads/master
         self.assertIn(b"HEAD", refs_seen)
         self.assertIn(b"refs/heads/master", refs_seen)
+
+
+class WriteCommitGraphTests(PorcelainTestCase):
+    """Tests for the write_commit_graph porcelain function."""
+
+    def test_write_commit_graph_empty_repo(self):
+        """Test writing commit graph on empty repository."""
+        # Should not fail on empty repo
+        porcelain.write_commit_graph(self.repo_path)
+
+        # No commit graph should be created for empty repo
+        graph_path = os.path.join(
+            self.repo_path, ".git", "objects", "info", "commit-graph"
+        )
+        self.assertFalse(os.path.exists(graph_path))
+
+    def test_write_commit_graph_with_commits(self):
+        """Test writing commit graph with commits."""
+        # Create some commits
+        c1, c2, c3 = build_commit_graph(
+            self.repo.object_store,
+            [[1], [2, 1], [3, 1, 2]],
+            attrs={
+                1: {"message": b"First commit"},
+                2: {"message": b"Second commit"},
+                3: {"message": b"Merge commit"},
+            },
+        )
+        self.repo.refs[b"refs/heads/master"] = c3.id
+        self.repo.refs[b"refs/heads/branch"] = c2.id
+
+        # Write commit graph
+        porcelain.write_commit_graph(self.repo_path)
+
+        # Verify commit graph was created
+        graph_path = os.path.join(
+            self.repo_path, ".git", "objects", "info", "commit-graph"
+        )
+        self.assertTrue(os.path.exists(graph_path))
+
+        # Load and verify the commit graph
+        from dulwich.commit_graph import read_commit_graph
+
+        commit_graph = read_commit_graph(graph_path)
+        self.assertIsNotNone(commit_graph)
+        self.assertEqual(3, len(commit_graph))
+
+        # Verify all commits are in the graph
+        for commit_obj in [c1, c2, c3]:
+            entry = commit_graph.get_entry_by_oid(commit_obj.id)
+            self.assertIsNotNone(entry)
+
+    def test_write_commit_graph_config_disabled(self):
+        """Test that commit graph is still written when disabled by config."""
+        # Create a commit
+        (c1,) = build_commit_graph(
+            self.repo.object_store, [[1]], attrs={1: {"message": b"First commit"}}
+        )
+        self.repo.refs[b"refs/heads/master"] = c1.id
+
+        # Disable commit graph in config
+        config = self.repo.get_config()
+        config.set((b"core",), b"commitGraph", b"false")
+        config.write_to_path()
+
+        # Write commit graph (an explicit write ignores core.commitGraph)
+        porcelain.write_commit_graph(self.repo_path)
+
+        # Verify commit graph still gets written
+        # (porcelain.write_commit_graph explicitly writes regardless of config)
+        graph_path = os.path.join(
+            self.repo_path, ".git", "objects", "info", "commit-graph"
+        )
+        self.assertTrue(os.path.exists(graph_path))
+
+    def test_write_commit_graph_reachable_false(self):
+        """Test writing commit graph with reachable=False."""
+        # Create commits
+        c1, c2, c3 = build_commit_graph(
+            self.repo.object_store,
+            [[1], [2, 1], [3, 2]],
+            attrs={
+                1: {"message": b"First commit"},
+                2: {"message": b"Second commit"},
+                3: {"message": b"Third commit"},
+            },
+        )
+        # Only reference the third commit
+        self.repo.refs[b"refs/heads/master"] = c3.id
+
+        # Write commit graph with reachable=False
+        porcelain.write_commit_graph(self.repo_path, reachable=False)
+
+        # Verify commit graph was created
+        graph_path = os.path.join(
+            self.repo_path, ".git", "objects", "info", "commit-graph"
+        )
+        self.assertTrue(os.path.exists(graph_path))
+
+        # Load and verify the commit graph
+        from dulwich.commit_graph import read_commit_graph
+
+        commit_graph = read_commit_graph(graph_path)
+        self.assertIsNotNone(commit_graph)
+
+        # With reachable=False, only directly referenced commits should be included
+        # In this case, only c3 (from refs/heads/master)
+        self.assertEqual(1, len(commit_graph))
+        entry = commit_graph.get_entry_by_oid(c3.id)
+        self.assertIsNotNone(entry)