Browse Source

Add basic commit graph support. See #1191 (#1564)

Jelmer Vernooij 2 months ago
parent
commit
5fb1481aed

+ 3 - 0
NEWS

@@ -49,6 +49,9 @@
  * Handle trailing backslashes in config files appropriately.
    (Jelmer Vernooij, #1088)
 
+ * Add basic support for reading git commit graphs.
+   (Jelmer Vernooij, #1191)
+
 0.22.8	2025-03-02
 
  * Allow passing in plain strings to ``dulwich.porcelain.tag_create``

+ 595 - 0
dulwich/commit_graph.py

@@ -0,0 +1,595 @@
+# commit_graph.py -- Git commit graph file format support
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+
+"""Git commit graph file format support.
+
+Git's commit graph files store commit metadata and generation numbers
+for faster graph traversal operations like merge-base computation.
+
+The commit graph format is documented at:
+https://git-scm.com/docs/gitformat-commit-graph
+"""
+
+import os
+import struct
+from typing import BinaryIO, Optional, Union
+
+from .objects import ObjectID, hex_to_sha, sha_to_hex
+
+# File format constants
+COMMIT_GRAPH_SIGNATURE = b"CGPH"
+COMMIT_GRAPH_VERSION = 1
+# Accepted values of the header's hash-version byte. The reader uses 20-byte
+# OIDs for HASH_VERSION_SHA1 and 32-byte OIDs otherwise.
+HASH_VERSION_SHA1 = 1
+HASH_VERSION_SHA256 = 2
+
+# Chunk IDs
+CHUNK_OID_FANOUT = b"OIDF"
+CHUNK_OID_LOOKUP = b"OIDL"
+CHUNK_COMMIT_DATA = b"CDAT"
+CHUNK_GENERATION_DATA = b"GDA2"
+CHUNK_GENERATION_DATA_OVERFLOW = b"GDO2"
+CHUNK_EXTRA_EDGE_LIST = b"EDGE"
+CHUNK_BLOOM_FILTER_INDEX = b"BIDX"
+CHUNK_BLOOM_FILTER_DATA = b"BDAT"
+CHUNK_BASE_GRAPHS_LIST = b"BASE"
+
+# Generation number constants
+GENERATION_NUMBER_INFINITY = 0xFFFFFFFF
+GENERATION_NUMBER_ZERO = 0
+GENERATION_NUMBER_V1_MAX = 0x3FFFFFFF
+
+# Parent encoding constants
+# MISSING and NONE share one sentinel value here. The reader only treats
+# parent positions strictly below it as indexes into the OID lookup list.
+GRAPH_PARENT_MISSING = 0x70000000
+GRAPH_PARENT_NONE = 0x70000000
+# Both flags are the most-significant bit of a 32-bit value. The reader tests
+# EXTRA_EDGES_NEEDED on the second parent slot and LAST_EDGE on entries of
+# the extra edge list.
+GRAPH_EXTRA_EDGES_NEEDED = 0x80000000
+GRAPH_LAST_EDGE = 0x80000000
+
+
+class CommitGraphEntry:
+    """Represents a single commit entry in the commit graph."""
+
+    def __init__(
+        self,
+        commit_id: ObjectID,
+        tree_id: ObjectID,
+        parents: list[ObjectID],
+        generation: int,
+        commit_time: int,
+    ) -> None:
+        self.commit_id = commit_id
+        self.tree_id = tree_id
+        self.parents = parents
+        self.generation = generation
+        self.commit_time = commit_time
+
+    def __repr__(self) -> str:
+        return (
+            f"CommitGraphEntry(commit_id={self.commit_id!r}, "
+            f"tree_id={self.tree_id!r}, parents={self.parents!r}, "
+            f"generation={self.generation}, commit_time={self.commit_time})"
+        )
+
+
+class CommitGraphChunk:
+    """Represents a chunk in the commit graph file."""
+
+    def __init__(self, chunk_id: bytes, data: bytes) -> None:
+        self.chunk_id = chunk_id
+        self.data = data
+
+    def __repr__(self) -> str:
+        return f"CommitGraphChunk(chunk_id={self.chunk_id!r}, size={len(self.data)})"
+
+
+class CommitGraph:
+    """Git commit graph file reader/writer."""
+
+    def __init__(self, hash_version: int = HASH_VERSION_SHA1) -> None:
+        self.hash_version = hash_version
+        self.chunks: dict[bytes, CommitGraphChunk] = {}
+        self.entries: list[CommitGraphEntry] = []
+        self._oid_to_index: dict[ObjectID, int] = {}
+        self._hash_size = 20 if hash_version == HASH_VERSION_SHA1 else 32
+
+    @classmethod
+    def from_file(cls, f: BinaryIO) -> "CommitGraph":
+        """Read commit graph from file."""
+        graph = cls()
+        graph._read_from_file(f)
+        return graph
+
+    def _read_from_file(self, f: BinaryIO) -> None:
+        """Read commit graph data from file."""
+        # Read header
+        signature = f.read(4)
+        if signature != COMMIT_GRAPH_SIGNATURE:
+            raise ValueError(f"Invalid commit graph signature: {signature!r}")
+
+        version = struct.unpack(">B", f.read(1))[0]
+        if version != COMMIT_GRAPH_VERSION:
+            raise ValueError(f"Unsupported commit graph version: {version}")
+
+        self.hash_version = struct.unpack(">B", f.read(1))[0]
+        if self.hash_version not in (HASH_VERSION_SHA1, HASH_VERSION_SHA256):
+            raise ValueError(f"Unsupported hash version: {self.hash_version}")
+
+        self._hash_size = 20 if self.hash_version == HASH_VERSION_SHA1 else 32
+
+        num_chunks = struct.unpack(">B", f.read(1))[0]
+        struct.unpack(">B", f.read(1))[0]
+
+        # Read table of contents
+        toc_entries = []
+        for _ in range(num_chunks + 1):  # +1 for terminating entry
+            chunk_id = f.read(4)
+            offset = struct.unpack(">Q", f.read(8))[0]
+            toc_entries.append((chunk_id, offset))
+
+        # Read chunks
+        # Offsets in TOC are absolute from start of file
+        for i in range(num_chunks):
+            chunk_id, offset = toc_entries[i]
+            next_offset = toc_entries[i + 1][1]
+            chunk_size = next_offset - offset
+
+            f.seek(offset)
+            chunk_data = f.read(chunk_size)
+            self.chunks[chunk_id] = CommitGraphChunk(chunk_id, chunk_data)
+
+        # Parse chunks
+        self._parse_chunks()
+
+    def _parse_chunks(self) -> None:
+        """Parse chunk data into entries."""
+        if CHUNK_OID_LOOKUP not in self.chunks:
+            raise ValueError("Missing required OID lookup chunk")
+        if CHUNK_COMMIT_DATA not in self.chunks:
+            raise ValueError("Missing required commit data chunk")
+
+        # Parse OID lookup chunk
+        oid_lookup_data = self.chunks[CHUNK_OID_LOOKUP].data
+        num_commits = len(oid_lookup_data) // self._hash_size
+
+        oids = []
+        for i in range(num_commits):
+            start = i * self._hash_size
+            end = start + self._hash_size
+            oid = oid_lookup_data[start:end]
+            oids.append(oid)
+            self._oid_to_index[oid] = i
+
+        # Parse commit data chunk
+        commit_data = self.chunks[CHUNK_COMMIT_DATA].data
+        expected_size = num_commits * (self._hash_size + 16)
+        if len(commit_data) != expected_size:
+            raise ValueError(
+                f"Invalid commit data chunk size: {len(commit_data)}, expected {expected_size}"
+            )
+
+        self.entries = []
+        for i in range(num_commits):
+            offset = i * (self._hash_size + 16)
+
+            # Tree OID
+            tree_id = commit_data[offset : offset + self._hash_size]
+            offset += self._hash_size
+
+            # Parent positions (2 x 4 bytes)
+            parent1_pos, parent2_pos = struct.unpack(
+                ">LL", commit_data[offset : offset + 8]
+            )
+            offset += 8
+
+            # Generation number and commit time (2 x 4 bytes)
+            gen_and_time = struct.unpack(">LL", commit_data[offset : offset + 8])
+            generation = gen_and_time[0] >> 2  # Upper 30 bits
+            commit_time = gen_and_time[1] | (
+                (gen_and_time[0] & 0x3) << 32
+            )  # 34 bits total
+
+            # Parse parents
+            parents = []
+            if parent1_pos < GRAPH_PARENT_MISSING:
+                if parent1_pos >= len(oids):
+                    raise ValueError(f"Invalid parent1 position: {parent1_pos}")
+                parents.append(oids[parent1_pos])
+
+            if parent2_pos < GRAPH_PARENT_MISSING:
+                if parent2_pos >= len(oids):
+                    raise ValueError(f"Invalid parent2 position: {parent2_pos}")
+                parents.append(oids[parent2_pos])
+            elif parent2_pos >= GRAPH_EXTRA_EDGES_NEEDED:
+                # Handle extra edges (3+ parents)
+                edge_offset = parent2_pos & ~GRAPH_EXTRA_EDGES_NEEDED
+                parents.extend(self._parse_extra_edges(edge_offset, oids))
+
+            entry = CommitGraphEntry(
+                commit_id=sha_to_hex(oids[i]),
+                tree_id=sha_to_hex(tree_id),
+                parents=[sha_to_hex(p) for p in parents],
+                generation=generation,
+                commit_time=commit_time,
+            )
+            self.entries.append(entry)
+
+    def _parse_extra_edges(self, offset: int, oids: list[bytes]) -> list[bytes]:
+        """Parse extra parent edges for commits with 3+ parents."""
+        if CHUNK_EXTRA_EDGE_LIST not in self.chunks:
+            return []
+
+        edge_data = self.chunks[CHUNK_EXTRA_EDGE_LIST].data
+        parents = []
+
+        while offset < len(edge_data):
+            parent_pos = struct.unpack(">L", edge_data[offset : offset + 4])[0]
+            offset += 4
+
+            if parent_pos & GRAPH_LAST_EDGE:
+                parent_pos &= ~GRAPH_LAST_EDGE
+                if parent_pos < len(oids):
+                    parents.append(oids[parent_pos])
+                break
+            else:
+                if parent_pos < len(oids):
+                    parents.append(oids[parent_pos])
+
+        return parents
+
+    def get_entry_by_oid(self, oid: ObjectID) -> Optional[CommitGraphEntry]:
+        """Get commit graph entry by commit OID."""
+        # Convert hex ObjectID to binary if needed for lookup
+        if isinstance(oid, bytes) and len(oid) == 40:
+            # Input is hex ObjectID, convert to binary for internal lookup
+            lookup_oid = hex_to_sha(oid)
+        else:
+            # Input is already binary
+            lookup_oid = oid
+        index = self._oid_to_index.get(lookup_oid)
+        if index is not None:
+            return self.entries[index]
+        return None
+
+    def get_generation_number(self, oid: ObjectID) -> Optional[int]:
+        """Get generation number for a commit."""
+        entry = self.get_entry_by_oid(oid)
+        return entry.generation if entry else None
+
+    def get_parents(self, oid: ObjectID) -> Optional[list[ObjectID]]:
+        """Get parent commit IDs for a commit."""
+        entry = self.get_entry_by_oid(oid)
+        return entry.parents if entry else None
+
+    def write_to_file(self, f: BinaryIO) -> None:
+        """Write commit graph to file."""
+        if not self.entries:
+            raise ValueError("Cannot write empty commit graph")
+
+        # Sort entries by commit ID for consistent output
+        sorted_entries = sorted(self.entries, key=lambda e: e.commit_id)
+
+        # Build OID lookup chunk
+        oid_lookup_data = b""
+        for entry in sorted_entries:
+            oid_lookup_data += hex_to_sha(entry.commit_id)
+
+        # Build commit data chunk
+        commit_data = b""
+        # Create OID to index mapping for parent lookups
+        oid_to_index = {entry.commit_id: i for i, entry in enumerate(sorted_entries)}
+
+        for entry in sorted_entries:
+            # Tree OID (20 bytes)
+            commit_data += hex_to_sha(entry.tree_id)
+
+            # Parent positions (2 x 4 bytes)
+            if len(entry.parents) == 0:
+                parent1_pos = GRAPH_PARENT_MISSING
+                parent2_pos = GRAPH_PARENT_MISSING
+            elif len(entry.parents) == 1:
+                parent1_pos = oid_to_index.get(entry.parents[0], GRAPH_PARENT_MISSING)
+                parent2_pos = GRAPH_PARENT_MISSING
+            elif len(entry.parents) == 2:
+                parent1_pos = oid_to_index.get(entry.parents[0], GRAPH_PARENT_MISSING)
+                parent2_pos = oid_to_index.get(entry.parents[1], GRAPH_PARENT_MISSING)
+            else:
+                # More than 2 parents - would need extra edge list chunk
+                # For now, just store first two parents
+                parent1_pos = oid_to_index.get(entry.parents[0], GRAPH_PARENT_MISSING)
+                parent2_pos = oid_to_index.get(entry.parents[1], GRAPH_PARENT_MISSING)
+
+            commit_data += struct.pack(">LL", parent1_pos, parent2_pos)
+
+            # Generation and commit time (2 x 4 bytes)
+            gen_and_time = (entry.generation << 2) | (entry.commit_time >> 32)
+            commit_time_lower = entry.commit_time & 0xFFFFFFFF
+            commit_data += struct.pack(">LL", gen_and_time, commit_time_lower)
+
+        # Build fanout table
+        fanout_data = b""
+        fanout_counts = [0] * 256
+        for i, entry in enumerate(sorted_entries):
+            commit_oid_bytes = hex_to_sha(entry.commit_id)
+            fanout_counts[commit_oid_bytes[0]] = i + 1
+
+        # Fill in gaps - each fanout entry should be cumulative
+        for i in range(1, 256):
+            if fanout_counts[i] == 0:
+                fanout_counts[i] = fanout_counts[i - 1]
+
+        for count in fanout_counts:
+            fanout_data += struct.pack(">L", count)
+
+        # Calculate chunk offsets
+        header_size = (
+            8  # signature + version + hash_version + num_chunks + base_graph_count
+        )
+        toc_size = 4 * 12  # 4 entries (3 chunks + terminator) * 12 bytes each
+
+        chunk1_offset = header_size + toc_size  # OID Fanout
+        chunk2_offset = chunk1_offset + len(fanout_data)  # OID Lookup
+        chunk3_offset = chunk2_offset + len(oid_lookup_data)  # Commit Data
+        terminator_offset = chunk3_offset + len(commit_data)
+
+        # Write header
+        f.write(COMMIT_GRAPH_SIGNATURE)
+        f.write(struct.pack(">B", COMMIT_GRAPH_VERSION))
+        f.write(struct.pack(">B", self.hash_version))
+        f.write(struct.pack(">B", 3))  # 3 chunks
+        f.write(struct.pack(">B", 0))  # 0 base graphs
+
+        # Write table of contents
+        f.write(CHUNK_OID_FANOUT + struct.pack(">Q", chunk1_offset))
+        f.write(CHUNK_OID_LOOKUP + struct.pack(">Q", chunk2_offset))
+        f.write(CHUNK_COMMIT_DATA + struct.pack(">Q", chunk3_offset))
+        f.write(b"\x00\x00\x00\x00" + struct.pack(">Q", terminator_offset))
+
+        # Write chunks
+        f.write(fanout_data)
+        f.write(oid_lookup_data)
+        f.write(commit_data)
+
+    def __len__(self) -> int:
+        """Return number of commits in the graph.
+
+        This is the length of ``self.entries``.
+        """
+        return len(self.entries)
+
+    def __iter__(self):
+        """Iterate over commit graph entries.
+
+        Entries are yielded in the order they are held in ``self.entries``.
+        """
+        return iter(self.entries)
+
+
+def read_commit_graph(path: Union[str, bytes]) -> Optional[CommitGraph]:
+    """Read commit graph from file path."""
+    if isinstance(path, str):
+        path = path.encode()
+
+    if not os.path.exists(path):
+        return None
+
+    with open(path, "rb") as f:
+        return CommitGraph.from_file(f)
+
+
+def find_commit_graph_file(git_dir: Union[str, bytes]) -> Optional[bytes]:
+    """Find commit graph file in a Git repository."""
+    if isinstance(git_dir, str):
+        git_dir = git_dir.encode()
+
+    # Standard location: .git/objects/info/commit-graph
+    commit_graph_path = os.path.join(git_dir, b"objects", b"info", b"commit-graph")
+    if os.path.exists(commit_graph_path):
+        return commit_graph_path
+
+    # Chain files in .git/objects/info/commit-graphs/
+    commit_graphs_dir = os.path.join(git_dir, b"objects", b"info", b"commit-graphs")
+    if os.path.exists(commit_graphs_dir):
+        # Look for graph-{hash}.graph files
+        for filename in os.listdir(commit_graphs_dir):
+            if filename.startswith(b"graph-") and filename.endswith(b".graph"):
+                return os.path.join(commit_graphs_dir, filename)
+
+    return None
+
+
+def generate_commit_graph(object_store, commit_ids: list[ObjectID]) -> CommitGraph:
+    """Generate a commit graph from a set of commits.
+
+    Args:
+        object_store: Object store to retrieve commits from
+        commit_ids: List of commit IDs to include in the graph
+
+    Returns:
+        CommitGraph object containing the specified commits
+    """
+    graph = CommitGraph()
+
+    if not commit_ids:
+        return graph
+
+    # Ensure all commit_ids are in the correct format for object store access
+    # DiskObjectStore expects hex ObjectIDs (40-byte hex strings)
+    normalized_commit_ids = []
+    for commit_id in commit_ids:
+        if isinstance(commit_id, bytes) and len(commit_id) == 40:
+            # Already hex ObjectID
+            normalized_commit_ids.append(commit_id)
+        elif isinstance(commit_id, bytes) and len(commit_id) == 20:
+            # Binary SHA, convert to hex ObjectID
+            normalized_commit_ids.append(sha_to_hex(commit_id))
+        else:
+            # Assume it's already correct format
+            normalized_commit_ids.append(commit_id)
+
+    # Build a map of all commits and their metadata
+    commit_map = {}
+    for commit_id in normalized_commit_ids:
+        try:
+            commit_obj = object_store[commit_id]
+            if commit_obj.type_name != b"commit":
+                continue
+            commit_map[commit_id] = commit_obj
+        except KeyError:
+            # Commit not found, skip
+            continue
+
+    # Calculate generation numbers using topological sort
+    generation_map: dict[bytes, int] = {}
+
+    def calculate_generation(commit_id):
+        if commit_id in generation_map:
+            return generation_map[commit_id]
+
+        if commit_id not in commit_map:
+            # Unknown commit, assume generation 0
+            generation_map[commit_id] = 0
+            return 0
+
+        commit_obj = commit_map[commit_id]
+        if not commit_obj.parents:
+            # Root commit
+            generation_map[commit_id] = 1
+            return 1
+
+        # Calculate based on parents
+        max_parent_gen = 0
+        for parent_id in commit_obj.parents:
+            parent_gen = calculate_generation(parent_id)
+            max_parent_gen = max(max_parent_gen, parent_gen)
+
+        generation = max_parent_gen + 1
+        generation_map[commit_id] = generation
+        return generation
+
+    # Calculate generation numbers for all commits
+    for commit_id in commit_map:
+        calculate_generation(commit_id)
+
+    # Build commit graph entries
+    for commit_id, commit_obj in commit_map.items():
+        # commit_id is already hex ObjectID from normalized_commit_ids
+        commit_hex = commit_id
+
+        # Handle tree ID - might already be hex ObjectID
+        if isinstance(commit_obj.tree, bytes) and len(commit_obj.tree) == 40:
+            tree_hex = commit_obj.tree  # Already hex ObjectID
+        else:
+            tree_hex = sha_to_hex(commit_obj.tree)  # Binary, convert to hex
+
+        # Handle parent IDs - might already be hex ObjectIDs
+        parents_hex = []
+        for parent_id in commit_obj.parents:
+            if isinstance(parent_id, bytes) and len(parent_id) == 40:
+                parents_hex.append(parent_id)  # Already hex ObjectID
+            else:
+                parents_hex.append(sha_to_hex(parent_id))  # Binary, convert to hex
+
+        entry = CommitGraphEntry(
+            commit_id=commit_hex,
+            tree_id=tree_hex,
+            parents=parents_hex,
+            generation=generation_map[commit_id],
+            commit_time=commit_obj.commit_time,
+        )
+        graph.entries.append(entry)
+
+    # Build the OID to index mapping for lookups
+    graph._oid_to_index = {}
+    for i, entry in enumerate(graph.entries):
+        binary_oid = hex_to_sha(entry.commit_id.decode())
+        graph._oid_to_index[binary_oid] = i
+
+    return graph
+
+
+def write_commit_graph(
+    git_dir: Union[str, bytes], object_store, commit_ids: list[ObjectID]
+) -> None:
+    """Write a commit graph file for the given commits.
+
+    Args:
+        git_dir: Git directory path
+        object_store: Object store to retrieve commits from
+        commit_ids: List of commit IDs to include in the graph
+    """
+    if isinstance(git_dir, str):
+        git_dir = git_dir.encode()
+
+    # Generate the commit graph
+    graph = generate_commit_graph(object_store, commit_ids)
+
+    if not graph.entries:
+        return  # Nothing to write
+
+    # Ensure the objects/info directory exists
+    info_dir = os.path.join(git_dir, b"objects", b"info")
+    os.makedirs(info_dir, exist_ok=True)
+
+    # Write using GitFile for atomic operation
+    from .file import GitFile
+
+    graph_path = os.path.join(info_dir, b"commit-graph")
+    with GitFile(graph_path, "wb") as f:
+        graph.write_to_file(f)
+
+
+def get_reachable_commits(
+    object_store, start_commits: list[ObjectID]
+) -> list[ObjectID]:
+    """Get all commits reachable from the given starting commits.
+
+    Args:
+        object_store: Object store to retrieve commits from
+        start_commits: List of starting commit IDs
+
+    Returns:
+        List of all reachable commit IDs (including the starting commits)
+    """
+    visited = set()
+    reachable = []
+    stack = []
+
+    # Normalize commit IDs for object store access and tracking
+    for commit_id in start_commits:
+        if isinstance(commit_id, bytes) and len(commit_id) == 40:
+            # Hex ObjectID - use directly for object store access
+            if commit_id not in visited:
+                stack.append(commit_id)
+        elif isinstance(commit_id, bytes) and len(commit_id) == 20:
+            # Binary SHA, convert to hex ObjectID for object store access
+            hex_id = sha_to_hex(commit_id)
+            if hex_id not in visited:
+                stack.append(hex_id)
+        else:
+            # Assume it's already correct format
+            if commit_id not in visited:
+                stack.append(commit_id)
+
+    while stack:
+        commit_id = stack.pop()
+        if commit_id in visited:
+            continue
+
+        visited.add(commit_id)
+
+        try:
+            commit_obj = object_store[commit_id]
+            if commit_obj.type_name != b"commit":
+                continue
+
+            # Add to reachable list (commit_id is already hex ObjectID)
+            reachable.append(commit_id)
+
+            # Add parents to stack
+            for parent_id in commit_obj.parents:
+                if parent_id not in visited:
+                    stack.append(parent_id)
+        except KeyError:
+            # Commit not found, skip
+            continue
+
+    return reachable

+ 107 - 0
dulwich/object_store.py

@@ -364,6 +364,28 @@ class BaseObjectStore:
             if sha.startswith(prefix):
                 yield sha
 
+    def get_commit_graph(self):
+        """Get the commit graph for this object store.
+
+        This base implementation has no commit graph and always returns None.
+
+        Returns:
+          CommitGraph object if available, None otherwise
+        """
+        return None
+
+    def write_commit_graph(self, refs=None, reachable=True) -> None:
+        """Write a commit graph file for this object store.
+
+        Args:
+            refs: List of refs to include. If None, includes all refs from object store.
+            reachable: If True, includes all commits reachable from refs.
+                      If False, only includes the direct ref targets.
+
+        Raises:
+            NotImplementedError: Always, in this default implementation.
+
+        Note:
+            The default implementation is not functional. Subclasses should
+            override this method to provide commit graph writing
+            functionality.
+        """
+        raise NotImplementedError(self.write_commit_graph)
+
 
 class PackBasedObjectStore(BaseObjectStore):
     def __init__(self, pack_compression_level=-1) -> None:
@@ -745,6 +767,9 @@ class DiskObjectStore(PackBasedObjectStore):
         self.loose_compression_level = loose_compression_level
         self.pack_compression_level = pack_compression_level
 
+        # Commit graph support - lazy loaded
+        self._commit_graph = None
+
     def __repr__(self) -> str:
         return f"<{self.__class__.__name__}({self.path!r})>"
 
@@ -1065,6 +1090,88 @@ class DiskObjectStore(PackBasedObjectStore):
                     seen.add(sha)
                     yield sha
 
+    def get_commit_graph(self):
+        """Get the commit graph for this object store.
+
+        Returns:
+          CommitGraph object if available, None otherwise
+        """
+        if self._commit_graph is None:
+            from .commit_graph import read_commit_graph
+
+            # Look for commit graph in our objects directory
+            graph_file = os.path.join(self.path, "info", "commit-graph")
+            if os.path.exists(graph_file):
+                self._commit_graph = read_commit_graph(graph_file)
+        return self._commit_graph
+
+    def write_commit_graph(self, refs=None, reachable=True) -> None:
+        """Write a commit graph file for this object store.
+
+        Args:
+            refs: List of refs to include. If None, includes all refs from object store.
+            reachable: If True, includes all commits reachable from refs.
+                      If False, only includes the direct ref targets.
+        """
+        from .commit_graph import get_reachable_commits
+
+        if refs is None:
+            # Get all commit objects from the object store
+            all_refs = []
+            # Iterate through all objects to find commits
+            for sha in self:
+                try:
+                    obj = self[sha]
+                    if obj.type_name == b"commit":
+                        all_refs.append(sha)
+                except KeyError:
+                    continue
+        else:
+            # Use provided refs
+            all_refs = refs
+
+        if not all_refs:
+            return  # No commits to include
+
+        if reachable:
+            # Get all reachable commits
+            commit_ids = get_reachable_commits(self, all_refs)
+        else:
+            # Just use the direct ref targets - ensure they're hex ObjectIDs
+            commit_ids = []
+            for ref in all_refs:
+                if isinstance(ref, bytes) and len(ref) == 40:
+                    # Already hex ObjectID
+                    commit_ids.append(ref)
+                elif isinstance(ref, bytes) and len(ref) == 20:
+                    # Binary SHA, convert to hex ObjectID
+                    from .objects import sha_to_hex
+
+                    commit_ids.append(sha_to_hex(ref))
+                else:
+                    # Assume it's already correct format
+                    commit_ids.append(ref)
+
+        if commit_ids:
+            # Write commit graph directly to our object store path
+            # Generate the commit graph
+            from .commit_graph import generate_commit_graph
+
+            graph = generate_commit_graph(self, commit_ids)
+
+            if graph.entries:
+                # Ensure the info directory exists
+                info_dir = os.path.join(self.path, "info")
+                os.makedirs(info_dir, exist_ok=True)
+
+                # Write using GitFile for atomic operation
+                graph_path = os.path.join(info_dir, "commit-graph")
+                with GitFile(graph_path, "wb") as f:
+                    graph.write_to_file(f)
+
+            # Clear cached commit graph so it gets reloaded
+            self._commit_graph = None
+
 
 class MemoryObjectStore(BaseObjectStore):
     """Object store that keeps all objects in memory."""

+ 1 - 0
dulwich/porcelain.py

@@ -1825,6 +1825,7 @@ def fetch(
                 prune=prune,
                 prune_tags=prune_tags,
             )
+
     return fetch_result
 
 

+ 11 - 0
dulwich/repo.py

@@ -334,6 +334,9 @@ class ParentsProvider:
         self.grafts = grafts
         self.shallows = set(shallows)
 
+        # Get commit graph once at initialization for performance
+        self.commit_graph = store.get_commit_graph()
+
     def get_parents(self, commit_id, commit=None):
         try:
             return self.grafts[commit_id]
@@ -341,6 +344,14 @@ class ParentsProvider:
             pass
         if commit_id in self.shallows:
             return []
+
+        # Try to use commit graph for faster parent lookup
+        if self.commit_graph:
+            parents = self.commit_graph.get_parents(commit_id)
+            if parents is not None:
+                return parents
+
+        # Fallback to reading the commit object
         if commit is None:
             commit = self.store[commit_id]
         return commit.parents

+ 1 - 7
dulwich/tests/test_object_store.py

@@ -22,7 +22,7 @@
 """Tests for the object store interface."""
 
 from typing import TYPE_CHECKING, Any, Callable
-from unittest import skipUnless
+from unittest.mock import patch
 
 from dulwich.index import commit_tree
 from dulwich.object_store import (
@@ -42,11 +42,6 @@ from .utils import make_object, make_tag
 if TYPE_CHECKING:
     from dulwich.object_store import BaseObjectStore
 
-try:
-    from unittest.mock import patch
-except ImportError:
-    patch = None  # type: ignore
-
 
 testobject = make_object(Blob, data=b"yummy data")
 
@@ -72,7 +67,6 @@ class ObjectStoreTests:
             [], self.store.determine_wants_all({b"refs/heads/foo": b"0" * 40})
         )
 
-    @skipUnless(patch, "Required mock.patch")
     def test_determine_wants_all_depth(self) -> None:
         self.store.add_object(testobject)
         refs = {b"refs/heads/foo": testobject.id}

+ 1 - 0
tests/__init__.py

@@ -120,6 +120,7 @@ def self_test_suite():
         "bundle",
         "client",
         "cloud_gcs",
+        "commit_graph",
         "config",
         "credentials",
         "diff_tree",

+ 1 - 0
tests/compat/__init__.py

@@ -27,6 +27,7 @@ import unittest
 def test_suite():
     names = [
         "client",
+        "commit_graph",
         "pack",
         "patch",
         "porcelain",

+ 461 - 0
tests/compat/test_commit_graph.py

@@ -0,0 +1,461 @@
+# test_commit_graph.py -- Compatibility tests for commit graph functionality
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+
+"""Compatibility tests for Git commit graph functionality.
+
+These tests verify that dulwich's commit graph implementation behaves
+identically to C Git's implementation.
+"""
+
+import os
+import tempfile
+
+from dulwich.commit_graph import find_commit_graph_file, read_commit_graph
+from dulwich.graph import can_fast_forward, find_merge_base
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, run_git_or_fail
+
+
+class CommitGraphCompatTests(CompatTestCase):
+    """Compatibility tests for commit graph functionality."""
+
+    # Commit graph was introduced in Git 2.18.0
+    min_git_version = (2, 18, 0)
+
+    def setUp(self):
+        """Create a scratch directory and pin the git identity for each test."""
+        super().setUp()
+        scratch = tempfile.mkdtemp()
+        self.test_dir = scratch
+        self.repo_path = os.path.join(scratch, "test-repo")
+
+        # A fixed identity avoids committer identity errors from git
+        identity = (
+            ("GIT_COMMITTER_NAME", "Test Author"),
+            ("GIT_COMMITTER_EMAIL", "test@example.com"),
+            ("GIT_AUTHOR_NAME", "Test Author"),
+            ("GIT_AUTHOR_EMAIL", "test@example.com"),
+        )
+        for variable, value in identity:
+            self.overrideEnv(variable, value)
+
+    def tearDown(self):
+        """Remove the scratch directory, then run the base-class teardown."""
+        from .utils import rmtree_ro
+
+        try:
+            rmtree_ro(self.test_dir)
+        finally:
+            # setUp chains to super().setUp(); mirror that here so that any
+            # teardown logic of the base classes still runs, even when the
+            # directory removal fails.
+            super().tearDown()
+
+    def create_test_repo_with_history(self):
+        """Create a test repository with some commit history.
+
+        The git directory is moved to ``self.repo_path`` and a separate work
+        directory is linked to it through a ``.git`` file. Five commits are
+        made one after another, then a ``feature`` branch with one commit is
+        created and merged back into ``master`` with ``--no-ff``.
+
+        Returns:
+          A ``(commits, work_dir)`` tuple. ``commits`` holds the stripped
+          ``git rev-parse HEAD`` output for the five sequential commits,
+          followed by the feature commit and the merge commit. ``work_dir``
+          is the path of the work directory.
+        """
+        # Initialize repository
+        run_git_or_fail(["init"], cwd=self.test_dir)
+        os.rename(os.path.join(self.test_dir, ".git"), self.repo_path)
+
+        work_dir = os.path.join(self.test_dir, "work")
+        os.makedirs(work_dir)
+
+        # Create .git file pointing to our repo
+        with open(os.path.join(work_dir, ".git"), "w") as f:
+            f.write(f"gitdir: {self.repo_path}\n")
+
+        # Create some commits
+        commits = []
+        for i in range(5):
+            filename = f"file{i}.txt"
+            with open(os.path.join(work_dir, filename), "w") as f:
+                f.write(f"Content {i}\n")
+
+            run_git_or_fail(["add", filename], cwd=work_dir)
+            run_git_or_fail(
+                [
+                    "commit",
+                    "-m",
+                    f"Commit {i}",
+                    "--author",
+                    "Test Author <test@example.com>",
+                    "--date",
+                    f"2024-01-0{i + 1} 12:00:00 +0000",
+                ],
+                cwd=work_dir,
+            )
+
+            # Get the commit SHA
+            result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
+            commits.append(result.strip())
+
+        # Create a branch and merge
+        run_git_or_fail(["checkout", "-b", "feature"], cwd=work_dir)
+
+        with open(os.path.join(work_dir, "feature.txt"), "w") as f:
+            f.write("Feature content\n")
+        run_git_or_fail(["add", "feature.txt"], cwd=work_dir)
+        run_git_or_fail(
+            [
+                "commit",
+                "-m",
+                "Feature commit",
+                "--author",
+                "Test Author <test@example.com>",
+                "--date",
+                "2024-01-06 12:00:00 +0000",
+            ],
+            cwd=work_dir,
+        )
+
+        result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
+        feature_commit = result.strip()
+
+        # Merge back to master
+        # NOTE(review): assumes `git init` named the initial branch "master";
+        # verify against the git configuration run_git_or_fail runs with.
+        run_git_or_fail(["checkout", "master"], cwd=work_dir)
+        run_git_or_fail(
+            ["merge", "feature", "--no-ff", "-m", "Merge feature"], cwd=work_dir
+        )
+
+        result = run_git_or_fail(["rev-parse", "HEAD"], cwd=work_dir)
+        merge_commit = result.strip()
+
+        # Order: five sequential commits, feature commit, merge commit
+        commits.extend([feature_commit, merge_commit])
+        return commits, work_dir
+
+    def test_commit_graph_generation_and_reading(self):
+        """A graph written by C Git matches the commits dulwich loads itself."""
+        _, work_dir = self.create_test_repo_with_history()
+
+        # Let C Git write the graph, then locate and parse it with dulwich
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        graph_path = find_commit_graph_file(self.repo_path)
+        self.assertIsNotNone(graph_path, "Commit graph file should exist")
+
+        parsed = read_commit_graph(graph_path)
+        self.assertIsNotNone(parsed, "Should be able to read commit graph")
+        self.assertGreater(len(parsed), 0, "Commit graph should contain commits")
+
+        repo = Repo(self.repo_path)
+        store = repo.object_store
+
+        # Cross-check every graph entry against the commit object itself
+        for entry in parsed:
+            # entry.commit_id is a hex ObjectID, usable as a store key
+            stored = store[entry.commit_id]
+            hex_id = entry.commit_id.decode()
+
+            self.assertIsNotNone(stored, f"Commit {hex_id} should be accessible")
+            self.assertEqual(
+                entry.tree_id, stored.tree, f"Tree ID mismatch for commit {hex_id}"
+            )
+            self.assertEqual(
+                entry.parents,
+                list(stored.parents),
+                f"Parent mismatch for commit {hex_id}",
+            )
+
+    def test_merge_base_with_commit_graph(self):
+        """Merge bases agree with and without a commit graph, and with C Git."""
+        _, work_dir = self.create_test_repo_with_history()
+
+        repo = Repo(self.repo_path)
+        master_tip = repo.refs[b"refs/heads/master"]
+        feature_tip = repo.refs[b"refs/heads/feature"]
+
+        # Baseline: no commit graph has been written yet
+        without_graph = find_merge_base(repo, [master_tip, feature_tip])
+
+        # Write the graph and reopen the repository so it is picked up
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        repo = Repo(self.repo_path)
+        with_graph = find_merge_base(repo, [master_tip, feature_tip])
+
+        self.assertEqual(
+            without_graph,
+            with_graph,
+            "Merge base should be same with and without commit graph",
+        )
+
+        # C Git is the reference implementation
+        git_output = run_git_or_fail(
+            ["merge-base", master_tip.decode(), feature_tip.decode()], cwd=work_dir
+        )
+        self.assertEqual(
+            with_graph,
+            [git_output.strip()],
+            "Dulwich merge base should match C Git result",
+        )
+
+    def test_fast_forward_with_commit_graph(self):
+        """Fast-forward checks agree with and without a graph, and with C Git."""
+        from .utils import run_git
+
+        commits, work_dir = self.create_test_repo_with_history()
+
+        # Second and fourth commit: an older and a newer commit
+        older, newer = commits[1], commits[3]
+
+        # Baseline: no commit graph has been written yet
+        repo = Repo(self.repo_path)
+        without_graph = can_fast_forward(repo, older, newer)
+
+        # Write the graph and reopen the repository so it is picked up
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        repo = Repo(self.repo_path)
+        with_graph = can_fast_forward(repo, older, newer)
+
+        self.assertEqual(
+            without_graph,
+            with_graph,
+            "Fast-forward detection should be same with and without commit graph",
+        )
+
+        # C Git reports ancestry through the exit status of
+        # `merge-base --is-ancestor`; the captured output is not needed.
+        returncode, _stdout, _stderr = run_git(
+            ["merge-base", "--is-ancestor", older.decode(), newer.decode()],
+            cwd=work_dir,
+            capture_stdout=True,
+            capture_stderr=True,
+        )
+
+        self.assertEqual(
+            with_graph,
+            returncode == 0,
+            "Dulwich fast-forward detection should match C Git",
+        )
+
+    def test_generation_numbers_consistency(self):
+        """Generation numbers written by C Git respect the commit topology."""
+        _, work_dir = self.create_test_repo_with_history()
+
+        # Write the graph with C Git and parse it with dulwich
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        commit_graph = read_commit_graph(find_commit_graph_file(self.repo_path))
+
+        repo = Repo(self.repo_path)
+
+        generation_of = {entry.commit_id: entry.generation for entry in commit_graph}
+
+        # Properties checked below:
+        # 1. A commit without parents has generation >= 1.
+        # 2. Any other commit has a generation above that of every parent
+        #    that is itself listed in the graph.
+        for entry in commit_graph:
+            # Look the commit up; the result itself is not used
+            repo.object_store[entry.commit_id]
+
+            if not entry.parents:
+                self.assertGreaterEqual(
+                    entry.generation,
+                    1,
+                    f"Root commit {entry.commit_id.decode()} should have generation >= 1",
+                )
+                continue
+
+            known = [generation_of[p] for p in entry.parents if p in generation_of]
+            highest_parent = max([0, *known])
+
+            if highest_parent > 0:  # Only check if we have parent generation info
+                self.assertGreater(
+                    entry.generation,
+                    highest_parent,
+                    f"Commit {entry.commit_id.decode()} generation should be > max parent generation",
+                )
+
+    def test_commit_graph_with_different_options(self):
+        """Test commit graph generation with different C Git options.
+
+        Each strategy runs in its own subTest and starts without a
+        commit-graph file, so a failure of one strategy does not hide the
+        others.
+        """
+        commits, work_dir = self.create_test_repo_with_history()
+
+        # Test different generation strategies
+        strategies = [
+            ["--reachable"],
+            ["--stdin-commits"],
+            ["--append"],
+        ]
+
+        # The graph location does not depend on the strategy; build it once
+        graph_path = os.path.join(self.repo_path, "objects", "info", "commit-graph")
+
+        for strategy in strategies:
+            with self.subTest(strategy=strategy):
+                # Clean up any existing commit graph
+                if os.path.exists(graph_path):
+                    try:
+                        os.remove(graph_path)
+                    except PermissionError:
+                        # On Windows, handle read-only files
+                        from .utils import remove_ro
+
+                        remove_ro(graph_path)
+
+                if strategy == ["--stdin-commits"]:
+                    # For stdin-commits, we need to provide commit IDs
+                    process_input = b"\n".join(commits[:3]) + b"\n"  # First 3 commits
+                    run_git_or_fail(
+                        ["commit-graph", "write", *strategy],
+                        cwd=work_dir,
+                        input=process_input,
+                    )
+                elif strategy == ["--append"]:
+                    # First create a base graph
+                    run_git_or_fail(
+                        ["commit-graph", "write", "--reachable"], cwd=work_dir
+                    )
+                    # Then append (this should work even if nothing new to add)
+                    run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
+                else:
+                    run_git_or_fail(["commit-graph", "write", *strategy], cwd=work_dir)
+
+                # Verify dulwich can read the generated graph
+                graph_file = find_commit_graph_file(self.repo_path)
+                if graph_file:  # Some strategies might not generate a graph
+                    commit_graph = read_commit_graph(graph_file)
+                    self.assertIsNotNone(
+                        commit_graph,
+                        f"Should be able to read commit graph generated with {strategy}",
+                    )
+
+    def test_commit_graph_incremental_update(self):
+        """Rewriting the graph after a new commit adds exactly that commit."""
+        _, work_dir = self.create_test_repo_with_history()
+
+        # Snapshot of the graph before the extra commit
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        graph_file = find_commit_graph_file(self.repo_path)
+        before = read_commit_graph(graph_file)
+        count_before = len(before)
+
+        # One more commit on top of the existing history
+        new_file = os.path.join(work_dir, "new_file.txt")
+        with open(new_file, "w") as stream:
+            stream.write("New content\n")
+        run_git_or_fail(["add", "new_file.txt"], cwd=work_dir)
+        commit_command = [
+            "commit",
+            "-m",
+            "New commit",
+            "--author",
+            "Test Author <test@example.com>",
+            "--date",
+            "2024-01-08 12:00:00 +0000",
+        ]
+        run_git_or_fail(commit_command, cwd=work_dir)
+
+        # Rewrite the graph and read it again from the same path
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        after = read_commit_graph(graph_file)
+
+        self.assertEqual(
+            len(after),
+            count_before + 1,
+            "Updated commit graph should have one more commit",
+        )
+
+        # Nothing that was listed before may have disappeared
+        ids_before = {entry.commit_id for entry in before}
+        ids_after = {entry.commit_id for entry in after}
+        self.assertTrue(
+            ids_before <= ids_after,
+            "All original commits should still be in updated graph",
+        )
+
+    def test_commit_graph_with_tags_and_refs(self):
+        """Tagged commits are listed in the graph and merge-base still works."""
+        commits, work_dir = self.create_test_repo_with_history()
+        first_tagged, second_tagged = commits[2], commits[4]
+
+        # Tag one commit without and one with `-a`
+        run_git_or_fail(["tag", "v1.0", first_tagged.decode()], cwd=work_dir)
+        run_git_or_fail(
+            ["tag", "-a", "v2.0", second_tagged.decode(), "-m", "Version 2.0"],
+            cwd=work_dir,
+        )
+
+        # Write the graph with C Git and parse it with dulwich
+        run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=work_dir)
+        commit_graph = read_commit_graph(find_commit_graph_file(self.repo_path))
+
+        repo = Repo(self.repo_path)
+
+        listed_ids = {entry.commit_id for entry in commit_graph}
+        for tagged_commit in (first_tagged, second_tagged):
+            self.assertIn(
+                tagged_commit,
+                listed_ids,
+                f"Tagged commit {tagged_commit.decode()} should be in commit graph",
+            )
+
+        # The merge base of the first commit and a later commit is the first one
+        self.assertEqual(
+            find_merge_base(repo, [commits[0], commits[4]]),
+            [commits[0]],
+            "Merge base calculation should work with tagged commits",
+        )
+
+    def test_empty_repository_commit_graph(self):
+        """Test commit graph behavior with empty repository."""
+        # `git init --bare` turns the directory it runs in into the
+        # repository itself; no ".git" subdirectory is created, so the
+        # repository path is the test directory.
+        run_git_or_fail(["init", "--bare"], cwd=self.test_dir)
+        empty_repo_path = self.test_dir
+
+        # Try to write commit graph (should succeed but create empty graph)
+        try:
+            run_git_or_fail(["commit-graph", "write", "--reachable"], cwd=self.test_dir)
+        except Exception:
+            # Deliberately best-effort: some Git versions might fail on
+            # empty repos, which is fine
+            pass
+
+        # Check if commit graph file exists
+        graph_file = find_commit_graph_file(empty_repo_path)
+        if graph_file:
+            # If it exists, dulwich should be able to read it
+            commit_graph = read_commit_graph(graph_file)
+            self.assertIsNotNone(
+                commit_graph, "Should be able to read empty commit graph"
+            )
+            self.assertEqual(
+                len(commit_graph), 0, "Empty repository should have empty commit graph"
+            )

+ 21 - 5
tests/compat/utils.py

@@ -140,6 +140,19 @@ def run_git(
     env["LC_ALL"] = env["LANG"] = "C"
     env["PATH"] = os.getenv("PATH")
 
+    # Preserve Git identity environment variables if they exist, otherwise set dummy values
+    git_env_defaults = {
+        "GIT_AUTHOR_NAME": "Test User",
+        "GIT_AUTHOR_EMAIL": "test@example.com",
+        "GIT_COMMITTER_NAME": "Test User",
+        "GIT_COMMITTER_EMAIL": "test@example.com",
+    }
+
+    for git_env_var, default_value in git_env_defaults.items():
+        if git_env_var not in env:
+            # If the environment variable is not set, use the default value
+            env[git_env_var] = default_value
+
     args = [git_path, *args]
     popen_kwargs["stdin"] = subprocess.PIPE
     if capture_stdout:
@@ -274,12 +287,15 @@ class CompatTestCase(TestCase):
         return repo
 
 
-if sys.platform == "win32":
+def remove_ro(path: str) -> None:
+    """Remove a read-only file.
+
+    Args:
+      path: Path of the file to delete.
+    """
+    # Set the write permission bit first, then delete the file
+    os.chmod(path, stat.S_IWRITE)
+    os.remove(path)
 
-    def remove_ro(action, name, exc) -> None:
-        os.chmod(name, stat.S_IWRITE)
-        os.remove(name)
 
-    rmtree_ro = functools.partial(shutil.rmtree, onerror=remove_ro)
+if sys.platform == "win32":
+    rmtree_ro = functools.partial(
+        shutil.rmtree, onerror=lambda action, name, exc: remove_ro(name)
+    )
 else:
     rmtree_ro = shutil.rmtree

+ 809 - 0
tests/test_commit_graph.py

@@ -0,0 +1,809 @@
+# test_commit_graph.py -- Tests for commit graph functionality
+# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+
+"""Tests for Git commit graph functionality."""
+
+import io
+import os
+import struct
+import tempfile
+import unittest
+
+from dulwich.commit_graph import (
+    CHUNK_COMMIT_DATA,
+    CHUNK_OID_FANOUT,
+    CHUNK_OID_LOOKUP,
+    COMMIT_GRAPH_SIGNATURE,
+    COMMIT_GRAPH_VERSION,
+    HASH_VERSION_SHA1,
+    CommitGraph,
+    CommitGraphChunk,
+    CommitGraphEntry,
+    find_commit_graph_file,
+    generate_commit_graph,
+    get_reachable_commits,
+    read_commit_graph,
+)
+
+
+class CommitGraphEntryTests(unittest.TestCase):
+    """Tests for CommitGraphEntry."""
+
+    def test_init(self):
+        """Constructor arguments are exposed unchanged as attributes."""
+        expected = {
+            "commit_id": b"a" * 40,
+            "tree_id": b"b" * 40,
+            "parents": [b"c" * 40, b"d" * 40],
+            "generation": 42,
+            "commit_time": 1234567890,
+        }
+
+        # Positional order: commit_id, tree_id, parents, generation, commit_time
+        entry = CommitGraphEntry(*expected.values())
+
+        for attribute, value in expected.items():
+            self.assertEqual(getattr(entry, attribute), value)
+
+    def test_repr(self):
+        """repr() names the class and shows the generation number."""
+        text = repr(CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000))
+        for fragment in ("CommitGraphEntry", "generation=1"):
+            self.assertIn(fragment, text)
+
+
+class CommitGraphChunkTests(unittest.TestCase):
+    """Tests for CommitGraphChunk."""
+
+    def test_init(self):
+        """Chunk id and payload are stored as given."""
+        chunk_id, payload = b"TEST", b"test data"
+        chunk = CommitGraphChunk(chunk_id, payload)
+        self.assertEqual(chunk.chunk_id, chunk_id)
+        self.assertEqual(chunk.data, payload)
+
+    def test_repr(self):
+        """repr() names the class and reports the payload size."""
+        text = repr(CommitGraphChunk(b"TEST", b"x" * 100))
+        for fragment in ("CommitGraphChunk", "size=100"):
+            self.assertIn(fragment, text)
+
+
+class CommitGraphTests(unittest.TestCase):
+    """Tests for CommitGraph."""
+
+    def test_init(self):
+        """A new graph uses the SHA-1 hash version and is empty."""
+        fresh = CommitGraph()
+        self.assertEqual(fresh.hash_version, HASH_VERSION_SHA1)
+        for container in (fresh.entries, fresh.chunks):
+            self.assertEqual(len(container), 0)
+
+    def test_len(self):
+        """len() follows the number of stored entries."""
+        graph = CommitGraph()
+        self.assertEqual(len(graph), 0)
+
+        # One placeholder entry is enough to move the count
+        graph.entries.append(CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000))
+        self.assertEqual(len(graph), 1)
+
+    def test_iter(self):
+        """Iterating a graph yields its entries in insertion order."""
+        first = CommitGraphEntry(b"a" * 40, b"b" * 40, [], 1, 1000)
+        second = CommitGraphEntry(b"c" * 40, b"d" * 40, [], 2, 2000)
+        graph = CommitGraph()
+        graph.entries.extend([first, second])
+
+        yielded = list(graph)
+        self.assertEqual(len(yielded), 2)
+        for actual, expected in zip(yielded, (first, second)):
+            self.assertEqual(actual, expected)
+
+    def test_get_entry_by_oid_missing(self):
+        """Looking up an entry in an empty graph gives None."""
+        unknown_oid = b"f" * 40
+        self.assertIsNone(CommitGraph().get_entry_by_oid(unknown_oid))
+
+    def test_get_generation_number_missing(self):
+        """Asking an empty graph for a generation number gives None."""
+        unknown_oid = b"f" * 40
+        self.assertIsNone(CommitGraph().get_generation_number(unknown_oid))
+
+    def test_get_parents_missing(self):
+        """Asking an empty graph for parents gives None."""
+        unknown_oid = b"f" * 40
+        self.assertIsNone(CommitGraph().get_parents(unknown_oid))
+
+    def test_from_invalid_signature(self):
+        """A stream that does not start with the graph signature is rejected."""
+        # Bogus signature followed by 100 NUL bytes. The escape must be
+        # b"\x00"; b"\\x00" would be the four text characters "\x00".
+        data = b"XXXX" + b"\x00" * 100
+        f = io.BytesIO(data)
+
+        with self.assertRaises(ValueError) as cm:
+            CommitGraph.from_file(f)
+        self.assertIn("Invalid commit graph signature", str(cm.exception))
+
+    def test_from_invalid_version(self):
+        """A graph file with an unknown version byte is rejected."""
+        # Valid signature, version 99, then 100 NUL bytes. The escape must
+        # be b"\x00"; b"\\x00" would be the four text characters "\x00".
+        data = COMMIT_GRAPH_SIGNATURE + struct.pack(">B", 99) + b"\x00" * 100
+        f = io.BytesIO(data)
+
+        with self.assertRaises(ValueError) as cm:
+            CommitGraph.from_file(f)
+        self.assertIn("Unsupported commit graph version", str(cm.exception))
+
+    def test_from_invalid_hash_version(self):
+        """A graph file with an unknown hash version byte is rejected."""
+        # Valid signature and version, hash version 99, then 100 NUL bytes.
+        # The escape must be b"\x00"; b"\\x00" would be the four text
+        # characters "\x00".
+        data = (
+            COMMIT_GRAPH_SIGNATURE
+            + struct.pack(">B", COMMIT_GRAPH_VERSION)
+            + struct.pack(">B", 99)  # Invalid hash version
+            + b"\x00" * 100
+        )
+        f = io.BytesIO(data)
+
+        with self.assertRaises(ValueError) as cm:
+            CommitGraph.from_file(f)
+        self.assertIn("Unsupported hash version", str(cm.exception))
+
+    def create_minimal_commit_graph_data(self):
+        """Build the bytes of a minimal commit graph holding one commit.
+
+        Layout: 8-byte header, table of contents (3 chunks plus terminator),
+        then the OID fanout, OID lookup and commit data chunks.
+        """
+        header_size = 8
+        toc_entry_size = 12  # 4-byte chunk id + 8-byte offset
+        fanout_size = 256 * 4
+        oid_size = 20
+        commit_record_size = 36  # 20-byte tree id + 16 bytes of metadata
+
+        # Header: signature, version, hash version, 3 chunks, 0 base graphs
+        header = COMMIT_GRAPH_SIGNATURE + struct.pack(
+            ">BBBB", COMMIT_GRAPH_VERSION, HASH_VERSION_SHA1, 3, 0
+        )
+
+        # Chunk offsets, measured from the start of the file
+        fanout_offset = header_size + 4 * toc_entry_size
+        lookup_offset = fanout_offset + fanout_size
+        data_offset = lookup_offset + oid_size
+        end_offset = data_offset + commit_record_size
+
+        # Table of contents; the all-zero id marks the terminator entry
+        toc = b"".join(
+            chunk_id + struct.pack(">Q", offset)
+            for chunk_id, offset in (
+                (CHUNK_OID_FANOUT, fanout_offset),
+                (CHUNK_OID_LOOKUP, lookup_offset),
+                (CHUNK_COMMIT_DATA, data_offset),
+                (b"\x00\x00\x00\x00", end_offset),
+            )
+        )
+
+        # Fanout: the only commit id starts with 0xaa, so slots below 0xAA
+        # hold 0 and every slot from 0xAA on holds 1
+        fanout = struct.pack(
+            ">256L", *(0 if first_byte < 0xAA else 1 for first_byte in range(256))
+        )
+
+        # OID lookup: one 20-byte commit id
+        oid_lookup = b"\xaa" + b"\x00" * 19
+
+        # Commit data: tree id, two parent slots, generation and commit time
+        tree_oid = b"\xbb" + b"\x00" * 19
+        no_parent = 0x70000000  # GRAPH_PARENT_MISSING
+        generation = 1
+        commit_time = 1234567890
+        commit_data = tree_oid + struct.pack(
+            ">LLLL",
+            no_parent,
+            no_parent,
+            (generation << 2) | (commit_time >> 32),
+            commit_time & 0xFFFFFFFF,
+        )
+
+        return header + toc + fanout + oid_lookup + commit_data
+
+    def test_from_minimal_valid_file(self):
+        """Test parsing a minimal but valid commit graph file."""
+        stream = io.BytesIO(self.create_minimal_commit_graph_data())
+        graph = CommitGraph.from_file(stream)
+
+        self.assertEqual(graph.hash_version, HASH_VERSION_SHA1)
+        self.assertEqual(len(graph), 1)
+
+        # The single entry must mirror what the fixture encodes
+        expected_commit = b"aa" + b"00" * 19
+        entry = graph.entries[0]
+        self.assertEqual(entry.commit_id, expected_commit)
+        self.assertEqual(entry.tree_id, b"bb" + b"00" * 19)
+        self.assertEqual(entry.parents, [])  # No parents
+        self.assertEqual(entry.generation, 1)
+        self.assertEqual(entry.commit_time, 1234567890)
+
+        # Lookups by hex commit id
+        self.assertEqual(graph.get_generation_number(expected_commit), 1)
+        self.assertEqual(graph.get_parents(expected_commit), [])
+        self.assertIsNotNone(graph.get_entry_by_oid(expected_commit))
+
+    def test_missing_required_chunks(self):
+        """Test error handling for missing required chunks."""
+        # Create data with header but no chunks
+        header = (
+            COMMIT_GRAPH_SIGNATURE
+            + struct.pack(">B", COMMIT_GRAPH_VERSION)
+            + struct.pack(">B", HASH_VERSION_SHA1)
+            + struct.pack(">B", 0)  # 0 chunks
+            + struct.pack(">B", 0)
+        )
+
+        # TOC with just terminator: a 4-byte all-zero chunk id followed by an
+        # 8-byte offset. The id must be real NUL bytes (b"\x00"); b"\\x00"
+        # would be 16 text characters instead of 4 bytes.
+        toc = b"\x00\x00\x00\x00" + struct.pack(">Q", 12)
+
+        data = header + toc
+        f = io.BytesIO(data)
+
+        with self.assertRaises(ValueError) as cm:
+            CommitGraph.from_file(f)
+        self.assertIn("Missing required OID lookup chunk", str(cm.exception))
+
+    def test_write_empty_graph_raises(self):
+        """Test that writing empty graph raises ValueError."""
+        empty = CommitGraph()
+        sink = io.BytesIO()
+
+        # Only the write itself is expected to raise
+        self.assertRaises(ValueError, empty.write_to_file, sink)
+
+    def test_write_and_read_round_trip(self):
+        """An entry written with write_to_file is read back unchanged."""
+        graph = CommitGraph()
+        original = CommitGraphEntry(
+            commit_id=b"aa" + b"00" * 19,
+            tree_id=b"bb" + b"00" * 19,
+            parents=[],
+            generation=1,
+            commit_time=1234567890,
+        )
+        graph.entries.append(original)
+        # Index keyed by the binary form of the hex commit id
+        graph._oid_to_index = {bytes.fromhex(original.commit_id.decode()): 0}
+
+        # Serialize into memory, rewind, and parse again
+        buffer = io.BytesIO()
+        graph.write_to_file(buffer)
+        buffer.seek(0)
+        restored = CommitGraph.from_file(buffer)
+
+        self.assertEqual(len(restored), 1)
+        restored_entry = restored.entries[0]
+        for field in ("commit_id", "tree_id", "parents", "generation", "commit_time"):
+            self.assertEqual(getattr(restored_entry, field), getattr(original, field))
+
+
+class CommitGraphFileOperationsTests(unittest.TestCase):
+    """Tests for commit graph file operations."""
+
+    def setUp(self):
+        self.tempdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        import shutil
+
+        shutil.rmtree(self.tempdir, ignore_errors=True)
+
+    def test_read_commit_graph_missing_file(self):
+        """Test reading from non-existent file."""
+        missing_path = os.path.join(self.tempdir, "missing.graph")
+        result = read_commit_graph(missing_path)
+        self.assertIsNone(result)
+
+    def test_read_commit_graph_invalid_file(self):
+        """Test reading from invalid file."""
+        invalid_path = os.path.join(self.tempdir, "invalid.graph")
+        with open(invalid_path, "wb") as f:
+            f.write(b"invalid data")
+
+        with self.assertRaises(ValueError):
+            read_commit_graph(invalid_path)
+
+    def test_find_commit_graph_file_missing(self):
+        """Test finding commit graph file when it doesn't exist."""
+        result = find_commit_graph_file(self.tempdir)
+        self.assertIsNone(result)
+
+    def test_find_commit_graph_file_standard_location(self):
+        """Test finding commit graph file in standard location."""
+        # Create .git/objects/info/commit-graph
+        objects_dir = os.path.join(self.tempdir, "objects")
+        info_dir = os.path.join(objects_dir, "info")
+        os.makedirs(info_dir)
+
+        graph_path = os.path.join(info_dir, "commit-graph")
+        with open(graph_path, "wb") as f:
+            f.write(b"dummy")
+
+        result = find_commit_graph_file(self.tempdir)
+        self.assertEqual(result, graph_path.encode())
+
+    def test_find_commit_graph_file_chain_location(self):
+        """Test finding commit graph file in chain location."""
+        # Create .git/objects/info/commit-graphs/graph-{hash}.graph
+        objects_dir = os.path.join(self.tempdir, "objects")
+        info_dir = os.path.join(objects_dir, "info")
+        graphs_dir = os.path.join(info_dir, "commit-graphs")
+        os.makedirs(graphs_dir)
+
+        graph_path = os.path.join(graphs_dir, "graph-abc123.graph")
+        with open(graph_path, "wb") as f:
+            f.write(b"dummy")
+
+        result = find_commit_graph_file(self.tempdir)
+        self.assertEqual(result, graph_path.encode())
+
+    def test_find_commit_graph_file_prefers_standard(self):
+        """With both layouts present, the standard single file is returned."""
+        info_path = os.path.join(self.tempdir, "objects", "info")
+        chain_dir = os.path.join(info_path, "commit-graphs")
+        # Creating the deepest directory also creates objects/info on the way.
+        os.makedirs(chain_dir)
+
+        standard_file = os.path.join(info_path, "commit-graph")
+        chain_file = os.path.join(chain_dir, "graph-abc123.graph")
+        # Distinct payloads keep the two candidate files distinguishable.
+        for target, payload in (
+            (standard_file, b"standard"),
+            (chain_file, b"chain"),
+        ):
+            with open(target, "wb") as fh:
+                fh.write(payload)
+
+        self.assertEqual(
+            find_commit_graph_file(self.tempdir), standard_file.encode()
+        )
+
+
+class CommitGraphGenerationTests(unittest.TestCase):
+    """Tests for generating, writing and consuming commit graphs.
+
+    Every test runs against a private temporary directory. Fixture objects
+    are built through the ``_make_disk_store``, ``_add_tree`` and
+    ``_add_commit`` helpers defined on this class.
+    """
+
+    def setUp(self):
+        import shutil
+
+        self.tempdir = tempfile.mkdtemp()
+        # Cleanups run in LIFO order, so registering the directory removal
+        # first guarantees that repositories closed via later addCleanup()
+        # calls are shut down before their files are deleted.
+        self.addCleanup(shutil.rmtree, self.tempdir, ignore_errors=True)
+
+    def _make_disk_store(self, name="objects"):
+        """Create and return a DiskObjectStore rooted at ``<tempdir>/<name>``."""
+        from dulwich.object_store import DiskObjectStore
+
+        path = os.path.join(self.tempdir, name)
+        os.makedirs(path, exist_ok=True)
+        return DiskObjectStore(path)
+
+    def _add_tree(self, object_store):
+        """Add an empty tree to ``object_store`` and return it."""
+        from dulwich.objects import Tree
+
+        tree = Tree()
+        object_store.add_object(tree)
+        return tree
+
+    def _add_commit(
+        self,
+        object_store,
+        tree,
+        message,
+        commit_time,
+        parents=(),
+        identity=b"Test <test@example.com>",
+    ):
+        """Build a commit, add it to ``object_store`` and return it.
+
+        Args:
+          object_store: Store the commit is added to.
+          tree: Tree object the commit points at.
+          message: Commit message (bytes).
+          commit_time: Timestamp used for both author and commit time.
+          parents: Parent commit ids; left unset on the commit when empty.
+          identity: Value used for both author and committer.
+        """
+        from dulwich.objects import Commit
+
+        commit = Commit()
+        commit.tree = tree.id
+        if parents:
+            commit.parents = list(parents)
+        commit.author = commit.committer = identity
+        commit.commit_time = commit.author_time = commit_time
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = message
+        object_store.add_object(commit)
+        return commit
+
+    def test_generate_commit_graph_empty(self):
+        """Test generating commit graph with no commits."""
+        from dulwich.object_store import MemoryObjectStore
+
+        graph = generate_commit_graph(MemoryObjectStore(), [])
+
+        self.assertEqual(len(graph), 0)
+
+    def test_generate_commit_graph_single_commit(self):
+        """Test generating commit graph with single commit."""
+        from dulwich.object_store import MemoryObjectStore
+
+        object_store = MemoryObjectStore()
+        tree = self._add_tree(object_store)
+        commit = self._add_commit(
+            object_store,
+            tree,
+            b"Test commit",
+            1234567890,
+            identity=b"Test Author <test@example.com>",
+        )
+
+        graph = generate_commit_graph(object_store, [commit.id])
+
+        self.assertEqual(len(graph), 1)
+        entry = graph.entries[0]
+        self.assertEqual(entry.commit_id, commit.id)
+        self.assertEqual(entry.tree_id, commit.tree)
+        self.assertEqual(entry.parents, [])
+        self.assertEqual(entry.generation, 1)
+        self.assertEqual(entry.commit_time, 1234567890)
+
+    def test_get_reachable_commits(self):
+        """Test getting reachable commits."""
+        from dulwich.object_store import MemoryObjectStore
+
+        object_store = MemoryObjectStore()
+        tree = self._add_tree(object_store)
+
+        # History: commit1 <- commit2
+        commit1 = self._add_commit(object_store, tree, b"First commit", 1234567890)
+        commit2 = self._add_commit(
+            object_store, tree, b"Second commit", 1234567891, parents=[commit1.id]
+        )
+
+        # Walking from the tip must also yield its ancestor.
+        reachable = get_reachable_commits(object_store, [commit2.id])
+
+        self.assertEqual(len(reachable), 2)
+        self.assertIn(commit1.id, reachable)
+        self.assertIn(commit2.id, reachable)
+
+    def test_write_commit_graph_to_file(self):
+        """Test writing commit graph to file."""
+        object_store = self._make_disk_store()
+        tree = self._add_tree(object_store)
+        commit = self._add_commit(
+            object_store,
+            tree,
+            b"Test commit",
+            1234567890,
+            identity=b"Test Author <test@example.com>",
+        )
+
+        # Write commit graph using ObjectStore method
+        object_store.write_commit_graph([commit.id], reachable=False)
+
+        # Verify file was created inside the store's info directory
+        graph_path = os.path.join(self.tempdir, "objects", "info", "commit-graph")
+        self.assertTrue(os.path.exists(graph_path))
+
+        # Read back and verify
+        graph = read_commit_graph(graph_path)
+        self.assertIsNotNone(graph)
+        self.assertEqual(len(graph), 1)
+
+        entry = graph.entries[0]
+        self.assertEqual(entry.commit_id, commit.id)
+        self.assertEqual(entry.tree_id, commit.tree)
+
+    def test_object_store_commit_graph_methods(self):
+        """Test ObjectStore commit graph methods."""
+        object_store = self._make_disk_store()
+
+        # Initially no commit graph
+        self.assertIsNone(object_store.get_commit_graph())
+
+        tree = self._add_tree(object_store)
+        commit = self._add_commit(
+            object_store,
+            tree,
+            b"Test commit",
+            1234567890,
+            identity=b"Test Author <test@example.com>",
+        )
+
+        # Write commit graph (disable reachable to avoid traversal issue)
+        object_store.write_commit_graph([commit.id], reachable=False)
+
+        # Now should have commit graph
+        self.assertIsNotNone(object_store.get_commit_graph())
+
+        # Test update (should still have commit graph)
+        object_store.write_commit_graph()
+        self.assertIsNotNone(object_store.get_commit_graph())
+
+    def test_parents_provider_commit_graph_integration(self):
+        """Test that ParentsProvider uses commit graph when available."""
+        from dulwich.repo import ParentsProvider
+
+        object_store = self._make_disk_store()
+        tree = self._add_tree(object_store)
+
+        # History: commit1 <- commit2
+        commit1 = self._add_commit(object_store, tree, b"First commit", 1234567890)
+        commit2 = self._add_commit(
+            object_store, tree, b"Second commit", 1234567891, parents=[commit1.id]
+        )
+
+        # Write commit graph
+        object_store.write_commit_graph([commit1.id, commit2.id], reachable=False)
+
+        # Test ParentsProvider with commit graph
+        provider = ParentsProvider(object_store)
+
+        # Verify commit graph is loaded
+        self.assertIsNotNone(provider.commit_graph)
+
+        # Test parent lookups
+        self.assertEqual(provider.get_parents(commit1.id), [])
+        self.assertEqual(provider.get_parents(commit2.id), [commit1.id])
+
+        # Test fallback behavior: a second store holding the same objects
+        # but for which no commit graph has been written.
+        object_store_no_graph = self._make_disk_store("objects2")
+        for obj in (tree, commit1, commit2):
+            object_store_no_graph.add_object(obj)
+
+        provider_no_graph = ParentsProvider(object_store_no_graph)
+        self.assertIsNone(provider_no_graph.commit_graph)
+
+        # Should still work via commit object fallback
+        self.assertEqual(provider_no_graph.get_parents(commit1.id), [])
+        self.assertEqual(provider_no_graph.get_parents(commit2.id), [commit1.id])
+
+    def test_graph_operations_use_commit_graph(self):
+        """Test that graph operations use commit graph when available."""
+        from dulwich.graph import can_fast_forward, find_merge_base
+        from dulwich.repo import Repo
+
+        object_store = self._make_disk_store()
+        tree = self._add_tree(object_store)
+
+        # History (arrows point from child to parent):
+        #
+        #   commit1 <- commit2 <- commit3 <- commit5 (merge)
+        #                      ^- commit4 <-/
+        commit1 = self._add_commit(object_store, tree, b"First commit", 1234567890)
+        commit2 = self._add_commit(
+            object_store, tree, b"Second commit", 1234567891, parents=[commit1.id]
+        )
+        commit3 = self._add_commit(
+            object_store, tree, b"Third commit", 1234567892, parents=[commit2.id]
+        )
+        # Branch from commit2
+        commit4 = self._add_commit(
+            object_store,
+            tree,
+            b"Fourth commit (branch)",
+            1234567893,
+            parents=[commit2.id],
+        )
+        # Merge commit
+        commit5 = self._add_commit(
+            object_store,
+            tree,
+            b"Merge commit",
+            1234567894,
+            parents=[commit3.id, commit4.id],
+        )
+
+        # Create a repository and point it at the prepared object store
+        repo_path = self.tempdir
+        repo = Repo.init(repo_path)
+        self.addCleanup(repo.close)
+        repo.object_store = object_store
+
+        # Test graph operations WITHOUT commit graph first
+        merge_base_no_graph = find_merge_base(repo, [commit3.id, commit4.id])
+        can_ff_no_graph = can_fast_forward(repo, commit1.id, commit3.id)
+
+        # Now write commit graph
+        object_store.write_commit_graph(
+            [commit1.id, commit2.id, commit3.id, commit4.id, commit5.id],
+            reachable=False,
+        )
+
+        # Verify commit graph is loaded by creating new repo instance
+        repo2 = Repo(repo_path)
+        self.addCleanup(repo2.close)
+        repo2.object_store = object_store
+
+        # Verify commit graph is available
+        commit_graph = repo2.object_store.get_commit_graph()
+        self.assertIsNotNone(commit_graph)
+
+        # Test graph operations WITH commit graph
+        merge_base_with_graph = find_merge_base(repo2, [commit3.id, commit4.id])
+        can_ff_with_graph = can_fast_forward(repo2, commit1.id, commit3.id)
+
+        # Results should be identical
+        self.assertEqual(
+            merge_base_no_graph,
+            merge_base_with_graph,
+            "Merge base should be same with and without commit graph",
+        )
+        self.assertEqual(
+            can_ff_no_graph,
+            can_ff_with_graph,
+            "Fast-forward detection should be same with and without commit graph",
+        )
+
+        # Expected results
+        self.assertEqual(
+            merge_base_with_graph,
+            [commit2.id],
+            "Merge base of commit3 and commit4 should be commit2",
+        )
+        self.assertTrue(
+            can_ff_with_graph, "Should be able to fast-forward from commit1 to commit3"
+        )
+
+        # Test that ParentsProvider in the repo uses commit graph
+        parents_provider = repo2.parents_provider()
+        self.assertIsNotNone(
+            parents_provider.commit_graph,
+            "Repository's parents provider should have commit graph",
+        )
+
+        # Verify parent lookups work through the provider
+        self.assertEqual(parents_provider.get_parents(commit1.id), [])
+        self.assertEqual(parents_provider.get_parents(commit2.id), [commit1.id])
+        self.assertEqual(
+            parents_provider.get_parents(commit5.id), [commit3.id, commit4.id]
+        )
+
+    def test_performance_with_commit_graph(self):
+        """Test merge-base results on a longer history with and without a graph.
+
+        No timing is measured or asserted, since that would depend on the
+        machine. The test only checks that repeated merge-base queries over
+        a 20-commit chain return the same answer before and after a commit
+        graph has been written.
+        """
+        from dulwich.graph import find_merge_base
+        from dulwich.repo import Repo
+
+        object_store = self._make_disk_store()
+        tree = self._add_tree(object_store)
+
+        # Create a linear chain of 20 commits
+        commits = []
+        for i in range(20):
+            parents = [commits[-1].id] if commits else []
+            commits.append(
+                self._add_commit(
+                    object_store,
+                    tree,
+                    f"Commit {i}".encode(),
+                    1234567890 + i,
+                    parents=parents,
+                )
+            )
+
+        def merge_base(repository):
+            # A fresh id list per call keeps the queries independent.
+            return find_merge_base(repository, [commits[0].id, commits[-1].id])
+
+        # Create repository
+        repo_path = self.tempdir
+        repo = Repo.init(repo_path)
+        self.addCleanup(repo.close)
+        repo.object_store = object_store
+
+        # The baseline has to be taken before the graph is written: both
+        # repository instances share ``object_store``, so once the graph
+        # exists neither of them runs without it.
+        self.assertIsNone(object_store.get_commit_graph())
+        result_no_graph = merge_base(repo)
+        for _ in range(10):
+            self.assertEqual(merge_base(repo), result_no_graph)
+
+        # Write commit graph
+        object_store.write_commit_graph([c.id for c in commits], reachable=False)
+
+        # Create new repo instance to pick up commit graph
+        repo2 = Repo(repo_path)
+        self.addCleanup(repo2.close)
+        repo2.object_store = object_store
+
+        # Verify commit graph is loaded
+        self.assertIsNotNone(repo2.object_store.get_commit_graph())
+
+        result_with_graph = merge_base(repo2)
+        for _ in range(10):
+            self.assertEqual(merge_base(repo2), result_with_graph)
+
+        self.assertEqual(
+            result_no_graph,
+            result_with_graph,
+            "Results should be identical with and without commit graph",
+        )
+        self.assertEqual(
+            result_with_graph, [commits[0].id], "Merge base should be the first commit"
+        )
+
+
+# Run the tests in this module when the file is executed directly.
+if __name__ == "__main__":
+    unittest.main()