Browse Source

Improve bundle handling (#1685)

* Add ``BundleClient`` for interacting with bundles like a Git client
* Add ``create_bundle_from_repo`` for easily creating bundles

Fixes #1246
Jelmer Vernooij 1 month ago
parent
commit
cd56b79c51
7 changed files with 927 additions and 15 deletions
  1. 3 0
      NEWS
  2. 117 5
      dulwich/bundle.py
  3. 244 1
      dulwich/client.py
  4. 1 0
      tests/compat/__init__.py
  5. 285 0
      tests/compat/test_bundle.py
  6. 143 8
      tests/test_bundle.py
  7. 134 1
      tests/test_client.py

+ 3 - 0
NEWS

@@ -5,6 +5,9 @@
 
  * Add ``format-patch`` command in porcelain. (Jelmer Vernooij)
 
+ * Add functions for creating bundles and ``BundleClient``
+   for interacting with bundles. (Jelmer Vernooij, #1246)
+
  * Add support for ``core.commitGraph`` configuration setting to control
    whether commit-graph files are used for performance optimization.
    (Jelmer Vernooij)

+ 117 - 5
dulwich/bundle.py

@@ -21,7 +21,7 @@
 
 """Bundle format support."""
 
-from typing import BinaryIO, Optional
+from typing import BinaryIO, Callable, Optional
 
 from .pack import PackData, write_pack_data
 
@@ -30,7 +30,7 @@ class Bundle:
     version: Optional[int]
 
     capabilities: dict[str, Optional[str]]
-    prerequisites: list[tuple[bytes, str]]
+    prerequisites: list[tuple[bytes, bytes]]
     references: dict[bytes, bytes]
     pack_data: PackData
 
@@ -76,7 +76,7 @@ def _read_bundle(f: BinaryIO, version: int) -> Bundle:
             line = f.readline()
     while line.startswith(b"-"):
         (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
-        prerequisites.append((obj_id, comment.decode("utf-8")))
+        prerequisites.append((obj_id, comment))
         line = f.readline()
     while line != b"\n":
         (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
@@ -122,12 +122,124 @@ def write_bundle(f: BinaryIO, bundle: Bundle) -> None:
                 f.write(b"=" + value.encode("utf-8"))
             f.write(b"\n")
     for obj_id, comment in bundle.prerequisites:
-        f.write(b"-%s %s\n" % (obj_id, comment.encode("utf-8")))
+        f.write(b"-" + obj_id + b" " + comment + b"\n")
     for ref, obj_id in bundle.references.items():
-        f.write(b"%s %s\n" % (obj_id, ref))
+        f.write(obj_id + b" " + ref + b"\n")
     f.write(b"\n")
     write_pack_data(
         f.write,
         num_records=len(bundle.pack_data),
         records=bundle.pack_data.iter_unpacked(),
     )
+
+
+def create_bundle_from_repo(
+    repo,
+    refs: Optional[list[bytes]] = None,
+    prerequisites: Optional[list[bytes]] = None,
+    version: Optional[int] = None,
+    capabilities: Optional[dict[str, Optional[str]]] = None,
+    progress: Optional[Callable[[str], None]] = None,
+) -> Bundle:
+    """Create a bundle from a repository.
+
+    Args:
+        repo: Repository object to create bundle from
+        refs: List of refs to include (defaults to all refs); names that
+            are not present in ``repo.refs`` are skipped
+        prerequisites: List of commit SHAs that are prerequisites, each
+            either a 40-character hex SHA or a 20-byte binary SHA
+        version: Bundle version (2 or 3, auto-detected if None)
+        capabilities: Bundle capabilities (for v3 bundles)
+        progress: Optional progress reporting function
+
+    Returns:
+        Bundle object ready for writing
+
+    Raises:
+        ValueError: If a ``bytes``/``str`` prerequisite is neither a
+            40-character hex SHA nor a 20-byte binary SHA
+    """
+    if refs is None:
+        refs = list(repo.refs.keys())
+
+    if prerequisites is None:
+        prerequisites = []
+
+    if capabilities is None:
+        capabilities = {}
+
+    # Build the references dictionary for the bundle
+    bundle_refs = {}
+    want_objects = []
+
+    for ref in refs:
+        if ref in repo.refs:
+            ref_value = repo.refs[ref]
+            # Prefer the peeled value when the ref store reports one that
+            # differs from the ref's own value.
+            try:
+                peeled_value = repo.refs.get_peeled(ref)
+            except KeyError:
+                peeled_value = None
+            if peeled_value is not None and peeled_value != ref_value:
+                bundle_refs[ref] = peeled_value
+            else:
+                bundle_refs[ref] = ref_value
+            want_objects.append(bundle_refs[ref])
+
+    # Normalise every prerequisite to a hex SHA; the same value is stored in
+    # the bundle header and passed to pack generation as a "have".
+    # A strict per-byte check is used because bytes.fromhex() silently skips
+    # ASCII whitespace and would accept e.g. 38 hex digits plus two spaces.
+    hex_digits = frozenset(b"0123456789abcdefABCDEF")
+    bundle_prerequisites = []
+    have_objects = []
+    for prereq in prerequisites:
+        if isinstance(prereq, str):
+            prereq = prereq.encode("utf-8")
+        if isinstance(prereq, bytes) and len(prereq) == 40:
+            # Hex SHA: validate, then use as-is
+            if not hex_digits.issuperset(prereq):
+                raise ValueError(f"Invalid prerequisite format: {prereq!r}")
+            hex_prereq = prereq
+        elif isinstance(prereq, bytes) and len(prereq) != 20:
+            raise ValueError(f"Invalid prerequisite SHA length: {len(prereq)}")
+        else:
+            # 20-byte binary SHA, or another object exposing ``.hex()``
+            # that is assumed to hold a binary SHA
+            hex_prereq = prereq.hex().encode("ascii")
+        bundle_prerequisites.append((hex_prereq, b""))
+        have_objects.append(hex_prereq)
+
+    # Generate pack data containing all objects needed for the refs
+    pack_count, pack_objects = repo.generate_pack_data(
+        have=have_objects,
+        want=want_objects,
+        progress=progress,
+    )
+
+    # Minimal stand-in exposing the two members used when a bundle is
+    # written: ``len()`` and ``iter_unpacked()``.
+    class _BundlePackData:
+        def __init__(self, count, objects):
+            self._count = count
+            self._objects = list(objects)  # Materialize the iterator
+
+        def __len__(self):
+            return self._count
+
+        def iter_unpacked(self):
+            return iter(self._objects)
+
+    pack_data = _BundlePackData(pack_count, pack_objects)
+
+    # Create bundle object
+    bundle = Bundle()
+    bundle.version = version
+    bundle.capabilities = capabilities
+    bundle.prerequisites = bundle_prerequisites
+    bundle.references = bundle_refs
+    bundle.pack_data = pack_data  # type: ignore[assignment]
+
+    return bundle

+ 244 - 1
dulwich/client.py

@@ -72,6 +72,7 @@ from .errors import GitProtocolError, NotGitRepository, SendPackError
 from .pack import (
     PACK_SPOOL_FILE_MAX_SIZE,
     PackChunkGenerator,
+    PackData,
     UnpackedObject,
     write_pack_from_container,
 )
@@ -1946,6 +1947,243 @@ class LocalGitClient(GitClient):
             return LsRemoteResult(refs, symrefs)
 
 
+class BundleClient(GitClient):
+    """Git Client that reads from a bundle file."""
+
+    # Recognised first lines of a bundle file, mapped to the format version
+    _BUNDLE_HEADERS = {
+        b"# v2 git bundle\n": 2,
+        b"# v3 git bundle\n": 3,
+    }
+
+    def __init__(
+        self,
+        thin_packs: bool = True,
+        report_activity=None,
+        config: Optional[Config] = None,
+    ) -> None:
+        """Create a new BundleClient instance.
+
+        Args:
+          thin_packs: Whether or not thin packs should be retrieved
+            (currently unused by this client)
+          report_activity: Optional callback for reporting transport
+            activity.
+          config: Optional configuration (currently unused by this client)
+        """
+        self._report_activity = report_activity
+
+    def get_url(self, path):
+        """Return the URL for ``path``, which is the path itself."""
+        return path
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        """Create a client from keyword arguments; ``parsedurl`` is ignored."""
+        return cls(**kwargs)
+
+    @classmethod
+    def _is_bundle_file(cls, path):
+        """Check if a file is a git bundle by reading the first line.
+
+        Returns False if the file cannot be opened or read (``OSError``).
+        """
+        try:
+            with open(path, "rb") as f:
+                return f.readline() in cls._BUNDLE_HEADERS
+        except OSError:
+            return False
+
+    @classmethod
+    def _open_bundle(cls, path):
+        """Parse the header of the bundle file at ``path``.
+
+        Only the metadata is read; ``pack_data`` on the returned bundle is
+        None and the pack stream is read separately when needed.
+
+        Raises:
+          AssertionError: If the first line is not a known bundle header
+        """
+        if not isinstance(path, str):
+            path = os.fsdecode(path)
+
+        from dulwich.bundle import Bundle
+
+        capabilities = {}
+        prerequisites = []
+        references = {}
+
+        # Read bundle metadata without PackData to avoid file handle issues
+        with open(path, "rb") as f:
+            firstline = f.readline()
+            version = cls._BUNDLE_HEADERS.get(firstline)
+            if version is None:
+                raise AssertionError(f"unsupported bundle format header: {firstline!r}")
+
+            line = f.readline()
+
+            if version >= 3:
+                while line.startswith(b"@"):
+                    # "@key=value" or a bare "@key" (value is None)
+                    entry = line[1:].rstrip(b"\n")
+                    key, sep, value_bytes = entry.partition(b"=")
+                    value = value_bytes.decode("utf-8") if sep else None
+                    capabilities[key.decode("utf-8")] = value
+                    line = f.readline()
+
+            while line.startswith(b"-"):
+                (obj_id, comment) = line[1:].rstrip(b"\n").split(b" ", 1)
+                # Keep the comment as bytes so the entry matches the
+                # Bundle.prerequisites type (tuple[bytes, bytes])
+                prerequisites.append((obj_id, comment))
+                line = f.readline()
+
+            while line != b"\n":
+                (obj_id, ref) = line.rstrip(b"\n").split(b" ", 1)
+                references[ref] = obj_id
+                line = f.readline()
+
+        # Don't read PackData here, we'll do it later
+        bundle = Bundle()
+        bundle.version = version
+        bundle.capabilities = capabilities
+        bundle.prerequisites = prerequisites
+        bundle.references = references
+        bundle.pack_data = None  # Will be read on demand
+
+        return bundle
+
+    @staticmethod
+    def _skip_to_pack_data(f, version):
+        """Skip to the pack data section in a bundle file.
+
+        Raises:
+          AssertionError: If the first line is not a known bundle header
+          ValueError: If the file ends before the blank line that
+            terminates the header
+        """
+        # Skip header
+        header = f.readline()
+        if header not in BundleClient._BUNDLE_HEADERS:
+            raise AssertionError(f"Invalid bundle header: {header!r}")
+
+        line = f.readline()
+
+        # Skip capabilities (v3 only)
+        if version >= 3:
+            while line.startswith(b"@"):
+                line = f.readline()
+
+        # Skip prerequisites
+        while line.startswith(b"-"):
+            line = f.readline()
+
+        # Skip references; readline() returns b"" at EOF, so bail out
+        # explicitly instead of looping forever on a truncated file
+        while line != b"\n":
+            if not line:
+                raise ValueError("Unexpected end of file in bundle header")
+            line = f.readline()
+
+        # Now at pack data
+
+    @classmethod
+    def _read_pack_bytes(cls, path, version):
+        """Return everything after the header of the bundle at ``path``."""
+        with open(path, "rb") as pack_file:
+            cls._skip_to_pack_data(pack_file, version)
+            return pack_file.read()
+
+    @staticmethod
+    def _filter_refs(refs, ref_prefix):
+        """Return the refs whose name starts with any entry of ``ref_prefix``.
+
+        ``refs`` is returned unchanged when ``ref_prefix`` is empty or None.
+        """
+        if not ref_prefix:
+            return refs
+        prefixes = tuple(ref_prefix)
+        return {
+            ref_name: ref_value
+            for ref_name, ref_value in refs.items()
+            if ref_name.startswith(prefixes)
+        }
+
+    def send_pack(self, path, update_refs, generate_pack_data, progress=None):
+        """Upload is not supported for bundle files."""
+        raise NotImplementedError("Bundle files are read-only")
+
+    def fetch(
+        self,
+        path: str,
+        target: BaseRepo,
+        determine_wants: Optional[
+            Callable[[dict[bytes, bytes], Optional[int]], list[bytes]]
+        ] = None,
+        progress: Optional[Callable[[bytes], None]] = None,
+        depth: Optional[int] = None,
+        ref_prefix: Optional[list[Ref]] = None,
+        filter_spec: Optional[bytes] = None,
+        protocol_version: Optional[int] = None,
+        **kwargs,
+    ):
+        """Fetch into a target repository from a bundle file.
+
+        The whole pack stored in the bundle is added to
+        ``target.object_store``. ``determine_wants`` is called when given,
+        but its result is not used. ``progress``, ``depth``,
+        ``filter_spec`` and ``protocol_version`` are not used.
+
+        Returns:
+          FetchPackResult with the bundle refs, limited to ``ref_prefix``
+          when one is given
+        """
+        bundle = self._open_bundle(path)
+
+        # Get references from bundle
+        refs = dict(bundle.references)
+
+        if determine_wants is not None:
+            determine_wants(refs, None)
+
+        # Read pack data into memory to avoid file positioning issues
+        pack_bytes = self._read_pack_bytes(path, bundle.version)
+
+        # Create PackData from in-memory bytes
+        from io import BytesIO
+
+        pack_data = PackData.from_file(BytesIO(pack_bytes))
+        target.object_store.add_pack_data(len(pack_data), pack_data.iter_unpacked())
+
+        return FetchPackResult(self._filter_refs(refs, ref_prefix), {}, agent_string())
+
+    def fetch_pack(
+        self,
+        path,
+        determine_wants,
+        graph_walker,
+        pack_data,
+        progress=None,
+        depth: Optional[int] = None,
+        ref_prefix: Optional[list[Ref]] = None,
+        filter_spec: Optional[bytes] = None,
+        protocol_version: Optional[int] = None,
+    ) -> FetchPackResult:
+        """Retrieve a pack from a bundle file.
+
+        The raw pack stream of the bundle is passed to the ``pack_data``
+        callback in a single call. ``determine_wants`` is called, but its
+        result is not used.
+        """
+        bundle = self._open_bundle(path)
+
+        # Get references from bundle
+        refs = dict(bundle.references)
+
+        determine_wants(refs)
+
+        # Read pack data and write it to the callback
+        pack_data(self._read_pack_bytes(path, bundle.version))
+
+        return FetchPackResult(self._filter_refs(refs, ref_prefix), {}, agent_string())
+
+    def get_refs(
+        self,
+        path,
+        protocol_version: Optional[int] = None,
+        ref_prefix: Optional[list[Ref]] = None,
+    ):
+        """Retrieve the current refs from a bundle file."""
+        bundle = self._open_bundle(path)
+        refs = dict(bundle.references)
+        return LsRemoteResult(self._filter_refs(refs, ref_prefix), {})
+
+
 # What Git client to use for local access
 default_local_git_client_cls = LocalGitClient
 
@@ -3086,12 +3324,17 @@ def get_transport_and_path(
         pass
 
     if sys.platform == "win32" and location[0].isalpha() and location[1:3] == ":\\":
-        # Windows local path
+        # Windows local path - but check if it's a bundle file first
+        if BundleClient._is_bundle_file(location):
+            return BundleClient(**kwargs), location
         return default_local_git_client_cls(**kwargs), location
 
     try:
         (username, hostname, path) = parse_rsync_url(location)
     except ValueError:
+        # Check if it's a bundle file before assuming it's a local path
+        if BundleClient._is_bundle_file(location):
+            return BundleClient(**kwargs), location
         # Otherwise, assume it's a local path.
         return default_local_git_client_cls(**kwargs), location
     else:

+ 1 - 0
tests/compat/__init__.py

@@ -26,6 +26,7 @@ import unittest
 
 def test_suite():
     names = [
+        "bundle",
         "check_ignore",
         "client",
         "commit_graph",

+ 285 - 0
tests/compat/test_bundle.py

@@ -0,0 +1,285 @@
+# test_bundle.py -- test bundle compatibility with CGit
+# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for bundle compatibility with CGit."""
+
+import os
+import tempfile
+
+from dulwich.bundle import create_bundle_from_repo, read_bundle, write_bundle
+from dulwich.objects import Commit, Tree
+from dulwich.repo import Repo
+
+from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
+
+
+class CompatBundleTestCase(CompatTestCase):
+    """Bundle round-trips between dulwich and the git command line."""
+
+    def setUp(self) -> None:
+        super().setUp()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, self.test_dir)
+        self.repo_path = os.path.join(self.test_dir, "repo")
+        self.repo = Repo.init(self.repo_path, mkdir=True)
+        self.addCleanup(self.repo.close)
+
+    def _git(self, *args):
+        """Run git with ``args`` inside the test repository; return its output."""
+        return run_git_or_fail(list(args), cwd=self.repo_path)
+
+    def _set_git_identity(self) -> None:
+        """Configure the user name and email used for commits made via git."""
+        self._git("config", "user.name", "Test User")
+        self._git("config", "user.email", "test@example.com")
+
+    def _write(self, relpath, content, mode="w") -> None:
+        """Write ``content`` to ``relpath`` below the test repository."""
+        with open(os.path.join(self.repo_path, relpath), mode) as f:
+            f.write(content)
+
+    def _commit_file(self, relpath, content, message, mode="w") -> None:
+        """Write a file, stage it and commit it with ``message`` via git."""
+        self._write(relpath, content, mode)
+        self._git("add", relpath)
+        self._git("commit", "-m", message)
+
+    def _git_bundle(self, filename, rev):
+        """Have git create a bundle of ``rev`` in the temp dir; return its path."""
+        bundle_path = os.path.join(self.test_dir, filename)
+        self._git("bundle", "create", bundle_path, rev)
+        return bundle_path
+
+    def _read(self, bundle_path):
+        """Read the bundle at ``bundle_path`` using dulwich."""
+        with open(bundle_path, "rb") as f:
+            return read_bundle(f)
+
+    def test_create_bundle_git_compat(self) -> None:
+        """Test creating a bundle that git can read."""
+        # Store an empty tree and a commit pointing at it
+        tree = Tree()
+        self.repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.author_time = commit.commit_time = 1234567890
+        commit.author_timezone = commit.commit_timezone = 0
+        commit.message = b"Test commit"
+        self.repo.object_store.add_object(commit)
+
+        self.repo.refs[b"refs/heads/master"] = commit.id
+
+        # Write the bundle with dulwich
+        bundle = create_bundle_from_repo(self.repo)
+        bundle_path = os.path.join(self.test_dir, "test.bundle")
+        with open(bundle_path, "wb") as f:
+            write_bundle(f, bundle)
+
+        # Verify git can read the bundle (must run from a repo directory)
+        output = self._git("bundle", "verify", bundle_path)
+        for expected in (b"The bundle contains", b"refs/heads/master"):
+            self.assertIn(expected, output)
+
+    def test_read_git_bundle(self) -> None:
+        """Test reading a bundle created by git."""
+        self._set_git_identity()
+        self._commit_file("test.txt", "test content\n", "Test commit")
+
+        bundle = self._read(self._git_bundle("git.bundle", "HEAD"))
+
+        self.assertEqual(2, bundle.version)
+        self.assertIn(b"HEAD", bundle.references)
+        self.assertEqual({}, bundle.capabilities)
+        self.assertEqual([], bundle.prerequisites)
+
+    def test_read_git_bundle_multiple_refs(self) -> None:
+        """Test reading a bundle with multiple references created by git."""
+        self._set_git_identity()
+
+        # One commit on the initial branch
+        self._commit_file("test.txt", "initial content\n", "Initial commit")
+
+        # A feature branch that rewrites the same file
+        self._git("checkout", "-b", "feature")
+        self._commit_file("test.txt", "feature content\n", "Feature commit")
+
+        # A second branch, started from master, that adds a new file
+        self._git("checkout", "-b", "develop", "master")
+        self._commit_file("dev.txt", "dev content\n", "Dev commit")
+
+        bundle = self._read(self._git_bundle("multi_ref.bundle", "--all"))
+
+        for branch in (b"master", b"feature", b"develop"):
+            self.assertIn(b"refs/heads/" + branch, bundle.references)
+
+    def test_read_git_bundle_with_prerequisites(self) -> None:
+        """Test reading a bundle with prerequisites created by git."""
+        self._set_git_identity()
+
+        self._commit_file("test.txt", "content 1\n", "Commit 1")
+        self._commit_file("test.txt", "content 2\n", "Commit 2", mode="a")
+
+        # The parent of the current HEAD becomes the bundle's base
+        first_commit = self._git("rev-parse", "HEAD~1").strip()
+
+        self._commit_file("test.txt", "content 3\n", "Commit 3", mode="a")
+
+        # Bundle only the commits after first_commit
+        bundle = self._read(
+            self._git_bundle("prereq.bundle", f"{first_commit.decode()}..HEAD")
+        )
+
+        self.assertGreater(len(bundle.prerequisites), 0)
+        # The prerequisite should be the first commit
+        self.assertIn(first_commit, [entry[0] for entry in bundle.prerequisites])
+
+    def test_read_git_bundle_complex_pack(self) -> None:
+        """Test reading a bundle with complex pack data (multiple objects) created by git."""
+        self._set_git_identity()
+
+        for subdir in (("src", "main"), ("tests",)):
+            os.makedirs(os.path.join(self.repo_path, *subdir), exist_ok=True)
+
+        # Stage several files, then record them in a single commit
+        initial_files = {
+            "README.md": "# Test Project\n\nThis is a test.",
+            "src/main/app.py": "def main():\n    print('Hello')\n",
+            "src/main/utils.py": "def helper():\n    return 42\n",
+            "tests/test_app.py": "def test_main():\n    pass\n",
+            ".gitignore": "*.pyc\n__pycache__/\n",
+        }
+        for relpath, text in initial_files.items():
+            self._write(relpath, text)
+            self._git("add", relpath)
+        self._git("commit", "-m", "Initial complex commit")
+
+        # Follow-up commit that appends to one of the files
+        self._commit_file(
+            "src/main/app.py",
+            "\nif __name__ == '__main__':\n    main()\n",
+            "Update app.py",
+            mode="a",
+        )
+
+        bundle = self._read(self._git_bundle("complex.bundle", "HEAD"))
+
+        self.assertEqual(2, bundle.version)
+        self.assertIn(b"HEAD", bundle.references)
+
+        # Verify pack data exists
+        self.assertIsNotNone(bundle.pack_data)
+        self.assertGreater(len(bundle.pack_data), 0)
+
+    def test_clone_from_git_bundle(self) -> None:
+        """Test cloning from a bundle created by git."""
+        self._set_git_identity()
+
+        # Three commits, each appending one line to the same file
+        for i in range(3):
+            self._commit_file("test.txt", f"Line {i}\n", f"Commit {i}", mode="a")
+
+        bundle_path = self._git_bundle("clone.bundle", "HEAD")
+        bundle = self._read(bundle_path)
+
+        # Verify bundle was read correctly
+        self.assertEqual(2, bundle.version)
+        self.assertIn(b"HEAD", bundle.references)
+        self.assertEqual([], bundle.prerequisites)
+
+        # Verify pack data exists
+        self.assertIsNotNone(bundle.pack_data)
+        self.assertGreater(len(bundle.pack_data), 0)
+
+        # Use git to verify the bundle can be used for cloning
+        clone_path = os.path.join(self.test_dir, "cloned_repo")
+        run_git_or_fail(["clone", bundle_path, clone_path])
+
+        # Verify the cloned repository exists and has content
+        for expected_path in (clone_path, os.path.join(clone_path, "test.txt")):
+            self.assertTrue(os.path.exists(expected_path))

+ 143 - 8
tests/test_bundle.py

@@ -25,8 +25,10 @@ import os
 import tempfile
 from io import BytesIO
 
-from dulwich.bundle import Bundle, read_bundle, write_bundle
+from dulwich.bundle import Bundle, create_bundle_from_repo, read_bundle, write_bundle
+from dulwich.objects import Blob, Commit, Tree
 from dulwich.pack import PackData, write_pack_objects
+from dulwich.repo import MemoryRepo
 
 from . import TestCase
 
@@ -152,7 +154,7 @@ class BundleTests(TestCase):
         bundle = read_bundle(f)
         self.assertEqual(2, bundle.version)
         self.assertEqual({}, bundle.capabilities)
-        self.assertEqual([(b"cc" * 20, "prerequisite comment")], bundle.prerequisites)
+        self.assertEqual([(b"cc" * 20, b"prerequisite comment")], bundle.prerequisites)
         self.assertEqual({b"refs/heads/master": b"ab" * 20}, bundle.references)
 
     def test_read_bundle_v3(self) -> None:
@@ -175,7 +177,7 @@ class BundleTests(TestCase):
         self.assertEqual(
             {"capability1": None, "capability2": "value2"}, bundle.capabilities
         )
-        self.assertEqual([(b"cc" * 20, "prerequisite comment")], bundle.prerequisites)
+        self.assertEqual([(b"cc" * 20, b"prerequisite comment")], bundle.prerequisites)
         self.assertEqual({b"refs/heads/master": b"ab" * 20}, bundle.references)
 
     def test_read_bundle_invalid_format(self) -> None:
@@ -192,7 +194,7 @@ class BundleTests(TestCase):
         bundle = Bundle()
         bundle.version = 2
         bundle.capabilities = {}
-        bundle.prerequisites = [(b"cc" * 20, "prerequisite comment")]
+        bundle.prerequisites = [(b"cc" * 20, b"prerequisite comment")]
         bundle.references = {b"refs/heads/master": b"ab" * 20}
 
         # Create a simple pack data
@@ -218,7 +220,7 @@ class BundleTests(TestCase):
         bundle = Bundle()
         bundle.version = 3
         bundle.capabilities = {"capability1": None, "capability2": "value2"}
-        bundle.prerequisites = [(b"cc" * 20, "prerequisite comment")]
+        bundle.prerequisites = [(b"cc" * 20, b"prerequisite comment")]
         bundle.references = {b"refs/heads/master": b"ab" * 20}
 
         # Create a simple pack data
@@ -247,7 +249,7 @@ class BundleTests(TestCase):
         bundle1 = Bundle()
         bundle1.version = None
         bundle1.capabilities = {"capability1": "value1"}
-        bundle1.prerequisites = [(b"cc" * 20, "prerequisite comment")]
+        bundle1.prerequisites = [(b"cc" * 20, b"prerequisite comment")]
         bundle1.references = {b"refs/heads/master": b"ab" * 20}
 
         b1 = BytesIO()
@@ -265,7 +267,7 @@ class BundleTests(TestCase):
         bundle2 = Bundle()
         bundle2.version = None
         bundle2.capabilities = {}
-        bundle2.prerequisites = [(b"cc" * 20, "prerequisite comment")]
+        bundle2.prerequisites = [(b"cc" * 20, b"prerequisite comment")]
         bundle2.references = {b"refs/heads/master": b"ab" * 20}
 
         b2 = BytesIO()
@@ -301,7 +303,7 @@ class BundleTests(TestCase):
         origbundle.version = 3
         origbundle.capabilities = {"foo": None}
         origbundle.references = {b"refs/heads/master": b"ab" * 20}
-        origbundle.prerequisites = [(b"cc" * 20, "comment")]
+        origbundle.prerequisites = [(b"cc" * 20, b"comment")]
         b = BytesIO()
         write_pack_objects(b.write, [])
         b.seek(0)
@@ -314,3 +316,136 @@ class BundleTests(TestCase):
                 newbundle = read_bundle(f)
 
                 self.assertEqual(origbundle, newbundle)
+
+    def test_create_bundle_from_repo(self) -> None:
+        """Test that a default bundle of a one-commit repo holds its ref and objects."""
+        # Start from an empty MemoryRepo
+        repo = MemoryRepo()
+
+        # Blob holding the file contents, added to the object store
+        blob = Blob.from_string(b"Hello world")
+        repo.object_store.add_object(blob)
+
+        # Tree with a single entry (mode 0o100644) pointing at the blob
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.id)
+        repo.object_store.add_object(tree)
+
+        # Commit of that tree with fixed metadata; no parents are set
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Initial commit"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        repo.object_store.add_object(commit)
+
+        # Point refs/heads/master at the commit
+        repo.refs[b"refs/heads/master"] = commit.id
+
+        # Build the bundle with default arguments (only the repo is passed)
+        bundle = create_bundle_from_repo(repo)
+
+        # Expect exactly the one ref, and empty prerequisites and capabilities
+        self.assertEqual(bundle.references, {b"refs/heads/master": commit.id})
+        self.assertEqual(bundle.prerequisites, [])
+        self.assertEqual(bundle.capabilities, {})
+        self.assertIsNotNone(bundle.pack_data)
+
+        # Convert each packed object's sha() to hex bytes so it can be compared with .id
+        objects = list(bundle.pack_data.iter_unpacked())
+        object_ids = {obj.sha().hex().encode("ascii") for obj in objects}
+        self.assertIn(blob.id, object_ids)
+        self.assertIn(tree.id, object_ids)
+        self.assertIn(commit.id, object_ids)
+
+    def test_create_bundle_with_prerequisites(self) -> None:
+        """Test that requested prerequisites are recorded in the bundle."""
+        repo = MemoryRepo()
+
+        # Blob, tree and commit forming a one-commit history
+        blob = Blob.from_string(b"Hello world")
+        repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.id)
+        repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Initial commit"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.id
+
+        # Request a single prerequisite; this id is never added to the repo in this test
+        prereq_id = b"aa" * 20  # 40-byte hex id, same form as other object ids
+        bundle = create_bundle_from_repo(repo, prerequisites=[prereq_id])
+
+        # Exactly one prerequisite entry, whose first element is the id passed in
+        self.assertEqual(len(bundle.prerequisites), 1)
+        self.assertEqual(bundle.prerequisites[0][0], prereq_id)
+
+    def test_create_bundle_with_specific_refs(self) -> None:
+        """Test that the refs argument limits which refs the bundle contains."""
+        repo = MemoryRepo()
+
+        # One commit, referenced by two branches below
+        blob = Blob.from_string(b"Hello world")
+        repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.id)
+        repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Initial commit"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.id
+        repo.refs[b"refs/heads/feature"] = commit.id
+
+        # Ask for master only, although feature points at the same commit
+        bundle = create_bundle_from_repo(repo, refs=[b"refs/heads/master"])
+
+        # Only the requested ref may appear in the bundle's references
+        self.assertEqual(len(bundle.references), 1)
+        self.assertIn(b"refs/heads/master", bundle.references)
+        self.assertNotIn(b"refs/heads/feature", bundle.references)
+
+    def test_create_bundle_with_capabilities(self) -> None:
+        """Test that capabilities and version are passed through to the bundle."""
+        repo = MemoryRepo()
+
+        # Minimal one-commit repository with a master ref
+        blob = Blob.from_string(b"Hello world")
+        repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.id)
+        repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Initial commit"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.id
+
+        # Pass capabilities together with an explicit version of 3
+        capabilities = {"object-format": "sha1"}
+        bundle = create_bundle_from_repo(repo, capabilities=capabilities, version=3)
+
+        # Both values should be stored on the bundle unchanged
+        self.assertEqual(bundle.capabilities, capabilities)
+        self.assertEqual(bundle.version, 3)

+ 134 - 1
tests/test_client.py

@@ -33,7 +33,9 @@ from urllib.parse import urlparse
 
 import dulwich
 from dulwich import client
+from dulwich.bundle import create_bundle_from_repo, write_bundle
 from dulwich.client import (
+    BundleClient,
     FetchPackResult,
     GitProtocolError,
     HangupException,
@@ -59,7 +61,7 @@ from dulwich.client import (
     parse_rsync_url,
 )
 from dulwich.config import ConfigDict
-from dulwich.objects import Commit, Tree
+from dulwich.objects import Blob, Commit, Tree
 from dulwich.pack import pack_objects_to_data, write_pack_data, write_pack_objects
 from dulwich.protocol import DEFAULT_GIT_PROTOCOL_VERSION_FETCH, TCP_GIT_PORT, Protocol
 from dulwich.repo import MemoryRepo, Repo
@@ -1023,6 +1025,137 @@ class LocalGitClientTests(TestCase):
         self.assertEqual(obj_local, obj_target)
 
 
+class BundleClientTests(TestCase):  # BundleClient tests run against a bundle file in a temp dir
+    def setUp(self) -> None:
+        super().setUp()
+        self.tempdir = tempfile.mkdtemp()  # scratch directory for the bundle files
+        self.addCleanup(shutil.rmtree, self.tempdir)  # removed again during cleanup
+
+    def _create_test_bundle(self):
+        """Write a one-commit bundle into the temp dir; return (bundle_path, repo)."""
+        # Start from an empty MemoryRepo
+        repo = MemoryRepo()
+
+        # Blob, tree and commit forming a one-commit history
+        blob = Blob.from_string(b"Hello world")
+        repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree.add(b"hello.txt", 0o100644, blob.id)
+        repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.message = b"Initial commit"
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        repo.object_store.add_object(commit)
+
+        repo.refs[b"refs/heads/master"] = commit.id
+
+        # Build the bundle with default arguments (only the repo is passed)
+        bundle = create_bundle_from_repo(repo)
+
+        # Serialize it to <tempdir>/test.bundle in binary mode
+        bundle_path = os.path.join(self.tempdir, "test.bundle")
+        with open(bundle_path, "wb") as f:
+            write_bundle(f, bundle)
+
+        return bundle_path, repo
+
+    def test_is_bundle_file(self) -> None:
+        """Test _is_bundle_file on a bundle, a plain text file and a missing path."""
+        bundle_path, _ = self._create_test_bundle()
+
+        # Positive case: the file written by write_bundle
+        self.assertTrue(BundleClient._is_bundle_file(bundle_path))
+
+        # Negative case: an existing file whose content is not a bundle
+        regular_file = os.path.join(self.tempdir, "regular.txt")
+        with open(regular_file, "w") as f:
+            f.write("not a bundle")
+        self.assertFalse(BundleClient._is_bundle_file(regular_file))
+
+        # Negative case: a missing path must yield False
+        self.assertFalse(BundleClient._is_bundle_file("/non/existent/file"))
+
+    def test_get_refs(self) -> None:
+        """Test that get_refs reports the bundle's refs and no symrefs."""
+        bundle_path, _ = self._create_test_bundle()
+
+        client = BundleClient()
+        result = client.get_refs(bundle_path)
+
+        self.assertIn(b"refs/heads/master", result.refs)
+        self.assertEqual(result.symrefs, {})
+
+    def test_fetch_pack(self) -> None:
+        """Test that fetch_pack returns the refs and writes pack data to the callback."""
+        bundle_path, source_repo = self._create_test_bundle()  # source_repo is not used below
+
+        client = BundleClient()
+        pack_data = BytesIO()
+
+        def determine_wants(refs):  # want every id the bundle offers
+            return list(refs.values())
+
+        class MockGraphWalker:  # stub walker: next() returns None, ack() does nothing
+            def next(self):
+                return None
+
+            def ack(self, sha):
+                pass
+
+        result = client.fetch_pack(
+            bundle_path, determine_wants, MockGraphWalker(), pack_data.write
+        )
+
+        # The result must list the bundle's master ref
+        self.assertIn(b"refs/heads/master", result.refs)
+
+        # Something must have been written through pack_data.write
+        self.assertGreater(len(pack_data.getvalue()), 0)
+
+    def test_fetch(self) -> None:
+        """Test that fetch copies the bundle's objects into a target repository."""
+        bundle_path, source_repo = self._create_test_bundle()  # source_repo is not used below
+
+        client = BundleClient()
+        target_repo = MemoryRepo()
+
+        result = client.fetch(bundle_path, target_repo)
+
+        # The result must list the bundle's master ref
+        self.assertIn(b"refs/heads/master", result.refs)
+
+        # The object that ref points at must now be in the target's object store
+        master_id = result.refs[b"refs/heads/master"]
+        self.assertIn(master_id, target_repo.object_store)
+
+        # And it must be the commit created by _create_test_bundle
+        commit = target_repo.object_store[master_id]
+        self.assertEqual(commit.message, b"Initial commit")
+
+    def test_send_pack_not_supported(self) -> None:
+        """Test that send_pack raises NotImplementedError."""
+        bundle_path, _ = self._create_test_bundle()
+
+        client = BundleClient()
+
+        with self.assertRaises(NotImplementedError):
+            client.send_pack(bundle_path, None, None)  # None placeholders for the other arguments
+
+    def test_get_transport_and_path_bundle(self) -> None:
+        """Test that get_transport_and_path detects bundle files."""
+        bundle_path, _ = self._create_test_bundle()
+
+        client, path = get_transport_and_path(bundle_path)
+
+        self.assertIsInstance(client, BundleClient)  # a bundle path selects BundleClient
+        self.assertEqual(path, bundle_path)  # and the path comes back unchanged
+
+
 class HttpGitClientTests(TestCase):
     def test_get_url(self) -> None:
         base_url = "https://github.com/jelmer/dulwich"