瀏覽代碼

dumb: Ensure repository where some objects are packed can be cloned

Cloning a repository containing pack files with the dump protocol was
raising multiple errors related to unwanted exceptions from HTTP client
and missing pack index file.
Antoine Lambert 1 月之前
父節點
當前提交
b57534f21b
共有 3 個文件被更改,包括 50 次插入28 次删除
  1. 15 11
      dulwich/client.py
  2. 9 8
      dulwich/dumb.py
  3. 26 9
      tests/compat/test_dumb.py

+ 15 - 11
dulwich/client.py

@@ -40,6 +40,7 @@ Known capabilities that are not supported:
 """
 
 import copy
+import functools
 import logging
 import os
 import select
@@ -2506,7 +2507,7 @@ class AbstractHttpGitClient(GitClient):
         self.dumb = dumb
         GitClient.__init__(self, **kwargs)
 
-    def _http_request(self, url, headers=None, data=None):
+    def _http_request(self, url, headers=None, data=None, raise_for_status=True):
         """Perform HTTP request.
 
         Args:
@@ -2820,7 +2821,9 @@ class AbstractHttpGitClient(GitClient):
             from .dumb import DumbRemoteHTTPRepo
 
             # Pass http_request function
-            dumb_repo = DumbRemoteHTTPRepo(url, self._http_request)
+            dumb_repo = DumbRemoteHTTPRepo(
+                url, functools.partial(self._http_request, raise_for_status=False)
+            )
 
             # Fetch pack data from dumb remote
             pack_data_list = list(
@@ -2985,7 +2988,7 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             path = path.decode("utf-8")
         return urljoin(self._base_url, path).rstrip("/") + "/"
 
-    def _http_request(self, url, headers=None, data=None):
+    def _http_request(self, url, headers=None, data=None, raise_for_status=True):
         import urllib3.exceptions
 
         req_headers = self.pool_manager.headers.copy()
@@ -3009,14 +3012,15 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
         except urllib3.exceptions.HTTPError as e:
             raise GitProtocolError(str(e)) from e
 
-        if resp.status == 404:
-            raise NotGitRepository
-        if resp.status == 401:
-            raise HTTPUnauthorized(resp.headers.get("WWW-Authenticate"), url)
-        if resp.status == 407:
-            raise HTTPProxyUnauthorized(resp.headers.get("Proxy-Authenticate"), url)
-        if resp.status != 200:
-            raise GitProtocolError(f"unexpected http resp {resp.status} for {url}")
+        if raise_for_status:
+            if resp.status == 404:
+                raise NotGitRepository
+            if resp.status == 401:
+                raise HTTPUnauthorized(resp.headers.get("WWW-Authenticate"), url)
+            if resp.status == 407:
+                raise HTTPProxyUnauthorized(resp.headers.get("Proxy-Authenticate"), url)
+            if resp.status != 200:
+                raise GitProtocolError(f"unexpected http resp {resp.status} for {url}")
 
         resp.content_type = resp.headers.get("Content-Type")
         # Check if geturl() is available (urllib3 version >= 1.23)

+ 9 - 8
dulwich/dumb.py

@@ -42,7 +42,7 @@ from .objects import (
     hex_to_sha,
     sha_to_hex,
 )
-from .pack import Pack, PackIndex, UnpackedObject, load_pack_index_file
+from .pack import Pack, PackData, PackIndex, UnpackedObject, load_pack_index_file
 from .refs import Ref, read_info_refs, split_peeled_refs
 from .repo import BaseRepo
 
@@ -212,13 +212,13 @@ class DumbHTTPObjectStore(BaseObjectStore):
         # Convert hex to binary for pack operations
         binsha = hex_to_sha(sha)
 
-        for pack_name, idx in self._packs or []:
-            if idx is None:
-                idx = self._get_pack_index(pack_name)
+        for pack_name, pack_idx in self._packs or []:
+            if pack_idx is None:
+                pack_idx = self._get_pack_index(pack_name)
 
             try:
                 # Check if object is in this pack
-                idx.object_offset(binsha)
+                pack_idx.object_offset(binsha)
             except KeyError:
                 continue
 
@@ -232,12 +232,13 @@ class DumbHTTPObjectStore(BaseObjectStore):
 
             if not os.path.exists(pack_path):
                 # Download the pack file
-                pack_data = self._fetch_url(f"objects/pack/{pack_name}.pack")
+                data = self._fetch_url(f"objects/pack/{pack_name}.pack")
                 with open(pack_path, "wb") as f:
-                    f.write(pack_data)
+                    f.write(data)
 
             # Open the pack and get the object
-            pack = Pack(pack_path[:-5])  # Remove .pack extension
+            pack_data = PackData(pack_path)
+            pack = Pack.from_objects(pack_data, pack_idx)
             try:
                 return pack.get_raw(binsha)
             finally:

+ 26 - 9
tests/compat/test_dumb.py

@@ -101,9 +101,11 @@ class DumbHTTPGitServer:
         return f"http://127.0.0.1:{self.port}"
 
 
-class DumbHTTPClientTests(CompatTestCase):
+class DumbHTTPClientNoPackTests(CompatTestCase):
     """Tests for dumb HTTP client against real git repositories."""
 
+    with_pack = False
+
     def setUp(self):
         super().setUp()
         # Create a temporary directory for test repos
@@ -124,12 +126,17 @@ class DumbHTTPClientTests(CompatTestCase):
         )
         run_git_or_fail(["config", "user.name", "Test User"], cwd=self.work_path)
 
-        # Create initial commit
-        test_file = os.path.join(self.work_path, "test.txt")
-        with open(test_file, "w") as f:
-            f.write("Hello, world!\n")
-        run_git_or_fail(["add", "test.txt"], cwd=self.work_path)
-        run_git_or_fail(["commit", "-m", "Initial commit"], cwd=self.work_path)
+        nb_files = 10
+        if self.with_pack:
+            # adding more files will create a pack file in the repository
+            nb_files = 50
+
+        for i in range(nb_files):
+            test_file = os.path.join(self.work_path, f"test{i}.txt")
+            with open(test_file, "w") as f:
+                f.write(f"Hello, world {i}!\n")
+            run_git_or_fail(["add", f"test{i}.txt"], cwd=self.work_path)
+            run_git_or_fail(["commit", "-m", f"Commit {i}"], cwd=self.work_path)
 
         # Push to origin
         run_git_or_fail(
@@ -145,6 +152,12 @@ class DumbHTTPClientTests(CompatTestCase):
         self.server.start()
         self.addCleanup(self.server.stop)
 
+        pack_dir = os.path.join(self.origin_path, "objects", "pack")
+        if self.with_pack:
+            assert os.listdir(pack_dir)
+        else:
+            assert not os.listdir(pack_dir)
+
     @skipUnless(
         sys.platform != "win32", "git clone from Python HTTPServer fails on Windows"
     )
@@ -181,10 +194,10 @@ class DumbHTTPClientTests(CompatTestCase):
             dest_repo.reset_index()
 
             # Verify the clone
-            test_file = os.path.join(dest_path, "test.txt")
+            test_file = os.path.join(dest_path, "test0.txt")
             self.assertTrue(os.path.exists(test_file))
             with open(test_file) as f:
-                self.assertEqual("Hello, world!\n", f.read())
+                self.assertEqual("Hello, world 0!\n", f.read())
         finally:
             # Ensure repo is closed before cleanup
             dest_repo.close()
@@ -285,3 +298,7 @@ class DumbHTTPClientTests(CompatTestCase):
         finally:
             # Ensure repo is closed before cleanup
             dest_repo.close()
+
+
+class DumbHTTPClientWithPackTests(DumbHTTPClientNoPackTests):
+    with_pack = True