Browse Source

Avoid making an extra copy of HTTP responses.

Jelmer Vernooij 2 years ago
parent
commit
97814a9bb5
3 changed files with 10 additions and 13 deletions
  1. NEWS (+3 -0)
  2. dulwich/client.py (+3 -10)
  3. dulwich/tests/test_client.py (+4 -3)

+ 3 - 0
NEWS

@@ -1,5 +1,8 @@
 0.20.37	UNRELEASED
 
+ * Avoid making an extra copy when fetching pack files.
+   (Jelmer Vernooij)
+
 0.20.36	2022-05-15
 
  * Add ``walk_untracked`` argument to ``porcelain.status``.

+ 3 - 10
dulwich/client.py

@@ -2186,10 +2186,10 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             req_headers["Accept-Encoding"] = "identity"
 
         if data is None:
-            resp = self.pool_manager.request("GET", url, headers=req_headers)
+            resp = self.pool_manager.request("GET", url, headers=req_headers, preload_content=False)
         else:
             resp = self.pool_manager.request(
-                "POST", url, headers=req_headers, body=data
+                "POST", url, headers=req_headers, body=data, preload_content=False
             )
 
         if resp.status == 404:
@@ -2203,13 +2203,6 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
                 "unexpected http resp %d for %s" % (resp.status, url)
             )
 
-        # TODO: Optimization available by adding `preload_content=False` to the
-        # request and just passing the `read` method on instead of going via
-        # `BytesIO`, if we can guarantee that the entire response is consumed
-        # before issuing the next to still allow for connection reuse from the
-        # pool.
-        read = BytesIO(resp.data).read
-
         resp.content_type = resp.getheader("Content-Type")
         # Check if geturl() is available (urllib3 version >= 1.23)
         try:
@@ -2219,7 +2212,7 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             resp.redirect_location = resp.get_redirect_location()
         else:
             resp.redirect_location = resp_url if resp_url != url else ""
-        return resp, read
+        return resp, resp.read
 
 
 HttpGitClient = Urllib3HttpGitClient

+ 4 - 3
dulwich/tests/test_client.py

@@ -1048,7 +1048,7 @@ class HttpGitClientTests(TestCase):
             def __init__(self):
                 self.headers = {}
 
-            def request(self, method, url, fields=None, headers=None, redirect=True):
+            def request(self, method, url, fields=None, headers=None, redirect=True, preload_content=True):
                 base_url = url[: -len(tail)]
                 redirect_base_url = test_data[base_url]["redirect_url"]
                 redirect_url = redirect_base_url + tail
@@ -1063,14 +1063,15 @@ class HttpGitClientTests(TestCase):
                 if redirect is False:
                     request_url = url
                     if redirect_base_url != base_url:
-                        body = ""
+                        body = b""
                         headers["location"] = redirect_url
                         status = 301
                 return HTTPResponse(
-                    body=body,
+                    body=BytesIO(body),
                     headers=headers,
                     request_method=method,
                     request_url=request_url,
+                    preload_content=preload_content,
                     status=status,
                 )