瀏覽代碼

add initial support for Git protocol v2

Implement Git protocol version negotiation and use Git protocol v2 for
fetches if supported. For now, the observable behaviour of Dulwich is
equivalent regardless of protocol version, except that two new features
may be used if the server supports Git protocol v2.

The first feature is a reference prefix list which may be used to filter
refs server-side. This can be used to reduce the size of the initial
reference listing announced by the server. Reducing the size of this
announcement was a major design goal for protocol v2, to avoid sending
very large announcements when a repository contains a lot of references.
This feature is intended as an optimization which servers are free to
ignore depending on available server-side resources. Therefore, users of
Dulwich should still be prepared to filter redundant refs manually (this
limitation also applies to Git itself).
A new --refspec porcelain option is provided in order to test this
feature on the command line.

The second feature is an object filter specification, which corresponds
to the --filter option of 'git clone'. This can be used to omit objects
while cloning repositories. For instance, the following command will
clone a given repository without fetching any blob objects:

   dulwich clone --filter blob:none --bare REPO_URL

(In this example the --bare option is used because creation of a work
tree would fail without any blobs present.)

The test suite now enables protocol v2 and keeps passing for me.
Stefan Sperling 9 月之前
父節點
當前提交
2b975ec7e4
共有 5 個文件被更改,包括 401 次插入51 次删除
  1. 26 7
      dulwich/cli.py
  2. 313 39
      dulwich/client.py
  3. 56 5
      dulwich/porcelain.py
  4. 3 0
      dulwich/protocol.py
  5. 3 0
      tests/compat/test_client.py

+ 26 - 7
dulwich/cli.py

@@ -263,6 +263,19 @@ class cmd_clone(Command):
             type=str,
             type=str,
             help=("Check out branch instead of branch pointed to by remote " "HEAD"),
             help=("Check out branch instead of branch pointed to by remote " "HEAD"),
         )
         )
+        parser.add_option(
+            "--refspec",
+            dest="refspec",
+            type=str,
+            help="References to fetch",
+            action="append",
+        )
+        parser.add_option(
+            "--filter",
+            dest="filter_spec",
+            type=str,
+            help="git-rev-list-style object filter",
+        )
         options, args = parser.parse_args(args)
         options, args = parser.parse_args(args)
 
 
         if args == []:
         if args == []:
@@ -282,6 +295,8 @@ class cmd_clone(Command):
                 bare=options.bare,
                 bare=options.bare,
                 depth=options.depth,
                 depth=options.depth,
                 branch=options.branch,
                 branch=options.branch,
+                refspec=options.refspec,
+                filter_spec=options.filter_spec,
             )
             )
         except GitProtocolError as e:
         except GitProtocolError as e:
             print(f"{e}")
             print(f"{e}")
@@ -586,13 +601,17 @@ class cmd_pack_objects(Command):
 
 
 class cmd_pull(Command):
 class cmd_pull(Command):
     def run(self, args):
     def run(self, args):
-        parser = optparse.OptionParser()
-        options, args = parser.parse_args(args)
-        try:
-            from_location = args[0]
-        except IndexError:
-            from_location = None
-        porcelain.pull(".", from_location)
+        parser = argparse.ArgumentParser()
+        parser.add_argument("--from_location", type=str)
+        parser.add_argument("--refspec", type=str, nargs="*")
+        parser.add_argument("--filter", type=str, nargs=1)
+        args = parser.parse_args(args)
+        porcelain.pull(
+            ".",
+            args.from_location or None,
+            args.refspec or None,
+            filter_spec=args.filter,
+        )
 
 
 
 
 class cmd_push(Command):
 class cmd_push(Command):

+ 313 - 39
dulwich/client.py

@@ -82,6 +82,8 @@ from .protocol import (
     CAPABILITIES_REF,
     CAPABILITIES_REF,
     CAPABILITY_AGENT,
     CAPABILITY_AGENT,
     CAPABILITY_DELETE_REFS,
     CAPABILITY_DELETE_REFS,
+    CAPABILITY_FETCH,
+    CAPABILITY_FILTER,
     CAPABILITY_INCLUDE_TAG,
     CAPABILITY_INCLUDE_TAG,
     CAPABILITY_MULTI_ACK,
     CAPABILITY_MULTI_ACK,
     CAPABILITY_MULTI_ACK_DETAILED,
     CAPABILITY_MULTI_ACK_DETAILED,
@@ -237,8 +239,22 @@ class ReportStatusParser:
             self._ref_statuses.append(ref_status)
             self._ref_statuses.append(ref_status)
 
 
 
 
-def read_pkt_refs(pkt_seq):
-    server_capabilities = None
+def negotiate_protocol_version(proto):
+    pkt = proto.read_pkt_line()
+    if pkt == b"version 2\n":
+        return 2
+    proto.unread_pkt_line(pkt)
+    return 0
+
+
+def read_server_capabilities(pkt_seq):
+    server_capabilities = []
+    for pkt in pkt_seq:
+        server_capabilities.append(pkt)
+    return set(server_capabilities)
+
+
+def read_pkt_refs(pkt_seq, server_capabilities=None):
     refs = {}
     refs = {}
     # Receive refs from server
     # Receive refs from server
     for pkt in pkt_seq:
     for pkt in pkt_seq:
@@ -247,6 +263,18 @@ def read_pkt_refs(pkt_seq):
             raise GitProtocolError(ref.decode("utf-8", "replace"))
             raise GitProtocolError(ref.decode("utf-8", "replace"))
         if server_capabilities is None:
         if server_capabilities is None:
             (ref, server_capabilities) = extract_capabilities(ref)
             (ref, server_capabilities) = extract_capabilities(ref)
+        else:  # Git protocol-v2:
+            try:
+                symref, target = ref.split(b" ", 1)
+            except ValueError:
+                pass
+            else:
+                if symref and target and target[:14] == b"symref-target:":
+                    server_capabilities.add(
+                        b"%s=%s:%s"
+                        % (CAPABILITY_SYMREF, symref, target.split(b":", 1)[1])
+                    )
+                    ref = symref
         refs[ref] = sha
         refs[ref] = sha
 
 
     if len(refs) == 0:
     if len(refs) == 0:
@@ -413,6 +441,8 @@ def _read_shallow_updates(pkt_seq):
     new_shallow = set()
     new_shallow = set()
     new_unshallow = set()
     new_unshallow = set()
     for pkt in pkt_seq:
     for pkt in pkt_seq:
+        if pkt == b"shallow-info\n":  # Git-protocol v2
+            continue
         try:
         try:
             cmd, sha = pkt.split(b" ", 1)
             cmd, sha = pkt.split(b" ", 1)
         except ValueError:
         except ValueError:
@@ -498,7 +528,25 @@ def _read_side_band64k_data(pkt_seq: Iterable[bytes]) -> Iterator[Tuple[int, byt
         yield channel, pkt[1:]
         yield channel, pkt[1:]
 
 
 
 
-def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read, depth):
+def find_capability(capabilities, key, value):
+    for capability in capabilities:
+        k, v = parse_capability(capability)
+        if k != key:
+            continue
+        if value and value not in v.split(b" "):
+            continue
+        return capability
+
+
+def _handle_upload_pack_head(
+    proto,
+    capabilities,
+    graph_walker,
+    wants,
+    can_read,
+    depth,
+    protocol_version=0,
+):
     """Handle the head of a 'git-upload-pack' request.
     """Handle the head of a 'git-upload-pack' request.
 
 
     Args:
     Args:
@@ -509,15 +557,23 @@ def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read,
       can_read: function that returns a boolean that indicates
       can_read: function that returns a boolean that indicates
     whether there is extra graph data to read on proto
     whether there is extra graph data to read on proto
       depth: Depth for request
       depth: Depth for request
+      protocol_version: desired Git protocol version; defaults to v0
     """
     """
     assert isinstance(wants, list) and isinstance(wants[0], bytes)
     assert isinstance(wants, list) and isinstance(wants[0], bytes)
-    proto.write_pkt_line(
-        COMMAND_WANT + b" " + wants[0] + b" " + b" ".join(sorted(capabilities)) + b"\n"
-    )
+    wantcmd = COMMAND_WANT + b" " + wants[0]
+    if protocol_version != 2:
+        wantcmd += b" " + b" ".join(sorted(capabilities))
+    wantcmd += b"\n"
+    proto.write_pkt_line(wantcmd)
     for want in wants[1:]:
     for want in wants[1:]:
         proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
         proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
     if depth not in (0, None) or getattr(graph_walker, "shallow", None):
     if depth not in (0, None) or getattr(graph_walker, "shallow", None):
-        if CAPABILITY_SHALLOW not in capabilities:
+        if protocol_version == 2:
+            if not find_capability(capabilities, CAPABILITY_FETCH, CAPABILITY_SHALLOW):
+                raise GitProtocolError(
+                    "server does not support shallow capability required for " "depth"
+                )
+        elif CAPABILITY_SHALLOW not in capabilities:
             raise GitProtocolError(
             raise GitProtocolError(
                 "server does not support shallow capability required for " "depth"
                 "server does not support shallow capability required for " "depth"
             )
             )
@@ -527,7 +583,10 @@ def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read,
             proto.write_pkt_line(
             proto.write_pkt_line(
                 COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
                 COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
             )
             )
-    proto.write_pkt_line(None)
+        if protocol_version == 2:
+            proto.write_pkt_line(None)
+    if protocol_version != 2:
+        proto.write_pkt_line(None)
 
 
     if depth not in (0, None):
     if depth not in (0, None):
         if can_read is not None:
         if can_read is not None:
@@ -555,6 +614,8 @@ def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read,
                     )
                     )
         have = next(graph_walker)
         have = next(graph_walker)
     proto.write_pkt_line(COMMAND_DONE + b"\n")
     proto.write_pkt_line(COMMAND_DONE + b"\n")
+    if protocol_version == 2:
+        proto.write_pkt_line(None)
     return (new_shallow, new_unshallow)
     return (new_shallow, new_unshallow)
 
 
 
 
@@ -565,6 +626,7 @@ def _handle_upload_pack_tail(
     pack_data: Callable[[bytes], None],
     pack_data: Callable[[bytes], None],
     progress: Optional[Callable[[bytes], None]] = None,
     progress: Optional[Callable[[bytes], None]] = None,
     rbufsize=_RBUFSIZE,
     rbufsize=_RBUFSIZE,
+    protocol_version=0,
 ):
 ):
     """Handle the tail of a 'git-upload-pack' request.
     """Handle the tail of a 'git-upload-pack' request.
 
 
@@ -579,18 +641,21 @@ def _handle_upload_pack_tail(
     pkt = proto.read_pkt_line()
     pkt = proto.read_pkt_line()
     while pkt:
     while pkt:
         parts = pkt.rstrip(b"\n").split(b" ")
         parts = pkt.rstrip(b"\n").split(b" ")
-        if parts[0] == b"ACK":
-            graph_walker.ack(parts[1])
-        if parts[0] == b"NAK":
-            graph_walker.nak()
-        if len(parts) < 3 or parts[2] not in (
-            b"ready",
-            b"continue",
-            b"common",
-        ):
+        if protocol_version == 2 and parts[0] != "packfile":
             break
             break
+        else:
+            if parts[0] == b"ACK":
+                graph_walker.ack(parts[1])
+            if parts[0] == b"NAK":
+                graph_walker.nak()
+            if len(parts) < 3 or parts[2] not in (
+                b"ready",
+                b"continue",
+                b"common",
+            ):
+                break
         pkt = proto.read_pkt_line()
         pkt = proto.read_pkt_line()
-    if CAPABILITY_SIDE_BAND_64K in capabilities:
+    if CAPABILITY_SIDE_BAND_64K in capabilities or protocol_version == 2:
         if progress is None:
         if progress is None:
             # Just ignore progress data
             # Just ignore progress data
 
 
@@ -624,6 +689,7 @@ class GitClient:
         report_activity=None,
         report_activity=None,
         quiet=False,
         quiet=False,
         include_tags=False,
         include_tags=False,
+        **kwargs,
     ) -> None:
     ) -> None:
         """Create a new GitClient instance.
         """Create a new GitClient instance.
 
 
@@ -646,6 +712,7 @@ class GitClient:
             self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
             self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
         if include_tags:
         if include_tags:
             self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG)
             self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG)
+        self.protocol_version = 0  # our default Git protocol version
 
 
     def get_url(self, path):
     def get_url(self, path):
         """Retrieves full url to given path.
         """Retrieves full url to given path.
@@ -711,6 +778,8 @@ class GitClient:
         branch=None,
         branch=None,
         progress=None,
         progress=None,
         depth=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ) -> Repo:
     ) -> Repo:
         """Clone a repository."""
         """Clone a repository."""
         from .refs import _set_default_branch, _set_head, _set_origin_head
         from .refs import _set_default_branch, _set_head, _set_origin_head
@@ -749,7 +818,14 @@ class GitClient:
                 target_config.write_to_path()
                 target_config.write_to_path()
 
 
             ref_message = b"clone: from " + encoded_path
             ref_message = b"clone: from " + encoded_path
-            result = self.fetch(path, target, progress=progress, depth=depth)
+            result = self.fetch(
+                path,
+                target,
+                progress=progress,
+                depth=depth,
+                ref_prefix=ref_prefix,
+                filter_spec=filter_spec,
+            )
             if origin is not None:
             if origin is not None:
                 _import_remote_refs(
                 _import_remote_refs(
                     target.refs, origin, result.refs, message=ref_message
                     target.refs, origin, result.refs, message=ref_message
@@ -798,6 +874,8 @@ class GitClient:
         ] = None,
         ] = None,
         progress: Optional[Callable[[bytes], None]] = None,
         progress: Optional[Callable[[bytes], None]] = None,
         depth: Optional[int] = None,
         depth: Optional[int] = None,
+        ref_prefix: Optional[List[bytes]] = [],
+        filter_spec: Optional[bytes] = None,
     ) -> FetchPackResult:
     ) -> FetchPackResult:
         """Fetch into a target repository.
         """Fetch into a target repository.
 
 
@@ -809,6 +887,15 @@ class GitClient:
             list of shas to fetch. Defaults to all shas.
             list of shas to fetch. Defaults to all shas.
           progress: Optional progress function
           progress: Optional progress function
           depth: Depth to fetch at
           depth: Depth to fetch at
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           Dictionary with all remote refs (not just those fetched)
           Dictionary with all remote refs (not just those fetched)
@@ -844,6 +931,8 @@ class GitClient:
                 f.write,
                 f.write,
                 progress=progress,
                 progress=progress,
                 depth=depth,
                 depth=depth,
+                ref_prefix=ref_prefix,
+                filter_spec=filter_spec,
             )
             )
         except BaseException:
         except BaseException:
             abort()
             abort()
@@ -862,6 +951,8 @@ class GitClient:
         *,
         *,
         progress: Optional[Callable[[bytes], None]] = None,
         progress: Optional[Callable[[bytes], None]] = None,
         depth: Optional[int] = None,
         depth: Optional[int] = None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
     ):
         """Retrieve a pack from a git smart server.
         """Retrieve a pack from a git smart server.
 
 
@@ -874,6 +965,15 @@ class GitClient:
           pack_data: Callback called for each bit of data in the pack
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           FetchPackResult object
           FetchPackResult object
@@ -923,7 +1023,7 @@ class GitClient:
             error message if the ref failed to update
             error message if the ref failed to update
             None if it was updated successfully
             None if it was updated successfully
         """
         """
-        if CAPABILITY_SIDE_BAND_64K in capabilities:
+        if CAPABILITY_SIDE_BAND_64K in capabilities or self.protocol_version == 2:
             if progress is None:
             if progress is None:
 
 
                 def progress(x):
                 def progress(x):
@@ -955,6 +1055,7 @@ class GitClient:
         # TODO(jelmer): warn about unknown capabilities
         # TODO(jelmer): warn about unknown capabilities
         symrefs = {}
         symrefs = {}
         agent = None
         agent = None
+        fetch_capa = None
         for capability in server_capabilities:
         for capability in server_capabilities:
             k, v = parse_capability(capability)
             k, v = parse_capability(capability)
             if k == CAPABILITY_SYMREF:
             if k == CAPABILITY_SYMREF:
@@ -962,8 +1063,24 @@ class GitClient:
                 symrefs[src] = dst
                 symrefs[src] = dst
             if k == CAPABILITY_AGENT:
             if k == CAPABILITY_AGENT:
                 agent = v
                 agent = v
+            if self.protocol_version == 2 and k == CAPABILITY_FETCH:
+                fetch_capa = CAPABILITY_FETCH
+                fetch_features = []
+                v = v.strip()
+                if b"shallow" in v.split(b" "):
+                    fetch_features.append(CAPABILITY_SHALLOW)
+                if b"filter" in v.split(b" "):
+                    fetch_features.append(CAPABILITY_FILTER)
+                for i in range(len(fetch_features)):
+                    if i == 0:
+                        fetch_capa += b"="
+                    else:
+                        fetch_capa += b" "
+                    fetch_capa += fetch_features[i]
 
 
         negotiated_capabilities = self._fetch_capabilities & server_capabilities
         negotiated_capabilities = self._fetch_capabilities & server_capabilities
+        if fetch_capa:
+            negotiated_capabilities.add(fetch_capa)
         return (negotiated_capabilities, symrefs, agent)
         return (negotiated_capabilities, symrefs, agent)
 
 
     def archive(
     def archive(
@@ -1048,6 +1165,7 @@ class TraditionalGitClient(GitClient):
           SendPackError: if server rejects the pack data
           SendPackError: if server rejects the pack data
 
 
         """
         """
+        self.protocol_version = 0
         proto, unused_can_read, stderr = self._connect(b"receive-pack", path)
         proto, unused_can_read, stderr = self._connect(b"receive-pack", path)
         with proto:
         with proto:
             try:
             try:
@@ -1130,6 +1248,8 @@ class TraditionalGitClient(GitClient):
         pack_data,
         pack_data,
         progress=None,
         progress=None,
         depth=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
     ):
         """Retrieve a pack from a git smart server.
         """Retrieve a pack from a git smart server.
 
 
@@ -1142,15 +1262,29 @@ class TraditionalGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           FetchPackResult object
           FetchPackResult object
 
 
         """
         """
         proto, can_read, stderr = self._connect(b"upload-pack", path)
         proto, can_read, stderr = self._connect(b"upload-pack", path)
+        self.protocol_version = negotiate_protocol_version(proto)
         with proto:
         with proto:
             try:
             try:
-                refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq())
+                if self.protocol_version == 2:
+                    server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+                    refs = None
+                else:
+                    refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq())
             except HangupException as exc:
             except HangupException as exc:
                 raise _remote_error_from_stderr(stderr) from exc
                 raise _remote_error_from_stderr(stderr) from exc
             (
             (
@@ -1159,6 +1293,17 @@ class TraditionalGitClient(GitClient):
                 agent,
                 agent,
             ) = self._negotiate_upload_pack_capabilities(server_capabilities)
             ) = self._negotiate_upload_pack_capabilities(server_capabilities)
 
 
+            if self.protocol_version == 2:
+                proto.write_pkt_line(b"command=ls-refs\n")
+                proto.write(b"0001")  # delim-pkt
+                proto.write_pkt_line(b"symrefs")
+                for prefix in ref_prefix:
+                    proto.write_pkt_line(b"ref-prefix " + prefix)
+                proto.write_pkt_line(None)
+                refs, server_capabilities = read_pkt_refs(
+                    proto.read_pkt_seq(), server_capabilities
+                )
+
             if refs is None:
             if refs is None:
                 proto.write_pkt_line(None)
                 proto.write_pkt_line(None)
                 return FetchPackResult(refs, symrefs, agent)
                 return FetchPackResult(refs, symrefs, agent)
@@ -1176,6 +1321,16 @@ class TraditionalGitClient(GitClient):
             if not wants:
             if not wants:
                 proto.write_pkt_line(None)
                 proto.write_pkt_line(None)
                 return FetchPackResult(refs, symrefs, agent)
                 return FetchPackResult(refs, symrefs, agent)
+            if self.protocol_version == 2:
+                proto.write_pkt_line(b"command=fetch\n")
+                proto.write(b"0001")  # delim-pkt
+                if (
+                    find_capability(
+                        negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
+                    )
+                    and filter_spec
+                ):
+                    proto.write(pkt_line(b"filter %s\n" % filter_spec))
             (new_shallow, new_unshallow) = _handle_upload_pack_head(
             (new_shallow, new_unshallow) = _handle_upload_pack_head(
                 proto,
                 proto,
                 negotiated_capabilities,
                 negotiated_capabilities,
@@ -1183,6 +1338,7 @@ class TraditionalGitClient(GitClient):
                 wants,
                 wants,
                 can_read,
                 can_read,
                 depth=depth,
                 depth=depth,
+                protocol_version=self.protocol_version,
             )
             )
             _handle_upload_pack_tail(
             _handle_upload_pack_tail(
                 proto,
                 proto,
@@ -1190,6 +1346,7 @@ class TraditionalGitClient(GitClient):
                 graph_walker,
                 graph_walker,
                 pack_data,
                 pack_data,
                 progress,
                 progress,
+                protocol_version=self.protocol_version,
             )
             )
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
 
 
@@ -1197,9 +1354,20 @@ class TraditionalGitClient(GitClient):
         """Retrieve the current refs from a git smart server."""
         """Retrieve the current refs from a git smart server."""
         # stock `git ls-remote` uses upload-pack
         # stock `git ls-remote` uses upload-pack
         proto, _, stderr = self._connect(b"upload-pack", path)
         proto, _, stderr = self._connect(b"upload-pack", path)
+        self.protocol_version = negotiate_protocol_version(proto)
+        if self.protocol_version == 2:
+            server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+            proto.write_pkt_line(b"command=ls-refs\n")
+            proto.write(b"0001")  # delim-pkt
+            proto.write_pkt_line(b"symrefs")
+            proto.write_pkt_line(None)
+        else:
+            server_capabilities = None  # read_pkt_refs will find them
         with proto:
         with proto:
             try:
             try:
-                refs, _ = read_pkt_refs(proto.read_pkt_seq())
+                refs, server_capabilities = read_pkt_refs(
+                    proto.read_pkt_seq(), server_capabilities
+                )
             except HangupException as exc:
             except HangupException as exc:
                 raise _remote_error_from_stderr(stderr) from exc
                 raise _remote_error_from_stderr(stderr) from exc
             proto.write_pkt_line(None)
             proto.write_pkt_line(None)
@@ -1314,8 +1482,20 @@ class TCPGitClient(TraditionalGitClient):
         )
         )
         if path.startswith(b"/~"):
         if path.startswith(b"/~"):
             path = path[1:]
             path = path[1:]
+        if cmd == b"upload-pack":
+            self.protocol_version = 2
+            # Git protocol version advertisement is hidden behind two NUL bytes
+            # for compatibility with older Git server implementations, which
+            # would crash if something other than a "host=" header was found
+            # after the first NUL byte.
+            version_str = b"\0\0version=%d\0" % self.protocol_version
+        else:
+            version_str = b""
+            self.protocol_version = 0
         # TODO(jelmer): Alternative to ascii?
         # TODO(jelmer): Alternative to ascii?
-        proto.send_cmd(b"git-" + cmd, path, b"host=" + self._host.encode("ascii"))
+        proto.send_cmd(
+            b"git-" + cmd, path, b"host=" + self._host.encode("ascii") + version_str
+        )
         return proto, lambda: _fileno_can_read(s), None
         return proto, lambda: _fileno_can_read(s), None
 
 
 
 
@@ -1492,7 +1672,16 @@ class LocalGitClient(GitClient):
 
 
         return SendPackResult(new_refs, ref_status=ref_status)
         return SendPackResult(new_refs, ref_status=ref_status)
 
 
-    def fetch(self, path, target, determine_wants=None, progress=None, depth=None):
+    def fetch(
+        self,
+        path,
+        target,
+        determine_wants=None,
+        progress=None,
+        depth=None,
+        ref_prefix=[],
+        filter_spec=None,
+    ):
         """Fetch into a target repository.
         """Fetch into a target repository.
 
 
         Args:
         Args:
@@ -1503,6 +1692,15 @@ class LocalGitClient(GitClient):
             list of shas to fetch. Defaults to all shas.
             list of shas to fetch. Defaults to all shas.
           progress: Optional progress function
           progress: Optional progress function
           depth: Shallow fetch depth
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           FetchPackResult object
           FetchPackResult object
@@ -1525,6 +1723,8 @@ class LocalGitClient(GitClient):
         pack_data,
         pack_data,
         progress=None,
         progress=None,
         depth=None,
         depth=None,
+        ref_prefix: Optional[List[bytes]] = [],
+        filter_spec: Optional[bytes] = None,
     ) -> FetchPackResult:
     ) -> FetchPackResult:
         """Retrieve a pack from a local on-disk repository.
         """Retrieve a pack from a local on-disk repository.
 
 
@@ -1537,6 +1737,15 @@ class LocalGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           FetchPackResult object
           FetchPackResult object
@@ -2009,6 +2218,16 @@ class AbstractHttpGitClient(GitClient):
         headers = {"Accept": "*/*"}
         headers = {"Accept": "*/*"}
         if self.dumb is not True:
         if self.dumb is not True:
             tail += "?service={}".format(service.decode("ascii"))
             tail += "?service={}".format(service.decode("ascii"))
+            # Enable protocol v2 only when fetching, not when pushing.
+            # Git does not yet implement push over protocol v2, and as of
+            # git version 2.37.3 git-http-backend's behaviour is erratic if
+            # we try: It responds with a Git-protocol-v1-style ref listing
+            # which lacks the "001f# service=git-receive-pack" marker.
+            if service == b"git-upload-pack":
+                self.protocol_version = 2
+                headers["Git-Protocol"] = "version=2"
+            else:
+                self.protocol_version = 0
         url = urljoin(base_url, tail)
         url = urljoin(base_url, tail)
         resp, read = self._http_request(url, headers)
         resp, read = self._http_request(url, headers)
 
 
@@ -2025,20 +2244,50 @@ class AbstractHttpGitClient(GitClient):
                 "application/x-git-"
                 "application/x-git-"
             )
             )
             if not self.dumb:
             if not self.dumb:
-                proto = Protocol(read, None)
-                # The first line should mention the service
-                try:
-                    [pkt] = list(proto.read_pkt_seq())
-                except ValueError as exc:
-                    raise GitProtocolError(
-                        "unexpected number of packets received"
-                    ) from exc
-                if pkt.rstrip(b"\n") != (b"# service=" + service):
-                    raise GitProtocolError(
-                        f"unexpected first line {pkt!r} from smart server"
+
+                def begin_protocol_v2(proto):
+                    server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+                    resp, read = self._smart_request(
+                        service.decode("ascii"),
+                        base_url,
+                        pkt_line(b"command=ls-refs\n")
+                        + b"0001"
+                        + pkt_line(b"symrefs")
+                        + b"0000",
                     )
                     )
-                return (*read_pkt_refs(proto.read_pkt_seq()), base_url)
+                    proto = Protocol(read, None)
+                    return server_capabilities, resp, read, proto
+
+                proto = Protocol(read, None)
+                self.protocol_version = negotiate_protocol_version(proto)
+                if self.protocol_version == 2:
+                    server_capabilities, resp, read, proto = begin_protocol_v2(proto)
+                else:
+                    server_capabilities = None  # read_pkt_refs will find them
+                    try:
+                        [pkt] = list(proto.read_pkt_seq())
+                    except ValueError as exc:
+                        raise GitProtocolError(
+                            "unexpected number of packets received"
+                        ) from exc
+                    if pkt.rstrip(b"\n") != (b"# service=" + service):
+                        raise GitProtocolError(
+                            f"unexpected first line {pkt!r} from smart server"
+                        )
+                    # Github sends "version 2" after sending the service name.
+                    # Try to negotiate protocol version 2 again.
+                    self.protocol_version = negotiate_protocol_version(proto)
+                    if self.protocol_version == 2:
+                        server_capabilities, resp, read, proto = begin_protocol_v2(
+                            proto
+                        )
+                (
+                    refs,
+                    server_capabilities,
+                ) = read_pkt_refs(proto.read_pkt_seq(), server_capabilities)
+                return refs, server_capabilities, base_url
             else:
             else:
+                self.protocol_version = 0
                 return read_info_refs(resp), set(), base_url
                 return read_info_refs(resp), set(), base_url
         finally:
         finally:
             resp.close()
             resp.close()
@@ -2056,6 +2305,8 @@ class AbstractHttpGitClient(GitClient):
             "Content-Type": f"application/x-{service}-request",
             "Content-Type": f"application/x-{service}-request",
             "Accept": result_content_type,
             "Accept": result_content_type,
         }
         }
+        if self.protocol_version == 2:
+            headers["Git-Protocol"] = "version=2"
         if isinstance(data, bytes):
         if isinstance(data, bytes):
             headers["Content-Length"] = str(len(data))
             headers["Content-Length"] = str(len(data))
         resp, read = self._http_request(url, headers, data)
         resp, read = self._http_request(url, headers, data)
@@ -2138,6 +2389,8 @@ class AbstractHttpGitClient(GitClient):
         pack_data,
         pack_data,
         progress=None,
         progress=None,
         depth=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
     ):
         """Retrieve a pack from a git smart server.
         """Retrieve a pack from a git smart server.
 
 
@@ -2148,6 +2401,15 @@ class AbstractHttpGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           progress: Callback for progress reports (strings)
           depth: Depth for request
           depth: Depth for request
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
 
         Returns:
         Returns:
           FetchPackResult object
           FetchPackResult object
@@ -2181,10 +2443,21 @@ class AbstractHttpGitClient(GitClient):
             wants,
             wants,
             can_read=None,
             can_read=None,
             depth=depth,
             depth=depth,
+            protocol_version=self.protocol_version,
         )
         )
-        resp, read = self._smart_request(
-            "git-upload-pack", url, data=req_data.getvalue()
-        )
+        if self.protocol_version == 2:
+            data = pkt_line(b"command=fetch\n") + b"0001"
+            if (
+                find_capability(
+                    negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
+                )
+                and filter_spec
+            ):
+                data += pkt_line(b"filter %s\n" % filter_spec)
+            data += req_data.getvalue()
+        else:
+            data = req_data.getvalue()
+        resp, read = self._smart_request("git-upload-pack", url, data)
         try:
         try:
             resp_proto = Protocol(read, None)
             resp_proto = Protocol(read, None)
             if new_shallow is None and new_unshallow is None:
             if new_shallow is None and new_unshallow is None:
@@ -2197,6 +2470,7 @@ class AbstractHttpGitClient(GitClient):
                 graph_walker,
                 graph_walker,
                 pack_data,
                 pack_data,
                 progress,
                 progress,
+                protocol_version=self.protocol_version,
             )
             )
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
         finally:
         finally:

+ 56 - 5
dulwich/porcelain.py

@@ -486,6 +486,26 @@ def init(path=".", *, bare=False, symlinks: Optional[bool] = None):
         return Repo.init(path, symlinks=symlinks)
         return Repo.init(path, symlinks=symlinks)
 
 
 
 
+def encode_refspecs(refspecs, refspec_encoding):
+    """Normalize refspecs into a list of bytestrings.
+
+    Args:
+      refspecs: A single refspec (bytestring or string), an iterable of
+        bytestrings/strings, or None.
+      refspec_encoding: Character encoding used to encode refspecs which
+        are given as strings. Bytestrings are passed through unchanged.
+    Returns: A list of bytestrings; ``[b"HEAD"]`` if refspecs is None.
+    """
+    if refspecs is None:
+        return [b"HEAD"]
+
+    # A lone refspec is handled like a one-element list.
+    specs = [refspecs] if isinstance(refspecs, (bytes, str)) else refspecs
+
+    return [
+        spec if isinstance(spec, bytes) else spec.encode(refspec_encoding)
+        for spec in specs
+    ]
+
+
 def clone(
 def clone(
     source,
     source,
     target=None,
     target=None,
@@ -497,6 +517,9 @@ def clone(
     depth: Optional[int] = None,
     depth: Optional[int] = None,
     branch: Optional[Union[str, bytes]] = None,
     branch: Optional[Union[str, bytes]] = None,
     config: Optional[Config] = None,
     config: Optional[Config] = None,
+    refspecs=None,
+    refspec_encoding=DEFAULT_ENCODING,
+    filter_spec=None,
     **kwargs,
     **kwargs,
 ):
 ):
     """Clone a local or remote git repository.
     """Clone a local or remote git repository.
@@ -513,6 +536,13 @@ def clone(
       branch: Optional branch or tag to be used as HEAD in the new repository
       branch: Optional branch or tag to be used as HEAD in the new repository
         instead of the cloned repository's HEAD.
         instead of the cloned repository's HEAD.
       config: Configuration to use
       config: Configuration to use
+      refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of
+        bytestring/string.
+      refspec_encoding: Character encoding used to encode refspecs given as strings;
+        bytestrings are used unchanged. If not specified, the internal default
+        encoding will be used.
+      filter_spec: A git-rev-list-style object filter spec, as an ASCII string.
+        Only used if the server supports the Git protocol-v2 'filter'
+        feature, and ignored otherwise.
     Returns: The new repository
     Returns: The new repository
     """
     """
     if outstream is not None:
     if outstream is not None:
@@ -533,6 +563,8 @@ def clone(
     if checkout and bare:
     if checkout and bare:
         raise Error("checkout and bare are incompatible")
         raise Error("checkout and bare are incompatible")
 
 
+    encoded_refs = encode_refspecs(refspecs, refspec_encoding)
+
     if target is None:
     if target is None:
         target = source.split("/")[-1]
         target = source.split("/")[-1]
 
 
@@ -543,6 +575,9 @@ def clone(
 
 
     (client, path) = get_transport_and_path(source, config=config, **kwargs)
     (client, path) = get_transport_and_path(source, config=config, **kwargs)
 
 
+    if filter_spec:
+        filter_spec = filter_spec.encode("ascii")
+
     return client.clone(
     return client.clone(
         path,
         path,
         target,
         target,
@@ -553,6 +588,8 @@ def clone(
         branch=branch,
         branch=branch,
         progress=errstream.write,
         progress=errstream.write,
         depth=depth,
         depth=depth,
+        ref_prefix=encoded_refs,
+        filter_spec=filter_spec,
     )
     )
 
 
 
 
@@ -1238,6 +1275,8 @@ def pull(
     errstream=default_bytes_err_stream,
     errstream=default_bytes_err_stream,
     fast_forward=True,
     fast_forward=True,
     force=False,
     force=False,
+    refspec_encoding=DEFAULT_ENCODING,
+    filter_spec=None,
     **kwargs,
     **kwargs,
 ):
 ):
     """Pull from remote via dulwich.client.
     """Pull from remote via dulwich.client.
@@ -1245,21 +1284,26 @@ def pull(
     Args:
     Args:
       repo: Path to repository
       repo: Path to repository
       remote_location: Location of the remote
       remote_location: Location of the remote
-      refspecs: refspecs to fetch
+      refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of
+        bytestring/string.
       outstream: A stream file to write to output
       outstream: A stream file to write to output
       errstream: A stream file to write to errors
       errstream: A stream file to write to errors
+      refspec_encoding: Character encoding used to encode refspecs given as strings;
+        bytestrings are used unchanged. If not specified, the internal default
+        encoding will be used.
+      filter_spec: A git-rev-list-style object filter spec, as an ASCII string.
+        Only used if the server supports the Git protocol-v2 'filter'
+        feature, and ignored otherwise.
     """
     """
     # Open the repo
     # Open the repo
     with open_repo_closing(repo) as r:
     with open_repo_closing(repo) as r:
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
 
 
-        if refspecs is None:
-            refspecs = [b"HEAD"]
+        encoded_refs = encode_refspecs(refspecs, refspec_encoding)
         selected_refs = []
         selected_refs = []
 
 
         def determine_wants(remote_refs, **kwargs):
         def determine_wants(remote_refs, **kwargs):
             selected_refs.extend(
             selected_refs.extend(
-                parse_reftuples(remote_refs, r.refs, refspecs, force=force)
+                parse_reftuples(remote_refs, r.refs, encoded_refs, force=force)
             )
             )
             return [
             return [
                 remote_refs[lh]
                 remote_refs[lh]
@@ -1270,8 +1314,15 @@ def pull(
         client, path = get_transport_and_path(
         client, path = get_transport_and_path(
             remote_location, config=r.get_config_stack(), **kwargs
             remote_location, config=r.get_config_stack(), **kwargs
         )
         )
+        if filter_spec:
+            filter_spec = filter_spec.encode("ascii")
         fetch_result = client.fetch(
         fetch_result = client.fetch(
-            path, r, progress=errstream.write, determine_wants=determine_wants
+            path,
+            r,
+            progress=errstream.write,
+            determine_wants=determine_wants,
+            ref_prefix=refspecs,
+            filter_spec=filter_spec,
         )
         )
         for lh, rh, force_ref in selected_refs:
         for lh, rh, force_ref in selected_refs:
             if not force_ref and rh in r.refs:
             if not force_ref and rh in r.refs:

+ 3 - 0
dulwich/protocol.py

@@ -64,6 +64,8 @@ CAPABILITY_AGENT = b"agent"
 CAPABILITY_SYMREF = b"symref"
 CAPABILITY_SYMREF = b"symref"
 CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
 CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
 CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
 CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
+CAPABILITY_FETCH = b"fetch"
+CAPABILITY_FILTER = b"filter"
 
 
 # Magic ref that is used to attach capabilities to when
 # Magic ref that is used to attach capabilities to when
 # there are no refs. Should always be set to ZERO_SHA.
 # there are no refs. Should always be set to ZERO_SHA.
@@ -90,6 +92,7 @@ KNOWN_UPLOAD_CAPABILITIES = set(
         CAPABILITY_DEEPEN_RELATIVE,
         CAPABILITY_DEEPEN_RELATIVE,
         CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
         CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
         CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
         CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
+        CAPABILITY_FETCH,
     ]
     ]
 )
 )
 KNOWN_RECEIVE_CAPABILITIES = set(
 KNOWN_RECEIVE_CAPABILITIES = set(

+ 3 - 0
tests/compat/test_client.py

@@ -570,6 +570,9 @@ class GitHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
         co = self.headers.get("cookie")
         co = self.headers.get("cookie")
         if co:
         if co:
             env["HTTP_COOKIE"] = co
             env["HTTP_COOKIE"] = co
+        proto = self.headers.get("Git-Protocol")
+        if proto:
+            env["GIT_PROTOCOL"] = proto
         # XXX Other HTTP_* headers
         # XXX Other HTTP_* headers
         # Since we're setting the env in the parent, provide empty
         # Since we're setting the env in the parent, provide empty
         # values to override previously set values
         # values to override previously set values