Commit Diff


commit - 436ce9f94137e46d527d8260fe79242e1c82e00b
commit + 2b975ec7e49caf98c1c4d0b1eea848d0dd86ae77
blob - e856df4552148033332dc79b9768de0c5180d9ec (mode 644)
blob + 6234adba0fb57a7b214b8a02c98cd9baf6e8fc0d (mode 755)
--- dulwich/cli.py
+++ dulwich/cli.py
@@ -262,6 +262,19 @@ class cmd_clone(Command):
             dest="branch",
             type=str,
             help=("Check out branch instead of branch pointed to by remote " "HEAD"),
+        )
+        parser.add_option(
+            "--refspec",
+            dest="refspec",
+            type=str,
+            help="References to fetch",
+            action="append",
+        )
+        parser.add_option(
+            "--filter",
+            dest="filter_spec",
+            type=str,
+            help="git-rev-list-style object filter",
         )
         options, args = parser.parse_args(args)
 
@@ -282,6 +295,8 @@ class cmd_clone(Command):
                 bare=options.bare,
                 depth=options.depth,
                 branch=options.branch,
+                refspec=options.refspec,
+                filter_spec=options.filter_spec,
             )
         except GitProtocolError as e:
             print(f"{e}")
@@ -586,13 +601,17 @@ class cmd_pack_objects(Command):
 
 class cmd_pull(Command):
     def run(self, args):
-        parser = optparse.OptionParser()
-        options, args = parser.parse_args(args)
-        try:
-            from_location = args[0]
-        except IndexError:
-            from_location = None
-        porcelain.pull(".", from_location)
+        """Pull into the current directory from an optional remote location."""
+        parser = argparse.ArgumentParser()
+        # Keep the location positional and optional, as the optparse version did.
+        parser.add_argument("from_location", type=str, nargs="?")
+        # Repeatable flag (like cmd_clone) so it cannot swallow the positional.
+        parser.add_argument("--refspec", type=str, action="append")
+        # Plain string: nargs=1 would pass a one-element list as filter_spec.
+        parser.add_argument("--filter", type=str)
+        args = parser.parse_args(args)
+        location = args.from_location or None
+        porcelain.pull(".", location, args.refspec or None, filter_spec=args.filter)
 
 
 class cmd_push(Command):
blob - 16f4d3bd89e5503b141748fd01047c7ef3d5560e
blob + 02d2acd706a90f271fe2780d6b328ec51f5fc700
--- dulwich/client.py
+++ dulwich/client.py
@@ -82,6 +82,8 @@ from .protocol import (
     CAPABILITIES_REF,
     CAPABILITY_AGENT,
     CAPABILITY_DELETE_REFS,
+    CAPABILITY_FETCH,
+    CAPABILITY_FILTER,
     CAPABILITY_INCLUDE_TAG,
     CAPABILITY_MULTI_ACK,
     CAPABILITY_MULTI_ACK_DETAILED,
@@ -237,8 +239,22 @@ class ReportStatusParser:
             self._ref_statuses.append(ref_status)
 
 
-def read_pkt_refs(pkt_seq):
-    server_capabilities = None
+def negotiate_protocol_version(proto):
+    """Return 2 if the server's first pkt-line is the v2 banner, else 0."""
+    first_pkt = proto.read_pkt_line()
+    if first_pkt != b"version 2\n":
+        proto.unread_pkt_line(first_pkt)  # push back so the next reader sees it
+    return 2 if first_pkt == b"version 2\n" else 0
+
+
+def read_server_capabilities(pkt_seq):
+    """Collect every packet of a capability advertisement into a set."""
+    # Packets are stored verbatim: nothing is stripped or parsed here, and
+    # duplicate packets collapse into a single entry.
+    return set(pkt_seq)
+
+
+def read_pkt_refs(pkt_seq, server_capabilities=None):
     refs = {}
     # Receive refs from server
     for pkt in pkt_seq:
@@ -247,6 +263,18 @@ def read_pkt_refs(pkt_seq):
             raise GitProtocolError(ref.decode("utf-8", "replace"))
         if server_capabilities is None:
             (ref, server_capabilities) = extract_capabilities(ref)
+        else:  # Git protocol-v2:
+            try:
+                symref, target = ref.split(b" ", 1)
+            except ValueError:
+                pass
+            else:
+                if symref and target and target[:14] == b"symref-target:":
+                    server_capabilities.add(
+                        b"%s=%s:%s"
+                        % (CAPABILITY_SYMREF, symref, target.split(b":", 1)[1])
+                    )
+                    ref = symref
         refs[ref] = sha
 
     if len(refs) == 0:
@@ -413,6 +441,8 @@ def _read_shallow_updates(pkt_seq):
     new_shallow = set()
     new_unshallow = set()
     for pkt in pkt_seq:
+        if pkt == b"shallow-info\n":  # Git-protocol v2
+            continue
         try:
             cmd, sha = pkt.split(b" ", 1)
         except ValueError:
@@ -498,7 +528,25 @@ def _read_side_band64k_data(pkt_seq: Iterable[bytes]) 
         yield channel, pkt[1:]
 
 
-def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read, depth):
+def find_capability(capabilities, key, value):
+    """Return the first capability named key that offers value, else None."""
+    for capability in capabilities:
+        k, v = parse_capability(capability)
+        # A bare capability (no "=...") may parse with v=None; treat as no values.
+        features = v.split(b" ") if v else ()
+        if k == key and (not value or value in features):  # falsy value: key only
+            return capability
+
+
+def _handle_upload_pack_head(
+    proto,
+    capabilities,
+    graph_walker,
+    wants,
+    can_read,
+    depth,
+    protocol_version=0,
+):
     """Handle the head of a 'git-upload-pack' request.
 
     Args:
@@ -509,15 +557,23 @@ def _handle_upload_pack_head(proto, capabilities, grap
       can_read: function that returns a boolean that indicates
     whether there is extra graph data to read on proto
       depth: Depth for request
+      protocol_version: desired Git protocol version; defaults to v0
     """
     assert isinstance(wants, list) and isinstance(wants[0], bytes)
-    proto.write_pkt_line(
-        COMMAND_WANT + b" " + wants[0] + b" " + b" ".join(sorted(capabilities)) + b"\n"
-    )
+    wantcmd = COMMAND_WANT + b" " + wants[0]
+    if protocol_version != 2:
+        wantcmd += b" " + b" ".join(sorted(capabilities))
+    wantcmd += b"\n"
+    proto.write_pkt_line(wantcmd)
     for want in wants[1:]:
         proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
     if depth not in (0, None) or getattr(graph_walker, "shallow", None):
-        if CAPABILITY_SHALLOW not in capabilities:
+        if protocol_version == 2:
+            if not find_capability(capabilities, CAPABILITY_FETCH, CAPABILITY_SHALLOW):
+                raise GitProtocolError(
+                    "server does not support shallow capability required for " "depth"
+                )
+        elif CAPABILITY_SHALLOW not in capabilities:
             raise GitProtocolError(
                 "server does not support shallow capability required for " "depth"
             )
@@ -527,7 +583,10 @@ def _handle_upload_pack_head(proto, capabilities, grap
             proto.write_pkt_line(
                 COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
             )
-    proto.write_pkt_line(None)
+        if protocol_version == 2:
+            proto.write_pkt_line(None)
+    if protocol_version != 2:
+        proto.write_pkt_line(None)
 
     if depth not in (0, None):
         if can_read is not None:
@@ -555,6 +614,8 @@ def _handle_upload_pack_head(proto, capabilities, grap
                     )
         have = next(graph_walker)
     proto.write_pkt_line(COMMAND_DONE + b"\n")
+    if protocol_version == 2:
+        proto.write_pkt_line(None)
     return (new_shallow, new_unshallow)
 
 
@@ -565,6 +626,7 @@ def _handle_upload_pack_tail(
     pack_data: Callable[[bytes], None],
     progress: Optional[Callable[[bytes], None]] = None,
     rbufsize=_RBUFSIZE,
+    protocol_version=0,
 ):
     """Handle the tail of a 'git-upload-pack' request.
 
@@ -579,18 +641,21 @@ def _handle_upload_pack_tail(
     pkt = proto.read_pkt_line()
     while pkt:
         parts = pkt.rstrip(b"\n").split(b" ")
-        if parts[0] == b"ACK":
-            graph_walker.ack(parts[1])
-        if parts[0] == b"NAK":
-            graph_walker.nak()
-        if len(parts) < 3 or parts[2] not in (
-            b"ready",
-            b"continue",
-            b"common",
-        ):
+        if protocol_version == 2 and parts[0] != "packfile":
             break
+        else:
+            if parts[0] == b"ACK":
+                graph_walker.ack(parts[1])
+            if parts[0] == b"NAK":
+                graph_walker.nak()
+            if len(parts) < 3 or parts[2] not in (
+                b"ready",
+                b"continue",
+                b"common",
+            ):
+                break
         pkt = proto.read_pkt_line()
-    if CAPABILITY_SIDE_BAND_64K in capabilities:
+    if CAPABILITY_SIDE_BAND_64K in capabilities or protocol_version == 2:
         if progress is None:
             # Just ignore progress data
 
@@ -624,6 +689,7 @@ class GitClient:
         report_activity=None,
         quiet=False,
         include_tags=False,
+        **kwargs,
     ) -> None:
         """Create a new GitClient instance.
 
@@ -646,6 +712,7 @@ class GitClient:
             self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
         if include_tags:
             self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG)
+        self.protocol_version = 0  # our default Git protocol version
 
     def get_url(self, path):
         """Retrieves full url to given path.
@@ -711,6 +778,8 @@ class GitClient:
         branch=None,
         progress=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ) -> Repo:
         """Clone a repository."""
         from .refs import _set_default_branch, _set_head, _set_origin_head
@@ -749,7 +818,14 @@ class GitClient:
                 target_config.write_to_path()
 
             ref_message = b"clone: from " + encoded_path
-            result = self.fetch(path, target, progress=progress, depth=depth)
+            result = self.fetch(
+                path,
+                target,
+                progress=progress,
+                depth=depth,
+                ref_prefix=ref_prefix,
+                filter_spec=filter_spec,
+            )
             if origin is not None:
                 _import_remote_refs(
                     target.refs, origin, result.refs, message=ref_message
@@ -798,6 +874,8 @@ class GitClient:
         ] = None,
         progress: Optional[Callable[[bytes], None]] = None,
         depth: Optional[int] = None,
+        ref_prefix: Optional[List[bytes]] = [],
+        filter_spec: Optional[bytes] = None,
     ) -> FetchPackResult:
         """Fetch into a target repository.
 
@@ -809,6 +887,15 @@ class GitClient:
             list of shas to fetch. Defaults to all shas.
           progress: Optional progress function
           depth: Depth to fetch at
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           Dictionary with all remote refs (not just those fetched)
@@ -844,6 +931,8 @@ class GitClient:
                 f.write,
                 progress=progress,
                 depth=depth,
+                ref_prefix=ref_prefix,
+                filter_spec=filter_spec,
             )
         except BaseException:
             abort()
@@ -862,6 +951,8 @@ class GitClient:
         *,
         progress: Optional[Callable[[bytes], None]] = None,
         depth: Optional[int] = None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
         """Retrieve a pack from a git smart server.
 
@@ -874,6 +965,15 @@ class GitClient:
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           FetchPackResult object
@@ -923,7 +1023,7 @@ class GitClient:
             error message if the ref failed to update
             None if it was updated successfully
         """
-        if CAPABILITY_SIDE_BAND_64K in capabilities:
+        if CAPABILITY_SIDE_BAND_64K in capabilities or self.protocol_version == 2:
             if progress is None:
 
                 def progress(x):
@@ -955,6 +1055,7 @@ class GitClient:
         # TODO(jelmer): warn about unknown capabilities
         symrefs = {}
         agent = None
+        fetch_capa = None
         for capability in server_capabilities:
             k, v = parse_capability(capability)
             if k == CAPABILITY_SYMREF:
@@ -962,8 +1063,24 @@ class GitClient:
                 symrefs[src] = dst
             if k == CAPABILITY_AGENT:
                 agent = v
+            if self.protocol_version == 2 and k == CAPABILITY_FETCH:
+                fetch_capa = CAPABILITY_FETCH
+                fetch_features = []
+                v = v.strip()
+                if b"shallow" in v.split(b" "):
+                    fetch_features.append(CAPABILITY_SHALLOW)
+                if b"filter" in v.split(b" "):
+                    fetch_features.append(CAPABILITY_FILTER)
+                for i in range(len(fetch_features)):
+                    if i == 0:
+                        fetch_capa += b"="
+                    else:
+                        fetch_capa += b" "
+                    fetch_capa += fetch_features[i]
 
         negotiated_capabilities = self._fetch_capabilities & server_capabilities
+        if fetch_capa:
+            negotiated_capabilities.add(fetch_capa)
         return (negotiated_capabilities, symrefs, agent)
 
     def archive(
@@ -1048,6 +1165,7 @@ class TraditionalGitClient(GitClient):
           SendPackError: if server rejects the pack data
 
         """
+        self.protocol_version = 0
         proto, unused_can_read, stderr = self._connect(b"receive-pack", path)
         with proto:
             try:
@@ -1130,6 +1248,8 @@ class TraditionalGitClient(GitClient):
         pack_data,
         progress=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
         """Retrieve a pack from a git smart server.
 
@@ -1142,15 +1262,29 @@ class TraditionalGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           FetchPackResult object
 
         """
         proto, can_read, stderr = self._connect(b"upload-pack", path)
+        self.protocol_version = negotiate_protocol_version(proto)
         with proto:
             try:
-                refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq())
+                if self.protocol_version == 2:
+                    server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+                    refs = None
+                else:
+                    refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq())
             except HangupException as exc:
                 raise _remote_error_from_stderr(stderr) from exc
             (
@@ -1158,6 +1292,17 @@ class TraditionalGitClient(GitClient):
                 symrefs,
                 agent,
             ) = self._negotiate_upload_pack_capabilities(server_capabilities)
+
+            if self.protocol_version == 2:
+                proto.write_pkt_line(b"command=ls-refs\n")
+                proto.write(b"0001")  # delim-pkt
+                proto.write_pkt_line(b"symrefs")
+                for prefix in ref_prefix:
+                    proto.write_pkt_line(b"ref-prefix " + prefix)
+                proto.write_pkt_line(None)
+                refs, server_capabilities = read_pkt_refs(
+                    proto.read_pkt_seq(), server_capabilities
+                )
 
             if refs is None:
                 proto.write_pkt_line(None)
@@ -1176,6 +1321,16 @@ class TraditionalGitClient(GitClient):
             if not wants:
                 proto.write_pkt_line(None)
                 return FetchPackResult(refs, symrefs, agent)
+            if self.protocol_version == 2:
+                proto.write_pkt_line(b"command=fetch\n")
+                proto.write(b"0001")  # delim-pkt
+                if (
+                    find_capability(
+                        negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
+                    )
+                    and filter_spec
+                ):
+                    proto.write(pkt_line(b"filter %s\n" % filter_spec))
             (new_shallow, new_unshallow) = _handle_upload_pack_head(
                 proto,
                 negotiated_capabilities,
@@ -1183,6 +1338,7 @@ class TraditionalGitClient(GitClient):
                 wants,
                 can_read,
                 depth=depth,
+                protocol_version=self.protocol_version,
             )
             _handle_upload_pack_tail(
                 proto,
@@ -1190,6 +1346,7 @@ class TraditionalGitClient(GitClient):
                 graph_walker,
                 pack_data,
                 progress,
+                protocol_version=self.protocol_version,
             )
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
 
@@ -1197,9 +1354,20 @@ class TraditionalGitClient(GitClient):
         """Retrieve the current refs from a git smart server."""
         # stock `git ls-remote` uses upload-pack
         proto, _, stderr = self._connect(b"upload-pack", path)
+        self.protocol_version = negotiate_protocol_version(proto)
+        if self.protocol_version == 2:
+            server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+            proto.write_pkt_line(b"command=ls-refs\n")
+            proto.write(b"0001")  # delim-pkt
+            proto.write_pkt_line(b"symrefs")
+            proto.write_pkt_line(None)
+        else:
+            server_capabilities = None  # read_pkt_refs will find them
         with proto:
             try:
-                refs, _ = read_pkt_refs(proto.read_pkt_seq())
+                refs, server_capabilities = read_pkt_refs(
+                    proto.read_pkt_seq(), server_capabilities
+                )
             except HangupException as exc:
                 raise _remote_error_from_stderr(stderr) from exc
             proto.write_pkt_line(None)
@@ -1314,8 +1482,20 @@ class TCPGitClient(TraditionalGitClient):
         )
         if path.startswith(b"/~"):
             path = path[1:]
+        if cmd == b"upload-pack":
+            self.protocol_version = 2
+            # Git protocol version advertisement is hidden behind two NUL bytes
+            # for compatibility with older Git server implementations, which
+            # would crash if something other than a "host=" header was found
+            # after the first NUL byte.
+            version_str = b"\0\0version=%d\0" % self.protocol_version
+        else:
+            version_str = b""
+            self.protocol_version = 0
         # TODO(jelmer): Alternative to ascii?
-        proto.send_cmd(b"git-" + cmd, path, b"host=" + self._host.encode("ascii"))
+        proto.send_cmd(
+            b"git-" + cmd, path, b"host=" + self._host.encode("ascii") + version_str
+        )
         return proto, lambda: _fileno_can_read(s), None
 
 
@@ -1492,7 +1672,16 @@ class LocalGitClient(GitClient):
 
         return SendPackResult(new_refs, ref_status=ref_status)
 
-    def fetch(self, path, target, determine_wants=None, progress=None, depth=None):
+    def fetch(
+        self,
+        path,
+        target,
+        determine_wants=None,
+        progress=None,
+        depth=None,
+        ref_prefix=[],
+        filter_spec=None,
+    ):
         """Fetch into a target repository.
 
         Args:
@@ -1503,6 +1692,15 @@ class LocalGitClient(GitClient):
             list of shas to fetch. Defaults to all shas.
           progress: Optional progress function
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           FetchPackResult object
@@ -1525,6 +1723,8 @@ class LocalGitClient(GitClient):
         pack_data,
         progress=None,
         depth=None,
+        ref_prefix: Optional[List[bytes]] = [],
+        filter_spec: Optional[bytes] = None,
     ) -> FetchPackResult:
         """Retrieve a pack from a local on-disk repository.
 
@@ -1537,6 +1737,15 @@ class LocalGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           depth: Shallow fetch depth
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           FetchPackResult object
@@ -2009,6 +2218,16 @@ class AbstractHttpGitClient(GitClient):
         headers = {"Accept": "*/*"}
         if self.dumb is not True:
             tail += "?service={}".format(service.decode("ascii"))
+            # Enable protocol v2 only when fetching, not when pushing.
+            # Git does not yet implement push over protocol v2, and as of
+            # git version 2.37.3 git-http-backend's behaviour is erratic if
+            # we try: It responds with a Git-protocol-v1-style ref listing
+            # which lacks the "001f# service=git-receive-pack" marker.
+            if service == b"git-upload-pack":
+                self.protocol_version = 2
+                headers["Git-Protocol"] = "version=2"
+            else:
+                self.protocol_version = 0
         url = urljoin(base_url, tail)
         resp, read = self._http_request(url, headers)
 
@@ -2025,20 +2244,50 @@ class AbstractHttpGitClient(GitClient):
                 "application/x-git-"
             )
             if not self.dumb:
-                proto = Protocol(read, None)
-                # The first line should mention the service
-                try:
-                    [pkt] = list(proto.read_pkt_seq())
-                except ValueError as exc:
-                    raise GitProtocolError(
-                        "unexpected number of packets received"
-                    ) from exc
-                if pkt.rstrip(b"\n") != (b"# service=" + service):
-                    raise GitProtocolError(
-                        f"unexpected first line {pkt!r} from smart server"
+
+                def begin_protocol_v2(proto):
+                    server_capabilities = read_server_capabilities(proto.read_pkt_seq())
+                    resp, read = self._smart_request(
+                        service.decode("ascii"),
+                        base_url,
+                        pkt_line(b"command=ls-refs\n")
+                        + b"0001"
+                        + pkt_line(b"symrefs")
+                        + b"0000",
                     )
-                return (*read_pkt_refs(proto.read_pkt_seq()), base_url)
+                    proto = Protocol(read, None)
+                    return server_capabilities, resp, read, proto
+
+                proto = Protocol(read, None)
+                self.protocol_version = negotiate_protocol_version(proto)
+                if self.protocol_version == 2:
+                    server_capabilities, resp, read, proto = begin_protocol_v2(proto)
+                else:
+                    server_capabilities = None  # read_pkt_refs will find them
+                    try:
+                        [pkt] = list(proto.read_pkt_seq())
+                    except ValueError as exc:
+                        raise GitProtocolError(
+                            "unexpected number of packets received"
+                        ) from exc
+                    if pkt.rstrip(b"\n") != (b"# service=" + service):
+                        raise GitProtocolError(
+                            f"unexpected first line {pkt!r} from smart server"
+                        )
+                    # Github sends "version 2" after sending the service name.
+                    # Try to negotiate protocol version 2 again.
+                    self.protocol_version = negotiate_protocol_version(proto)
+                    if self.protocol_version == 2:
+                        server_capabilities, resp, read, proto = begin_protocol_v2(
+                            proto
+                        )
+                (
+                    refs,
+                    server_capabilities,
+                ) = read_pkt_refs(proto.read_pkt_seq(), server_capabilities)
+                return refs, server_capabilities, base_url
             else:
+                self.protocol_version = 0
                 return read_info_refs(resp), set(), base_url
         finally:
             resp.close()
@@ -2056,6 +2305,8 @@ class AbstractHttpGitClient(GitClient):
             "Content-Type": f"application/x-{service}-request",
             "Accept": result_content_type,
         }
+        if self.protocol_version == 2:
+            headers["Git-Protocol"] = "version=2"
         if isinstance(data, bytes):
             headers["Content-Length"] = str(len(data))
         resp, read = self._http_request(url, headers, data)
@@ -2138,6 +2389,8 @@ class AbstractHttpGitClient(GitClient):
         pack_data,
         progress=None,
         depth=None,
+        ref_prefix=[],
+        filter_spec=None,
     ):
         """Retrieve a pack from a git smart server.
 
@@ -2148,6 +2401,15 @@ class AbstractHttpGitClient(GitClient):
           pack_data: Callback called for each bit of data in the pack
           progress: Callback for progress reports (strings)
           depth: Depth for request
+          ref_prefix: Prefix of desired references, as a list of bytestrings.
+            The server will limit the list of references sent to this prefix,
+            provided this feature is supported and sufficient server-side
+            resources are available to match all references against the prefix.
+            Clients must be prepared to filter out any non-requested references
+            themselves. This feature is an entirely optional optimization.
+          filter_spec: A git-rev-list-style object filter spec, as bytestring.
+            Only used if the server supports the Git protocol-v2 'filter'
+            feature, and ignored otherwise.
 
         Returns:
           FetchPackResult object
@@ -2181,10 +2443,21 @@ class AbstractHttpGitClient(GitClient):
             wants,
             can_read=None,
             depth=depth,
+            protocol_version=self.protocol_version,
         )
-        resp, read = self._smart_request(
-            "git-upload-pack", url, data=req_data.getvalue()
-        )
+        if self.protocol_version == 2:
+            data = pkt_line(b"command=fetch\n") + b"0001"
+            if (
+                find_capability(
+                    negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER
+                )
+                and filter_spec
+            ):
+                data += pkt_line(b"filter %s\n" % filter_spec)
+            data += req_data.getvalue()
+        else:
+            data = req_data.getvalue()
+        resp, read = self._smart_request("git-upload-pack", url, data)
         try:
             resp_proto = Protocol(read, None)
             if new_shallow is None and new_unshallow is None:
@@ -2197,6 +2470,7 @@ class AbstractHttpGitClient(GitClient):
                 graph_walker,
                 pack_data,
                 progress,
+                protocol_version=self.protocol_version,
             )
             return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow)
         finally:
blob - b2d92283d8a7884ff4e61db2a6a8f4c6a5b0a56d
blob + c803373375d6ae2badb37ecc784255a7c3992512
--- dulwich/porcelain.py
+++ dulwich/porcelain.py
@@ -484,8 +484,28 @@ def init(path=".", *, bare=False, symlinks: Optional[b
         return Repo.init_bare(path)
     else:
         return Repo.init(path, symlinks=symlinks)
+
+
+def encode_refspecs(refspecs, refspec_encoding):
+    """Normalize refspecs to a list of bytestrings.
+
+    Args:
+      refspecs: None, one bytes/str refspec, or an iterable of them.
+      refspec_encoding: Encoding used for str refspecs; bytes pass through.
+    Returns: [b"HEAD"] for None, else the refspecs as bytes, in order.
+    """
+    if refspecs is None:
+        return [b"HEAD"]
+    # A lone bytes/str refspec is handled as a one-element list.
+    if isinstance(refspecs, (bytes, str)):
+        refspecs = [refspecs]
+    encoded = [
+        r if isinstance(r, bytes) else r.encode(refspec_encoding) for r in refspecs
+    ]

+    return encoded
 
+
 def clone(
     source,
     target=None,
@@ -497,6 +517,9 @@ def clone(
     depth: Optional[int] = None,
     branch: Optional[Union[str, bytes]] = None,
     config: Optional[Config] = None,
+    refspecs=None,
+    refspec_encoding=DEFAULT_ENCODING,
+    filter_spec=None,
     **kwargs,
 ):
     """Clone a local or remote git repository.
@@ -513,6 +536,13 @@ def clone(
       branch: Optional branch or tag to be used as HEAD in the new repository
         instead of the cloned repository's HEAD.
       config: Configuration to use
+      refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of
+        bytestring/string.
+      refspec_encoding: Character encoding of bytestrings provided in the refspecs parameter.
+        If not specified, the internal default encoding will be used.
+      filter_spec: A git-rev-list-style object filter spec, as an ASCII string.
+        Only used if the server supports the Git protocol-v2 'filter'
+        feature, and ignored otherwise.
     Returns: The new repository
     """
     if outstream is not None:
@@ -533,6 +563,8 @@ def clone(
     if checkout and bare:
         raise Error("checkout and bare are incompatible")
 
+    encoded_refs = encode_refspecs(refspecs, refspec_encoding)
+
     if target is None:
         target = source.split("/")[-1]
 
@@ -542,6 +574,9 @@ def clone(
     mkdir = not os.path.exists(target)
 
     (client, path) = get_transport_and_path(source, config=config, **kwargs)
+
+    if filter_spec:
+        filter_spec = filter_spec.encode("ascii")
 
     return client.clone(
         path,
@@ -553,6 +588,8 @@ def clone(
         branch=branch,
         progress=errstream.write,
         depth=depth,
+        ref_prefix=encoded_refs,
+        filter_spec=filter_spec,
     )
 
 
@@ -1238,6 +1275,8 @@ def pull(
     errstream=default_bytes_err_stream,
     fast_forward=True,
     force=False,
+    refspec_encoding=DEFAULT_ENCODING,
+    filter_spec=None,
     **kwargs,
 ):
     """Pull from remote via dulwich.client.
@@ -1245,21 +1284,26 @@ def pull(
     Args:
       repo: Path to repository
       remote_location: Location of the remote
-      refspecs: refspecs to fetch
+      refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of
+        bytestring/string.
       outstream: A stream file to write to output
       errstream: A stream file to write to errors
+      refspec_encoding: Encoding used to convert string refspecs to bytestrings
+        before they are matched against the remote refs. Defaults to DEFAULT_ENCODING.
+      filter_spec: A git-rev-list-style object filter spec, as an ASCII string.
+        Only used if the server supports the Git protocol-v2 'filter'
+        feature, and ignored otherwise.
     """
     # Open the repo
     with open_repo_closing(repo) as r:
         (remote_name, remote_location) = get_remote_repo(r, remote_location)
 
-        if refspecs is None:
-            refspecs = [b"HEAD"]
+        encoded_refs = encode_refspecs(refspecs, refspec_encoding)
         selected_refs = []
 
         def determine_wants(remote_refs, **kwargs):
             selected_refs.extend(
-                parse_reftuples(remote_refs, r.refs, refspecs, force=force)
+                parse_reftuples(remote_refs, r.refs, encoded_refs, force=force)
             )
             return [
                 remote_refs[lh]
@@ -1270,8 +1314,15 @@ def pull(
         client, path = get_transport_and_path(
             remote_location, config=r.get_config_stack(), **kwargs
         )
+        if filter_spec:
+            filter_spec = filter_spec.encode("ascii")
         fetch_result = client.fetch(
-            path, r, progress=errstream.write, determine_wants=determine_wants
+            path,
+            r,
+            progress=errstream.write,
+            determine_wants=determine_wants,
+            ref_prefix=encoded_refs,  # encoded bytes form, as clone() passes
+            filter_spec=filter_spec,
         )
         for lh, rh, force_ref in selected_refs:
             if not force_ref and rh in r.refs:
blob - de2618779071a60d78270ed38abfb4f5b95e5025
blob + 3d25e6798867b28a1a15ccd3e785ab72231fd3f8
--- dulwich/protocol.py
+++ dulwich/protocol.py
@@ -64,6 +64,8 @@ CAPABILITY_AGENT = b"agent"
 CAPABILITY_SYMREF = b"symref"
 CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
 CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
+CAPABILITY_FETCH = b"fetch"
+CAPABILITY_FILTER = b"filter"
 
 # Magic ref that is used to attach capabilities to when
 # there are no refs. Should always be ste to ZERO_SHA.
@@ -90,6 +92,7 @@ KNOWN_UPLOAD_CAPABILITIES = set(
         CAPABILITY_DEEPEN_RELATIVE,
         CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
         CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
+        CAPABILITY_FETCH,
     ]
 )
 KNOWN_RECEIVE_CAPABILITIES = set(
blob - 6202cbdff5d845e8a52124c1c928514a8f8c3c83
blob + 7763d62967420378b4d71d935f47a3c781a01656
--- tests/compat/test_client.py
+++ tests/compat/test_client.py
@@ -570,6 +570,9 @@ class GitHTTPRequestHandler(http.server.SimpleHTTPRequ
         co = self.headers.get("cookie")
         if co:
             env["HTTP_COOKIE"] = co
+        proto = self.headers.get("Git-Protocol")
+        if proto:
+            env["GIT_PROTOCOL"] = proto
         # XXX Other HTTP_* headers
         # Since we're setting the env in the parent, provide empty
         # values to override previously set values