commit 2b975ec7e49caf98c1c4d0b1eea848d0dd86ae77 from: Stefan Sperling date: Tue Jun 25 08:54:00 2024 UTC add initial support for Git protocol v2 Implement Git protocol version negotiation and use Git protocol v2 for fetches if supported. For now, the observable behaviour of Dulwich is equivalent regardless of protocol version, except that two new features may be used if the server supports Git protocol v2. The first feature is a reference prefix list which may be used to filter refs server-side. This can be used to reduce the size of the initial reference listing announced by the server. Reducing the size of this announcement was a major design goal for protocol v2 to avoid sending very large announcements when a repository contains a lot of references. This feature is intended as an optimization which servers are free to ignore depending on available server-side resources. Therefore, users of Dulwich should still be prepared to filter redundant refs manually (this limitation also applies to Git itself). A new --refspec porcelain option is provided in order to test this feature on the command line. The second feature is an object filter specification, which corresponds to the --filter option of 'git clone'. This can be used to omit objects while cloning repositories. For instance, the following command will clone a given repository without fetching any blob objects: dulwich clone --filter blob:none --bare REPO_URL (In this example the --bare option is used because creation of a work tree would fail without any blobs present.) The test suite now enables protocol v2 and keeps passing for me. 
commit - 436ce9f94137e46d527d8260fe79242e1c82e00b commit + 2b975ec7e49caf98c1c4d0b1eea848d0dd86ae77 blob - e856df4552148033332dc79b9768de0c5180d9ec (mode 644) blob + 6234adba0fb57a7b214b8a02c98cd9baf6e8fc0d (mode 755) --- dulwich/cli.py +++ dulwich/cli.py @@ -262,6 +262,19 @@ class cmd_clone(Command): dest="branch", type=str, help=("Check out branch instead of branch pointed to by remote " "HEAD"), + ) + parser.add_option( + "--refspec", + dest="refspec", + type=str, + help="References to fetch", + action="append", + ) + parser.add_option( + "--filter", + dest="filter_spec", + type=str, + help="git-rev-list-style object filter", ) options, args = parser.parse_args(args) @@ -282,6 +295,8 @@ class cmd_clone(Command): bare=options.bare, depth=options.depth, branch=options.branch, + refspec=options.refspec, + filter_spec=options.filter_spec, ) except GitProtocolError as e: print(f"{e}") @@ -586,13 +601,17 @@ class cmd_pack_objects(Command): class cmd_pull(Command): def run(self, args): - parser = optparse.OptionParser() - options, args = parser.parse_args(args) - try: - from_location = args[0] - except IndexError: - from_location = None - porcelain.pull(".", from_location) + parser = argparse.ArgumentParser() + parser.add_argument("--from_location", type=str) + parser.add_argument("--refspec", type=str, nargs="*") + parser.add_argument("--filter", type=str, nargs=1) + args = parser.parse_args(args) + porcelain.pull( + ".", + args.from_location or None, + args.refspec or None, + filter_spec=args.filter, + ) class cmd_push(Command): blob - 16f4d3bd89e5503b141748fd01047c7ef3d5560e blob + 02d2acd706a90f271fe2780d6b328ec51f5fc700 --- dulwich/client.py +++ dulwich/client.py @@ -82,6 +82,8 @@ from .protocol import ( CAPABILITIES_REF, CAPABILITY_AGENT, CAPABILITY_DELETE_REFS, + CAPABILITY_FETCH, + CAPABILITY_FILTER, CAPABILITY_INCLUDE_TAG, CAPABILITY_MULTI_ACK, CAPABILITY_MULTI_ACK_DETAILED, @@ -237,8 +239,22 @@ class ReportStatusParser: self._ref_statuses.append(ref_status) 
-def read_pkt_refs(pkt_seq): - server_capabilities = None +def negotiate_protocol_version(proto): + pkt = proto.read_pkt_line() + if pkt == b"version 2\n": + return 2 + proto.unread_pkt_line(pkt) + return 0 + + +def read_server_capabilities(pkt_seq): + server_capabilities = [] + for pkt in pkt_seq: + server_capabilities.append(pkt) + return set(server_capabilities) + + +def read_pkt_refs(pkt_seq, server_capabilities=None): refs = {} # Receive refs from server for pkt in pkt_seq: @@ -247,6 +263,18 @@ def read_pkt_refs(pkt_seq): raise GitProtocolError(ref.decode("utf-8", "replace")) if server_capabilities is None: (ref, server_capabilities) = extract_capabilities(ref) + else: # Git protocol-v2: + try: + symref, target = ref.split(b" ", 1) + except ValueError: + pass + else: + if symref and target and target[:14] == b"symref-target:": + server_capabilities.add( + b"%s=%s:%s" + % (CAPABILITY_SYMREF, symref, target.split(b":", 1)[1]) + ) + ref = symref refs[ref] = sha if len(refs) == 0: @@ -413,6 +441,8 @@ def _read_shallow_updates(pkt_seq): new_shallow = set() new_unshallow = set() for pkt in pkt_seq: + if pkt == b"shallow-info\n": # Git-protocol v2 + continue try: cmd, sha = pkt.split(b" ", 1) except ValueError: @@ -498,7 +528,25 @@ def _read_side_band64k_data(pkt_seq: Iterable[bytes]) yield channel, pkt[1:] -def _handle_upload_pack_head(proto, capabilities, graph_walker, wants, can_read, depth): +def find_capability(capabilities, key, value): + for capability in capabilities: + k, v = parse_capability(capability) + if k != key: + continue + if value and value not in v.split(b" "): + continue + return capability + + +def _handle_upload_pack_head( + proto, + capabilities, + graph_walker, + wants, + can_read, + depth, + protocol_version=0, +): """Handle the head of a 'git-upload-pack' request. 
Args: @@ -509,15 +557,23 @@ def _handle_upload_pack_head(proto, capabilities, grap can_read: function that returns a boolean that indicates whether there is extra graph data to read on proto depth: Depth for request + protocol_version: desired Git protocol version; defaults to v0 """ assert isinstance(wants, list) and isinstance(wants[0], bytes) - proto.write_pkt_line( - COMMAND_WANT + b" " + wants[0] + b" " + b" ".join(sorted(capabilities)) + b"\n" - ) + wantcmd = COMMAND_WANT + b" " + wants[0] + if protocol_version != 2: + wantcmd += b" " + b" ".join(sorted(capabilities)) + wantcmd += b"\n" + proto.write_pkt_line(wantcmd) for want in wants[1:]: proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n") if depth not in (0, None) or getattr(graph_walker, "shallow", None): - if CAPABILITY_SHALLOW not in capabilities: + if protocol_version == 2: + if not find_capability(capabilities, CAPABILITY_FETCH, CAPABILITY_SHALLOW): + raise GitProtocolError( + "server does not support shallow capability required for " "depth" + ) + elif CAPABILITY_SHALLOW not in capabilities: raise GitProtocolError( "server does not support shallow capability required for " "depth" ) @@ -527,7 +583,10 @@ def _handle_upload_pack_head(proto, capabilities, grap proto.write_pkt_line( COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n" ) - proto.write_pkt_line(None) + if protocol_version == 2: + proto.write_pkt_line(None) + if protocol_version != 2: + proto.write_pkt_line(None) if depth not in (0, None): if can_read is not None: @@ -555,6 +614,8 @@ def _handle_upload_pack_head(proto, capabilities, grap ) have = next(graph_walker) proto.write_pkt_line(COMMAND_DONE + b"\n") + if protocol_version == 2: + proto.write_pkt_line(None) return (new_shallow, new_unshallow) @@ -565,6 +626,7 @@ def _handle_upload_pack_tail( pack_data: Callable[[bytes], None], progress: Optional[Callable[[bytes], None]] = None, rbufsize=_RBUFSIZE, + protocol_version=0, ): """Handle the tail of a 'git-upload-pack' request. 
@@ -579,18 +641,21 @@ def _handle_upload_pack_tail( pkt = proto.read_pkt_line() while pkt: parts = pkt.rstrip(b"\n").split(b" ") - if parts[0] == b"ACK": - graph_walker.ack(parts[1]) - if parts[0] == b"NAK": - graph_walker.nak() - if len(parts) < 3 or parts[2] not in ( - b"ready", - b"continue", - b"common", - ): + if protocol_version == 2 and parts[0] != "packfile": break + else: + if parts[0] == b"ACK": + graph_walker.ack(parts[1]) + if parts[0] == b"NAK": + graph_walker.nak() + if len(parts) < 3 or parts[2] not in ( + b"ready", + b"continue", + b"common", + ): + break pkt = proto.read_pkt_line() - if CAPABILITY_SIDE_BAND_64K in capabilities: + if CAPABILITY_SIDE_BAND_64K in capabilities or protocol_version == 2: if progress is None: # Just ignore progress data @@ -624,6 +689,7 @@ class GitClient: report_activity=None, quiet=False, include_tags=False, + **kwargs, ) -> None: """Create a new GitClient instance. @@ -646,6 +712,7 @@ class GitClient: self._fetch_capabilities.remove(CAPABILITY_THIN_PACK) if include_tags: self._fetch_capabilities.add(CAPABILITY_INCLUDE_TAG) + self.protocol_version = 0 # our default Git protocol version def get_url(self, path): """Retrieves full url to given path. 
@@ -711,6 +778,8 @@ class GitClient: branch=None, progress=None, depth=None, + ref_prefix=[], + filter_spec=None, ) -> Repo: """Clone a repository.""" from .refs import _set_default_branch, _set_head, _set_origin_head @@ -749,7 +818,14 @@ class GitClient: target_config.write_to_path() ref_message = b"clone: from " + encoded_path - result = self.fetch(path, target, progress=progress, depth=depth) + result = self.fetch( + path, + target, + progress=progress, + depth=depth, + ref_prefix=ref_prefix, + filter_spec=filter_spec, + ) if origin is not None: _import_remote_refs( target.refs, origin, result.refs, message=ref_message @@ -798,6 +874,8 @@ class GitClient: ] = None, progress: Optional[Callable[[bytes], None]] = None, depth: Optional[int] = None, + ref_prefix: Optional[List[bytes]] = [], + filter_spec: Optional[bytes] = None, ) -> FetchPackResult: """Fetch into a target repository. @@ -809,6 +887,15 @@ class GitClient: list of shas to fetch. Defaults to all shas. progress: Optional progress function depth: Depth to fetch at + ref_prefix: Prefix of desired references, as a list of bytestrings. + The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. 
Returns: Dictionary with all remote refs (not just those fetched) @@ -844,6 +931,8 @@ class GitClient: f.write, progress=progress, depth=depth, + ref_prefix=ref_prefix, + filter_spec=filter_spec, ) except BaseException: abort() @@ -862,6 +951,8 @@ class GitClient: *, progress: Optional[Callable[[bytes], None]] = None, depth: Optional[int] = None, + ref_prefix=[], + filter_spec=None, ): """Retrieve a pack from a git smart server. @@ -874,6 +965,15 @@ class GitClient: pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth + ref_prefix: Prefix of desired references, as a list of bytestrings. + The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. 
Returns: FetchPackResult object @@ -923,7 +1023,7 @@ class GitClient: error message if the ref failed to update None if it was updated successfully """ - if CAPABILITY_SIDE_BAND_64K in capabilities: + if CAPABILITY_SIDE_BAND_64K in capabilities or self.protocol_version == 2: if progress is None: def progress(x): @@ -955,6 +1055,7 @@ class GitClient: # TODO(jelmer): warn about unknown capabilities symrefs = {} agent = None + fetch_capa = None for capability in server_capabilities: k, v = parse_capability(capability) if k == CAPABILITY_SYMREF: @@ -962,8 +1063,24 @@ class GitClient: symrefs[src] = dst if k == CAPABILITY_AGENT: agent = v + if self.protocol_version == 2 and k == CAPABILITY_FETCH: + fetch_capa = CAPABILITY_FETCH + fetch_features = [] + v = v.strip() + if b"shallow" in v.split(b" "): + fetch_features.append(CAPABILITY_SHALLOW) + if b"filter" in v.split(b" "): + fetch_features.append(CAPABILITY_FILTER) + for i in range(len(fetch_features)): + if i == 0: + fetch_capa += b"=" + else: + fetch_capa += b" " + fetch_capa += fetch_features[i] negotiated_capabilities = self._fetch_capabilities & server_capabilities + if fetch_capa: + negotiated_capabilities.add(fetch_capa) return (negotiated_capabilities, symrefs, agent) def archive( @@ -1048,6 +1165,7 @@ class TraditionalGitClient(GitClient): SendPackError: if server rejects the pack data """ + self.protocol_version = 0 proto, unused_can_read, stderr = self._connect(b"receive-pack", path) with proto: try: @@ -1130,6 +1248,8 @@ class TraditionalGitClient(GitClient): pack_data, progress=None, depth=None, + ref_prefix=[], + filter_spec=None, ): """Retrieve a pack from a git smart server. @@ -1142,15 +1262,29 @@ class TraditionalGitClient(GitClient): pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth + ref_prefix: Prefix of desired references, as a list of bytestrings. 
+ The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. Returns: FetchPackResult object """ proto, can_read, stderr = self._connect(b"upload-pack", path) + self.protocol_version = negotiate_protocol_version(proto) with proto: try: - refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq()) + if self.protocol_version == 2: + server_capabilities = read_server_capabilities(proto.read_pkt_seq()) + refs = None + else: + refs, server_capabilities = read_pkt_refs(proto.read_pkt_seq()) except HangupException as exc: raise _remote_error_from_stderr(stderr) from exc ( @@ -1158,6 +1292,17 @@ class TraditionalGitClient(GitClient): symrefs, agent, ) = self._negotiate_upload_pack_capabilities(server_capabilities) + + if self.protocol_version == 2: + proto.write_pkt_line(b"command=ls-refs\n") + proto.write(b"0001") # delim-pkt + proto.write_pkt_line(b"symrefs") + for prefix in ref_prefix: + proto.write_pkt_line(b"ref-prefix " + prefix) + proto.write_pkt_line(None) + refs, server_capabilities = read_pkt_refs( + proto.read_pkt_seq(), server_capabilities + ) if refs is None: proto.write_pkt_line(None) @@ -1176,6 +1321,16 @@ class TraditionalGitClient(GitClient): if not wants: proto.write_pkt_line(None) return FetchPackResult(refs, symrefs, agent) + if self.protocol_version == 2: + proto.write_pkt_line(b"command=fetch\n") + proto.write(b"0001") # delim-pkt + if ( + find_capability( + negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER + ) + and filter_spec + ): + proto.write(pkt_line(b"filter %s\n" % filter_spec)) (new_shallow, 
new_unshallow) = _handle_upload_pack_head( proto, negotiated_capabilities, @@ -1183,6 +1338,7 @@ class TraditionalGitClient(GitClient): wants, can_read, depth=depth, + protocol_version=self.protocol_version, ) _handle_upload_pack_tail( proto, @@ -1190,6 +1346,7 @@ class TraditionalGitClient(GitClient): graph_walker, pack_data, progress, + protocol_version=self.protocol_version, ) return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow) @@ -1197,9 +1354,20 @@ class TraditionalGitClient(GitClient): """Retrieve the current refs from a git smart server.""" # stock `git ls-remote` uses upload-pack proto, _, stderr = self._connect(b"upload-pack", path) + self.protocol_version = negotiate_protocol_version(proto) + if self.protocol_version == 2: + server_capabilities = read_server_capabilities(proto.read_pkt_seq()) + proto.write_pkt_line(b"command=ls-refs\n") + proto.write(b"0001") # delim-pkt + proto.write_pkt_line(b"symrefs") + proto.write_pkt_line(None) + else: + server_capabilities = None # read_pkt_refs will find them with proto: try: - refs, _ = read_pkt_refs(proto.read_pkt_seq()) + refs, server_capabilities = read_pkt_refs( + proto.read_pkt_seq(), server_capabilities + ) except HangupException as exc: raise _remote_error_from_stderr(stderr) from exc proto.write_pkt_line(None) @@ -1314,8 +1482,20 @@ class TCPGitClient(TraditionalGitClient): ) if path.startswith(b"/~"): path = path[1:] + if cmd == b"upload-pack": + self.protocol_version = 2 + # Git protocol version advertisement is hidden behind two NUL bytes + # for compatibility with older Git server implementations, which + # would crash if something other than a "host=" header was found + # after the first NUL byte. + version_str = b"\0\0version=%d\0" % self.protocol_version + else: + version_str = b"" + self.protocol_version = 0 # TODO(jelmer): Alternative to ascii? 
- proto.send_cmd(b"git-" + cmd, path, b"host=" + self._host.encode("ascii")) + proto.send_cmd( + b"git-" + cmd, path, b"host=" + self._host.encode("ascii") + version_str + ) return proto, lambda: _fileno_can_read(s), None @@ -1492,7 +1672,16 @@ class LocalGitClient(GitClient): return SendPackResult(new_refs, ref_status=ref_status) - def fetch(self, path, target, determine_wants=None, progress=None, depth=None): + def fetch( + self, + path, + target, + determine_wants=None, + progress=None, + depth=None, + ref_prefix=[], + filter_spec=None, + ): """Fetch into a target repository. Args: @@ -1503,6 +1692,15 @@ class LocalGitClient(GitClient): list of shas to fetch. Defaults to all shas. progress: Optional progress function depth: Shallow fetch depth + ref_prefix: Prefix of desired references, as a list of bytestrings. + The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. Returns: FetchPackResult object @@ -1525,6 +1723,8 @@ class LocalGitClient(GitClient): pack_data, progress=None, depth=None, + ref_prefix: Optional[List[bytes]] = [], + filter_spec: Optional[bytes] = None, ) -> FetchPackResult: """Retrieve a pack from a local on-disk repository. @@ -1537,6 +1737,15 @@ class LocalGitClient(GitClient): pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Shallow fetch depth + ref_prefix: Prefix of desired references, as a list of bytestrings. 
+ The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. Returns: FetchPackResult object @@ -2009,6 +2218,16 @@ class AbstractHttpGitClient(GitClient): headers = {"Accept": "*/*"} if self.dumb is not True: tail += "?service={}".format(service.decode("ascii")) + # Enable protocol v2 only when fetching, not when pushing. + # Git does not yet implement push over protocol v2, and as of + # git version 2.37.3 git-http-backend's behaviour is erratic if + # we try: It responds with a Git-protocol-v1-style ref listing + # which lacks the "001f# service=git-receive-pack" marker. 
+ if service == b"git-upload-pack": + self.protocol_version = 2 + headers["Git-Protocol"] = "version=2" + else: + self.protocol_version = 0 url = urljoin(base_url, tail) resp, read = self._http_request(url, headers) @@ -2025,20 +2244,50 @@ class AbstractHttpGitClient(GitClient): "application/x-git-" ) if not self.dumb: - proto = Protocol(read, None) - # The first line should mention the service - try: - [pkt] = list(proto.read_pkt_seq()) - except ValueError as exc: - raise GitProtocolError( - "unexpected number of packets received" - ) from exc - if pkt.rstrip(b"\n") != (b"# service=" + service): - raise GitProtocolError( - f"unexpected first line {pkt!r} from smart server" + + def begin_protocol_v2(proto): + server_capabilities = read_server_capabilities(proto.read_pkt_seq()) + resp, read = self._smart_request( + service.decode("ascii"), + base_url, + pkt_line(b"command=ls-refs\n") + + b"0001" + + pkt_line(b"symrefs") + + b"0000", ) - return (*read_pkt_refs(proto.read_pkt_seq()), base_url) + proto = Protocol(read, None) + return server_capabilities, resp, read, proto + + proto = Protocol(read, None) + self.protocol_version = negotiate_protocol_version(proto) + if self.protocol_version == 2: + server_capabilities, resp, read, proto = begin_protocol_v2(proto) + else: + server_capabilities = None # read_pkt_refs will find them + try: + [pkt] = list(proto.read_pkt_seq()) + except ValueError as exc: + raise GitProtocolError( + "unexpected number of packets received" + ) from exc + if pkt.rstrip(b"\n") != (b"# service=" + service): + raise GitProtocolError( + f"unexpected first line {pkt!r} from smart server" + ) + # Github sends "version 2" after sending the service name. + # Try to negotiate protocol version 2 again. 
+ self.protocol_version = negotiate_protocol_version(proto) + if self.protocol_version == 2: + server_capabilities, resp, read, proto = begin_protocol_v2( + proto + ) + ( + refs, + server_capabilities, + ) = read_pkt_refs(proto.read_pkt_seq(), server_capabilities) + return refs, server_capabilities, base_url else: + self.protocol_version = 0 return read_info_refs(resp), set(), base_url finally: resp.close() @@ -2056,6 +2305,8 @@ class AbstractHttpGitClient(GitClient): "Content-Type": f"application/x-{service}-request", "Accept": result_content_type, } + if self.protocol_version == 2: + headers["Git-Protocol"] = "version=2" if isinstance(data, bytes): headers["Content-Length"] = str(len(data)) resp, read = self._http_request(url, headers, data) @@ -2138,6 +2389,8 @@ class AbstractHttpGitClient(GitClient): pack_data, progress=None, depth=None, + ref_prefix=[], + filter_spec=None, ): """Retrieve a pack from a git smart server. @@ -2148,6 +2401,15 @@ class AbstractHttpGitClient(GitClient): pack_data: Callback called for each bit of data in the pack progress: Callback for progress reports (strings) depth: Depth for request + ref_prefix: Prefix of desired references, as a list of bytestrings. + The server will limit the list of references sent to this prefix, + provided this feature is supported and sufficient server-side + resources are available to match all references against the prefix. + Clients must be prepared to filter out any non-requested references + themselves. This feature is an entirely optional optimization. + filter_spec: A git-rev-list-style object filter spec, as bytestring. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. 
Returns: FetchPackResult object @@ -2181,10 +2443,21 @@ class AbstractHttpGitClient(GitClient): wants, can_read=None, depth=depth, + protocol_version=self.protocol_version, ) - resp, read = self._smart_request( - "git-upload-pack", url, data=req_data.getvalue() - ) + if self.protocol_version == 2: + data = pkt_line(b"command=fetch\n") + b"0001" + if ( + find_capability( + negotiated_capabilities, CAPABILITY_FETCH, CAPABILITY_FILTER + ) + and filter_spec + ): + data += pkt_line(b"filter %s\n" % filter_spec) + data += req_data.getvalue() + else: + data = req_data.getvalue() + resp, read = self._smart_request("git-upload-pack", url, data) try: resp_proto = Protocol(read, None) if new_shallow is None and new_unshallow is None: @@ -2197,6 +2470,7 @@ class AbstractHttpGitClient(GitClient): graph_walker, pack_data, progress, + protocol_version=self.protocol_version, ) return FetchPackResult(refs, symrefs, agent, new_shallow, new_unshallow) finally: blob - b2d92283d8a7884ff4e61db2a6a8f4c6a5b0a56d blob + c803373375d6ae2badb37ecc784255a7c3992512 --- dulwich/porcelain.py +++ dulwich/porcelain.py @@ -484,8 +484,28 @@ def init(path=".", *, bare=False, symlinks: Optional[b return Repo.init_bare(path) else: return Repo.init(path, symlinks=symlinks) + + +def encode_refspecs(refspecs, refspec_encoding): + if refspecs is None: + return [b"HEAD"] + + def encode_refspec(ref): + if isinstance(ref, bytes): + return ref + else: + return ref.encode(refspec_encoding) + + encoded_refs = [] + if isinstance(refspecs, bytes) or isinstance(refspecs, str): + encoded_refs.append(encode_refspec(refspecs)) + else: + for ref in refspecs: + encoded_refs.append(encode_refspec(ref)) + return encoded_refs + def clone( source, target=None, @@ -497,6 +517,9 @@ def clone( depth: Optional[int] = None, branch: Optional[Union[str, bytes]] = None, config: Optional[Config] = None, + refspecs=None, + refspec_encoding=DEFAULT_ENCODING, + filter_spec=None, **kwargs, ): """Clone a local or remote git repository. 
@@ -513,6 +536,13 @@ def clone( branch: Optional branch or tag to be used as HEAD in the new repository instead of the cloned repository's HEAD. config: Configuration to use + refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of + bytestring/string. + refspec_encoding: Character encoding of bytestrings provided in the refspecs parameter. + If not specified, the internal default encoding will be used. + filter_spec: A git-rev-list-style object filter spec, as an ASCII string. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. Returns: The new repository """ if outstream is not None: @@ -533,6 +563,8 @@ def clone( if checkout and bare: raise Error("checkout and bare are incompatible") + encoded_refs = encode_refspecs(refspecs, refspec_encoding) + if target is None: target = source.split("/")[-1] @@ -542,6 +574,9 @@ def clone( mkdir = not os.path.exists(target) (client, path) = get_transport_and_path(source, config=config, **kwargs) + + if filter_spec: + filter_spec = filter_spec.encode("ascii") return client.clone( path, @@ -553,6 +588,8 @@ def clone( branch=branch, progress=errstream.write, depth=depth, + ref_prefix=encoded_refs, + filter_spec=filter_spec, ) @@ -1238,6 +1275,8 @@ def pull( errstream=default_bytes_err_stream, fast_forward=True, force=False, + refspec_encoding=DEFAULT_ENCODING, + filter_spec=None, **kwargs, ): """Pull from remote via dulwich.client. @@ -1245,21 +1284,26 @@ def pull( Args: repo: Path to repository remote_location: Location of the remote - refspecs: refspecs to fetch + refspecs: refspecs to fetch. Can be a bytestring, a string, or a list of + bytestring/string. outstream: A stream file to write to output errstream: A stream file to write to errors + refspec_encoding: Character encoding of bytestrings provided in the refspecs parameter. + If not specified, the internal default encoding will be used. 
+ filter_spec: A git-rev-list-style object filter spec, as an ASCII string. + Only used if the server supports the Git protocol-v2 'filter' + feature, and ignored otherwise. """ # Open the repo with open_repo_closing(repo) as r: (remote_name, remote_location) = get_remote_repo(r, remote_location) - if refspecs is None: - refspecs = [b"HEAD"] + encoded_refs = encode_refspecs(refspecs, refspec_encoding) selected_refs = [] def determine_wants(remote_refs, **kwargs): selected_refs.extend( - parse_reftuples(remote_refs, r.refs, refspecs, force=force) + parse_reftuples(remote_refs, r.refs, encoded_refs, force=force) ) return [ remote_refs[lh] @@ -1270,8 +1314,15 @@ def pull( client, path = get_transport_and_path( remote_location, config=r.get_config_stack(), **kwargs ) + if filter_spec: + filter_spec = filter_spec.encode("ascii") fetch_result = client.fetch( - path, r, progress=errstream.write, determine_wants=determine_wants + path, + r, + progress=errstream.write, + determine_wants=determine_wants, + ref_prefix=refspecs, + filter_spec=filter_spec, ) for lh, rh, force_ref in selected_refs: if not force_ref and rh in r.refs: blob - de2618779071a60d78270ed38abfb4f5b95e5025 blob + 3d25e6798867b28a1a15ccd3e785ab72231fd3f8 --- dulwich/protocol.py +++ dulwich/protocol.py @@ -64,6 +64,8 @@ CAPABILITY_AGENT = b"agent" CAPABILITY_SYMREF = b"symref" CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want" CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want" +CAPABILITY_FETCH = b"fetch" +CAPABILITY_FILTER = b"filter" # Magic ref that is used to attach capabilities to when # there are no refs. Should always be ste to ZERO_SHA. 
@@ -90,6 +92,7 @@ KNOWN_UPLOAD_CAPABILITIES = set( CAPABILITY_DEEPEN_RELATIVE, CAPABILITY_ALLOW_TIP_SHA1_IN_WANT, CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT, + CAPABILITY_FETCH, ] ) KNOWN_RECEIVE_CAPABILITIES = set( blob - 6202cbdff5d845e8a52124c1c928514a8f8c3c83 blob + 7763d62967420378b4d71d935f47a3c781a01656 --- tests/compat/test_client.py +++ tests/compat/test_client.py @@ -570,6 +570,9 @@ class GitHTTPRequestHandler(http.server.SimpleHTTPRequ co = self.headers.get("cookie") if co: env["HTTP_COOKIE"] = co + proto = self.headers.get("Git-Protocol") + if proto: + env["GIT_PROTOCOL"] = proto # XXX Other HTTP_* headers # Since we're setting the env in the parent, provide empty # values to override previously set values