Commit Diff


commit - 8e548f72a416a6300bcf0cfd5d46284ab39c577d
commit + 1d60719c4c8d7806c7b2df74340c6c29d9443541
blob - dd0c012a95bc90d5aa584d2a1ea1437cfa5d3329
blob + 4263bf409b25caea88ee8477d865f9009b26ea69
--- NEWS
+++ NEWS
@@ -1,3 +1,11 @@
+0.20.43	2022-06-07
+
+ * Lazily import url2pathname.
+   (Jelmer Vernooij)
+
+ * Drop caching of full HTTP response. Attempt #2.
+   (Jelmer Vernooij, Antoine Lambert, #966)
+
 0.20.42	2022-05-24
 
  * Drop ``RefsContainer.watch`` that was always flaky.
blob - 05c4d30ee39bfb34a2b0d627d8c71c5e1575b545
blob + 5daca8bf089b001f523873510c0fc3ee5bffe1ac
--- PKG-INFO
+++ PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.20.42
+Version: 0.20.43
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
blob - 1f523433c5f80ce3a5db12ff70721e7db5f40787
blob + 1645488ae85bca0627e86cebf0a6397d1fcda0f0
--- dulwich/__init__.py
+++ dulwich/__init__.py
@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 20, 42)
+__version__ = (0, 20, 43)
blob - 9120e51f673806fb2e2463068b7762ae3c89840e
blob + 168e28f0da1198afd1fbb89e87ebedda13f17406
--- dulwich/client.py
+++ dulwich/client.py
@@ -43,7 +43,6 @@ from io import BytesIO, BufferedReader
 import logging
 import os
 import select
-import shlex
 import socket
 import subprocess
 import sys
@@ -57,7 +56,6 @@ from urllib.parse import (
     urlunsplit,
     urlunparse,
 )
-from urllib.request import url2pathname
 
 
 import dulwich
@@ -116,6 +114,10 @@ from dulwich.refs import (
     _import_remote_refs,
 )
 from dulwich.repo import Repo
+
+
+# url2pathname is lazily imported
+url2pathname = None
 
 
 logger = logging.getLogger(__name__)
@@ -503,6 +505,129 @@ def _read_side_band64k_data(pkt_seq, channel_callbacks
                 cb(pkt)
 
 
+def _handle_upload_pack_head(
+    proto, capabilities, graph_walker, wants, can_read, depth
+):
+    """Handle the head of a 'git-upload-pack' request.
+
+    Args:
+      proto: Protocol object to read from
+      capabilities: List of negotiated capabilities
+      graph_walker: GraphWalker instance to call .ack() on
+      wants: List of commits to fetch
+      can_read: function that returns a boolean that indicates
+    whether there is extra graph data to read on proto
+      depth: Depth for request
+
+    Returns:
+
+    """
+    assert isinstance(wants, list) and isinstance(wants[0], bytes)
+    proto.write_pkt_line(
+        COMMAND_WANT
+        + b" "
+        + wants[0]
+        + b" "
+        + b" ".join(sorted(capabilities))
+        + b"\n"
+    )
+    for want in wants[1:]:
+        proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
+    if depth not in (0, None) or getattr(graph_walker, "shallow", None):
+        if CAPABILITY_SHALLOW not in capabilities:
+            raise GitProtocolError(
+                "server does not support shallow capability required for " "depth"
+            )
+        for sha in graph_walker.shallow:
+            proto.write_pkt_line(COMMAND_SHALLOW + b" " + sha + b"\n")
+        if depth is not None:
+            proto.write_pkt_line(
+                COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
+            )
+        proto.write_pkt_line(None)
+        if can_read is not None:
+            (new_shallow, new_unshallow) = _read_shallow_updates(proto.read_pkt_seq())
+        else:
+            new_shallow = new_unshallow = None
+    else:
+        new_shallow = new_unshallow = set()
+        proto.write_pkt_line(None)
+    have = next(graph_walker)
+    while have:
+        proto.write_pkt_line(COMMAND_HAVE + b" " + have + b"\n")
+        if can_read is not None and can_read():
+            pkt = proto.read_pkt_line()
+            parts = pkt.rstrip(b"\n").split(b" ")
+            if parts[0] == b"ACK":
+                graph_walker.ack(parts[1])
+                if parts[2] in (b"continue", b"common"):
+                    pass
+                elif parts[2] == b"ready":
+                    break
+                else:
+                    raise AssertionError(
+                        "%s not in ('continue', 'ready', 'common)" % parts[2]
+                    )
+        have = next(graph_walker)
+    proto.write_pkt_line(COMMAND_DONE + b"\n")
+    return (new_shallow, new_unshallow)
+
+
+def _handle_upload_pack_tail(
+    proto,
+    capabilities,
+    graph_walker,
+    pack_data,
+    progress=None,
+    rbufsize=_RBUFSIZE,
+):
+    """Handle the tail of a 'git-upload-pack' request.
+
+    Args:
+      proto: Protocol object to read from
+      capabilities: List of negotiated capabilities
+      graph_walker: GraphWalker instance to call .ack() on
+      pack_data: Function to call with pack data
+      progress: Optional progress reporting function
+      rbufsize: Read buffer size
+
+    Returns:
+
+    """
+    pkt = proto.read_pkt_line()
+    while pkt:
+        parts = pkt.rstrip(b"\n").split(b" ")
+        if parts[0] == b"ACK":
+            graph_walker.ack(parts[1])
+        if len(parts) < 3 or parts[2] not in (
+            b"ready",
+            b"continue",
+            b"common",
+        ):
+            break
+        pkt = proto.read_pkt_line()
+    if CAPABILITY_SIDE_BAND_64K in capabilities:
+        if progress is None:
+            # Just ignore progress data
+
+            def progress(x):
+                pass
+
+        _read_side_band64k_data(
+            proto.read_pkt_seq(),
+            {
+                SIDE_BAND_CHANNEL_DATA: pack_data,
+                SIDE_BAND_CHANNEL_PROGRESS: progress,
+            },
+        )
+    else:
+        while True:
+            data = proto.read(rbufsize)
+            if data == b"":
+                break
+            pack_data(data)
+
+
 # TODO(durin42): this doesn't correctly degrade if the server doesn't
 # support some capabilities. This should work properly with servers
 # that don't support multi_ack.
@@ -825,129 +950,7 @@ class GitClient(object):
         negotiated_capabilities = self._fetch_capabilities & server_capabilities
         return (negotiated_capabilities, symrefs, agent)
 
-    def _handle_upload_pack_head(
-        self, proto, capabilities, graph_walker, wants, can_read, depth
-    ):
-        """Handle the head of a 'git-upload-pack' request.
 
-        Args:
-          proto: Protocol object to read from
-          capabilities: List of negotiated capabilities
-          graph_walker: GraphWalker instance to call .ack() on
-          wants: List of commits to fetch
-          can_read: function that returns a boolean that indicates
-        whether there is extra graph data to read on proto
-          depth: Depth for request
-
-        Returns:
-
-        """
-        assert isinstance(wants, list) and isinstance(wants[0], bytes)
-        proto.write_pkt_line(
-            COMMAND_WANT
-            + b" "
-            + wants[0]
-            + b" "
-            + b" ".join(sorted(capabilities))
-            + b"\n"
-        )
-        for want in wants[1:]:
-            proto.write_pkt_line(COMMAND_WANT + b" " + want + b"\n")
-        if depth not in (0, None) or getattr(graph_walker, "shallow", None):
-            if CAPABILITY_SHALLOW not in capabilities:
-                raise GitProtocolError(
-                    "server does not support shallow capability required for " "depth"
-                )
-            for sha in graph_walker.shallow:
-                proto.write_pkt_line(COMMAND_SHALLOW + b" " + sha + b"\n")
-            if depth is not None:
-                proto.write_pkt_line(
-                    COMMAND_DEEPEN + b" " + str(depth).encode("ascii") + b"\n"
-                )
-            proto.write_pkt_line(None)
-            if can_read is not None:
-                (new_shallow, new_unshallow) = _read_shallow_updates(proto.read_pkt_seq())
-            else:
-                new_shallow = new_unshallow = None
-        else:
-            new_shallow = new_unshallow = set()
-            proto.write_pkt_line(None)
-        have = next(graph_walker)
-        while have:
-            proto.write_pkt_line(COMMAND_HAVE + b" " + have + b"\n")
-            if can_read is not None and can_read():
-                pkt = proto.read_pkt_line()
-                parts = pkt.rstrip(b"\n").split(b" ")
-                if parts[0] == b"ACK":
-                    graph_walker.ack(parts[1])
-                    if parts[2] in (b"continue", b"common"):
-                        pass
-                    elif parts[2] == b"ready":
-                        break
-                    else:
-                        raise AssertionError(
-                            "%s not in ('continue', 'ready', 'common)" % parts[2]
-                        )
-            have = next(graph_walker)
-        proto.write_pkt_line(COMMAND_DONE + b"\n")
-        return (new_shallow, new_unshallow)
-
-    def _handle_upload_pack_tail(
-        self,
-        proto,
-        capabilities,
-        graph_walker,
-        pack_data,
-        progress=None,
-        rbufsize=_RBUFSIZE,
-    ):
-        """Handle the tail of a 'git-upload-pack' request.
-
-        Args:
-          proto: Protocol object to read from
-          capabilities: List of negotiated capabilities
-          graph_walker: GraphWalker instance to call .ack() on
-          pack_data: Function to call with pack data
-          progress: Optional progress reporting function
-          rbufsize: Read buffer size
-
-        Returns:
-
-        """
-        pkt = proto.read_pkt_line()
-        while pkt:
-            parts = pkt.rstrip(b"\n").split(b" ")
-            if parts[0] == b"ACK":
-                graph_walker.ack(parts[1])
-            if len(parts) < 3 or parts[2] not in (
-                b"ready",
-                b"continue",
-                b"common",
-            ):
-                break
-            pkt = proto.read_pkt_line()
-        if CAPABILITY_SIDE_BAND_64K in capabilities:
-            if progress is None:
-                # Just ignore progress data
-
-                def progress(x):
-                    pass
-
-            _read_side_band64k_data(
-                proto.read_pkt_seq(),
-                {
-                    SIDE_BAND_CHANNEL_DATA: pack_data,
-                    SIDE_BAND_CHANNEL_PROGRESS: progress,
-                },
-            )
-        else:
-            while True:
-                data = proto.read(rbufsize)
-                if data == b"":
-                    break
-                pack_data(data)
-
-
 def check_wants(wants, refs):
     """Check that a set of wants is valid.
 
@@ -1141,7 +1144,7 @@ class TraditionalGitClient(GitClient):
             if not wants:
                 proto.write_pkt_line(None)
                 return FetchPackResult(refs, symrefs, agent)
-            (new_shallow, new_unshallow) = self._handle_upload_pack_head(
+            (new_shallow, new_unshallow) = _handle_upload_pack_head(
                 proto,
                 negotiated_capabilities,
                 graph_walker,
@@ -1149,7 +1152,7 @@ class TraditionalGitClient(GitClient):
                 can_read,
                 depth=depth,
             )
-            self._handle_upload_pack_tail(
+            _handle_upload_pack_tail(
                 proto,
                 negotiated_capabilities,
                 graph_walker,
@@ -1612,6 +1615,7 @@ class SubprocessSSHVendor(SSHVendor):
             )
 
         if ssh_command:
+            import shlex
             args = shlex.split(ssh_command) + ["-x"]
         else:
             args = ["ssh", "-x"]
@@ -1653,6 +1657,7 @@ class PLinkSSHVendor(SSHVendor):
     ):
 
         if ssh_command:
+            import shlex
             args = shlex.split(ssh_command) + ["-ssh"]
         elif sys.platform == "win32":
             args = ["plink.exe", "-ssh"]
@@ -1905,14 +1910,13 @@ class AbstractHttpGitClient(GitClient):
         self.dumb = dumb
         GitClient.__init__(self, **kwargs)
 
-    def _http_request(self, url, headers=None, data=None, allow_compression=False):
+    def _http_request(self, url, headers=None, data=None):
         """Perform HTTP request.
 
         Args:
           url: Request URL.
           headers: Optional custom headers to override defaults.
           data: Request data.
-          allow_compression: Allow GZipped communication.
 
         Returns:
           Tuple (`response`, `read`), where response is an `urllib3`
@@ -1931,7 +1935,7 @@ class AbstractHttpGitClient(GitClient):
         if self.dumb is not True:
             tail += "?service=%s" % service.decode("ascii")
         url = urljoin(base_url, tail)
-        resp, read = self._http_request(url, headers, allow_compression=True)
+        resp, read = self._http_request(url, headers)
 
         if resp.redirect_location:
             # Something changed (redirect!), so let's update the base URL
@@ -2092,7 +2096,7 @@ class AbstractHttpGitClient(GitClient):
             raise NotImplementedError(self.fetch_pack)
         req_data = BytesIO()
         req_proto = Protocol(None, req_data.write)
-        (new_shallow, new_unshallow) = self._handle_upload_pack_head(
+        (new_shallow, new_unshallow) = _handle_upload_pack_head(
             req_proto,
             negotiated_capabilities,
             graph_walker,
@@ -2106,8 +2110,9 @@ class AbstractHttpGitClient(GitClient):
         try:
             resp_proto = Protocol(read, None)
             if new_shallow is None and new_unshallow is None:
-                (new_shallow, new_unshallow) = _read_shallow_updates(resp_proto.read_pkt_seq())
-            self._handle_upload_pack_tail(
+                (new_shallow, new_unshallow) = _read_shallow_updates(
+                    resp_proto.read_pkt_seq())
+            _handle_upload_pack_tail(
                 resp_proto,
                 negotiated_capabilities,
                 graph_walker,
@@ -2194,15 +2199,11 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             path = path.decode("utf-8")
         return urljoin(self._base_url, path).rstrip("/") + "/"
 
-    def _http_request(self, url, headers=None, data=None, allow_compression=False):
+    def _http_request(self, url, headers=None, data=None):
         req_headers = self.pool_manager.headers.copy()
         if headers is not None:
             req_headers.update(headers)
         req_headers["Pragma"] = "no-cache"
-        if allow_compression:
-            req_headers["Accept-Encoding"] = "gzip"
-        else:
-            req_headers["Accept-Encoding"] = "identity"
 
         if data is None:
             resp = self.pool_manager.request(
@@ -2232,17 +2233,14 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             resp.redirect_location = resp.get_redirect_location()
         else:
             resp.redirect_location = resp_url if resp_url != url else ""
-        # TODO(jelmer): Remove BytesIO() call that caches entire response in
-        # memory. See https://github.com/jelmer/dulwich/issues/966
-        return resp, BytesIO(resp.data).read
+        return resp, resp.read
 
 
 HttpGitClient = Urllib3HttpGitClient
 
 
 def _win32_url_to_path(parsed) -> str:
-    """
-    Convert a file: URL to a path.
+    """Convert a file: URL to a path.
 
     https://datatracker.ietf.org/doc/html/rfc8089
     """
@@ -2264,7 +2262,10 @@ def _win32_url_to_path(parsed) -> str:
     else:
         raise NotImplementedError("Non-local file URLs are not supported")
 
-    return url2pathname(netloc + path)
+    global url2pathname
+    if url2pathname is None:
+        from urllib.request import url2pathname  # type: ignore
+    return url2pathname(netloc + path)  # type: ignore
 
 
 def get_transport_and_path_from_url(url, config=None, **kwargs):
blob - 2eab7d9cdfb1af20c1aad078a524b0de85c70d56
blob + 4ceecab182fd7d58ab8dab0dd4d0d18371d75afc
--- dulwich/config.py
+++ dulwich/config.py
@@ -30,19 +30,17 @@ import os
 import sys
 import warnings
 
-from typing import BinaryIO, Iterator, KeysView, Optional, Tuple, Union
+from typing import (
+    BinaryIO,
+    Iterable,
+    Iterator,
+    KeysView,
+    MutableMapping,
+    Optional,
+    Tuple,
+    Union,
+)
 
-try:
-    from collections.abc import (
-        Iterable,
-        MutableMapping,
-    )
-except ImportError:  # python < 3.7
-    from collections import (  # type: ignore
-        Iterable,
-        MutableMapping,
-    )
-
 from dulwich.file import GitFile
 
 
@@ -252,35 +250,52 @@ class Config(object):
         return name in self.sections()
 
 
-class ConfigDict(Config, MutableMapping):
+BytesLike = Union[bytes, str]
+Key = Tuple[bytes, ...]
+KeyLike = Union[bytes, str, Tuple[BytesLike, ...]]
+Value = Union[bytes, bool]
+ValueLike = Union[bytes, str, bool]
+
+
+class ConfigDict(Config, MutableMapping[Key, MutableMapping[bytes, Value]]):
     """Git configuration stored in a dictionary."""
 
-    def __init__(self, values=None, encoding=None):
+    def __init__(
+        self,
+        values: Union[
+            MutableMapping[Key, MutableMapping[bytes, Value]], None
+        ] = None,
+        encoding: Union[str, None] = None
+    ) -> None:
         """Create a new ConfigDict."""
         if encoding is None:
             encoding = sys.getdefaultencoding()
         self.encoding = encoding
         self._values = CaseInsensitiveOrderedMultiDict.make(values)
 
-    def __repr__(self):
+    def __repr__(self) -> str:
         return "%s(%r)" % (self.__class__.__name__, self._values)
 
-    def __eq__(self, other):
+    def __eq__(self, other: object) -> bool:
         return isinstance(other, self.__class__) and other._values == self._values
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: Key) -> MutableMapping[bytes, Value]:
         return self._values.__getitem__(key)
 
-    def __setitem__(self, key, value):
+    def __setitem__(
+        self,
+        key: Key,
+        value: MutableMapping[bytes, Value]
+    ) -> None:
         return self._values.__setitem__(key, value)
 
-    def __delitem__(self, key):
+    def __delitem__(self, key: Key) -> None:
         return self._values.__delitem__(key)
 
-    def __iter__(self):
+    def __iter__(self) -> Iterator[Key]:
         return self._values.__iter__()
 
-    def __len__(self):
+    def __len__(self) -> int:
         return self._values.__len__()
 
     @classmethod
@@ -291,11 +306,15 @@ class ConfigDict(Config, MutableMapping):
         else:
             return (parts[0], None, parts[1])
 
-    def _check_section_and_name(self, section, name):
+    def _check_section_and_name(
+        self,
+        section: KeyLike,
+        name: BytesLike
+    ) -> Tuple[Key, bytes]:
         if not isinstance(section, tuple):
             section = (section,)
 
-        section = tuple(
+        checked_section = tuple(
             [
                 subsection.encode(self.encoding)
                 if not isinstance(subsection, bytes)
@@ -307,9 +326,13 @@ class ConfigDict(Config, MutableMapping):
         if not isinstance(name, bytes):
             name = name.encode(self.encoding)
 
-        return section, name
+        return checked_section, name
 
-    def get_multivar(self, section, name):
+    def get_multivar(
+        self,
+        section: KeyLike,
+        name: BytesLike
+    ) -> Iterator[Value]:
         section, name = self._check_section_and_name(section, name)
 
         if len(section) > 1:
@@ -322,9 +345,9 @@ class ConfigDict(Config, MutableMapping):
 
     def get(  # type: ignore[override]
         self,
-        section: Union[bytes, str, Tuple[Union[bytes, str], ...]],
-        name: Union[str, bytes]
-    ) -> Optional[bytes]:
+        section: KeyLike,
+        name: BytesLike,
+    ) -> Optional[Value]:
         section, name = self._check_section_and_name(section, name)
 
         if len(section) > 1:
@@ -335,18 +358,26 @@ class ConfigDict(Config, MutableMapping):
 
         return self._values[(section[0],)][name]
 
-    def set(self, section, name, value):
+    def set(
+        self,
+        section: KeyLike,
+        name: BytesLike,
+        value: ValueLike,
+    ) -> None:
         section, name = self._check_section_and_name(section, name)
 
-        if type(value) not in (bool, bytes):
+        if not isinstance(value, (bytes, bool)):
             value = value.encode(self.encoding)
 
         self._values.setdefault(section)[name] = value
 
-    def items(self, section):
+    def items(  # type: ignore[override]
+        self,
+        section: Key
+    ) -> Iterator[Value]:
         return self._values.get(section).items()
 
-    def sections(self):
+    def sections(self) -> Iterator[Key]:
         return self._values.keys()
 
 
@@ -696,7 +727,9 @@ def parse_submodules(config: ConfigFile) -> Iterator[T
         section_kind, section_name = section
         if section_kind == b"submodule":
             sm_path = config.get(section, b"path")
+            assert isinstance(sm_path, bytes)
             assert sm_path is not None
             sm_url = config.get(section, b"url")
             assert sm_url is not None
+            assert isinstance(sm_url, bytes)
             yield (sm_path, sm_url, section_name)
blob - 1c1d8c4cb574c330838b5e118156315ec1e2ec44
blob + 34e8dda184436f34544f1badd646cde24bcf7851
--- dulwich/porcelain.py
+++ dulwich/porcelain.py
@@ -1003,6 +1003,7 @@ def get_remote_repo(
         remote_name = encoded_location.decode()
         url = config.get(section, "url")
         assert url is not None
+        assert isinstance(url, bytes)
         encoded_location = url
     else:
         remote_name = None
blob - 05c4d30ee39bfb34a2b0d627d8c71c5e1575b545
blob + 5daca8bf089b001f523873510c0fc3ee5bffe1ac
--- dulwich.egg-info/PKG-INFO
+++ dulwich.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.20.42
+Version: 0.20.43
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
blob - 161936c0cc6aefb7cb25f7a604fd063c0a0b952c
blob + a2983826a60d21673c6e8b02788cf1c6cb4a5842
--- setup.py
+++ setup.py
@@ -23,7 +23,7 @@ if sys.version_info < (3, 6):
         'For 2.7 support, please install a version prior to 0.20')
 
 
-dulwich_version_string = '0.20.42'
+dulwich_version_string = '0.20.43'
 
 
 class DulwichDistribution(Distribution):