commit 1997f839a022252a607d6dff033bdd52f62fff41
from: Jelmer Vernooĳ <jelmer@jelmer.uk>
via: GitHub <noreply@github.com>
date: Sat Oct 22 18:35:32 2022 UTC

Merge pull request #1076 from jelmer/more-typing

Add more typing

commit - eb409c082b7fd173c9633b481c67e96331083062
commit + 1997f839a022252a607d6dff033bdd52f62fff41
blob - 3d71e24bed4cdbbd1dd1e17fc3703ac4e227d678
blob + 0bf0e1f3ab41a828ded9c8b28fe5df0c454dfb69
--- NEWS
+++ NEWS
@@ -3,6 +3,10 @@
  * On Windows, provide a hint about developer mode
    when creating symlinks fails due to a permission
    error. (Jelmer Vernooĳ, #1005)
+
+ * Add new ``ObjectID`` type in ``dulwich.objects``,
+   currently just an alias for ``bytes``.
+   (Jelmer Vernooĳ)
 
  * Support repository format version 1.
    (Jelmer Vernooĳ, #1056)
blob - fc26dd2fc6304034d507e3b89d70067e2395a6ce
blob + 492a10ac8b8884ab7096db6c2f3b9277255eb05f
--- dulwich/objects.py
+++ dulwich/objects.py
@@ -33,6 +33,8 @@ from typing import (
     Iterable,
     Union,
     Type,
+    Iterator,
+    List,
 )
 import zlib
 from hashlib import sha1
@@ -75,6 +77,9 @@ MAX_TIME = 9223372036854775807  # (2**63) - 1 - signed
 BEGIN_PGP_SIGNATURE = b"-----BEGIN PGP SIGNATURE-----"
 
 
+ObjectID = bytes
+
+
 class EmptyFileException(FileFormatException):
     """An unexpectedly empty file was encountered."""
 
@@ -153,7 +158,10 @@ def filename_to_hex(filename):
 
 def object_header(num_type: int, length: int) -> bytes:
     """Return an object header for the given numeric type and text length."""
-    return object_class(num_type).type_name + b" " + str(length).encode("ascii") + b"\0"
+    cls = object_class(num_type)
+    if cls is None:
+        raise AssertionError("unsupported class type num: %d" % num_type)
+    return cls.type_name + b" " + str(length).encode("ascii") + b"\0"
 
 
 def serializable_property(name: str, docstring: Optional[str] = None):
@@ -169,7 +177,7 @@ def serializable_property(name: str, docstring: Option
     return property(get, set, doc=docstring)
 
 
-def object_class(type):
+def object_class(type: Union[bytes, int]) -> Optional[Type["ShaFile"]]:
     """Get the object class corresponding to the given type.
 
     Args:
@@ -193,7 +201,7 @@ def check_hexsha(hex, error_msg):
         raise ObjectFormatException("%s %s" % (error_msg, hex))
 
 
-def check_identity(identity, error_msg):
+def check_identity(identity: bytes, error_msg: str) -> None:
     """Check if the specified identity is valid.
 
     This will raise an exception if the identity is not valid.
@@ -261,11 +269,13 @@ class ShaFile(object):
 
     __slots__ = ("_chunked_text", "_sha", "_needs_serialization")
 
-    type_name = None  # type: bytes
-    type_num = None  # type: int
+    _needs_serialization: bool
+    type_name: bytes
+    type_num: int
+    _chunked_text: Optional[List[bytes]]
 
     @staticmethod
-    def _parse_legacy_object_header(magic, f):
+    def _parse_legacy_object_header(magic, f) -> "ShaFile":
         """Parse a legacy object, creating it but not reading the file."""
         bufsize = 1024
         decomp = zlib.decompressobj()
@@ -287,10 +297,10 @@ class ShaFile(object):
                 "Object size not an integer: %s" % exc) from exc
         obj_class = object_class(type_name)
         if not obj_class:
-            raise ObjectFormatException("Not a known type: %s" % type_name)
+            raise ObjectFormatException("Not a known type: %s" % type_name.decode('ascii'))
         return obj_class()
 
-    def _parse_legacy_object(self, map):
+    def _parse_legacy_object(self, map) -> None:
         """Parse a legacy object, setting the raw string."""
         text = _decompress(map)
         header_end = text.find(b"\0")
@@ -298,7 +308,8 @@ class ShaFile(object):
             raise ObjectFormatException("Invalid object header, no \\0")
         self.set_raw_string(text[header_end + 1 :])
 
-    def as_legacy_object_chunks(self, compression_level=-1):
+    def as_legacy_object_chunks(
+            self, compression_level: int = -1) -> Iterator[bytes]:
         """Return chunks representing the object in the experimental format.
 
         Returns: List of strings
@@ -309,13 +320,13 @@ class ShaFile(object):
             yield compobj.compress(chunk)
         yield compobj.flush()
 
-    def as_legacy_object(self, compression_level=-1):
+    def as_legacy_object(self, compression_level: int = -1) -> bytes:
         """Return string representing the object in the experimental format."""
         return b"".join(
             self.as_legacy_object_chunks(compression_level=compression_level)
         )
 
-    def as_raw_chunks(self):
+    def as_raw_chunks(self) -> List[bytes]:
         """Return chunks with serialization of the object.
 
         Returns: List of strings, not necessarily one per line
@@ -324,16 +335,16 @@ class ShaFile(object):
             self._sha = None
             self._chunked_text = self._serialize()
             self._needs_serialization = False
-        return self._chunked_text
+        return self._chunked_text  # type: ignore
 
-    def as_raw_string(self):
+    def as_raw_string(self) -> bytes:
         """Return raw string with serialization of the object.
 
         Returns: String object
         """
         return b"".join(self.as_raw_chunks())
 
-    def __bytes__(self):
+    def __bytes__(self) -> bytes:
         """Return raw string serialization of this object."""
         return self.as_raw_string()
 
@@ -341,24 +352,27 @@ class ShaFile(object):
         """Return unique hash for this object."""
         return hash(self.id)
 
-    def as_pretty_string(self):
+    def as_pretty_string(self) -> bytes:
         """Return a string representing this object, fit for display."""
         return self.as_raw_string()
 
-    def set_raw_string(self, text, sha=None):
+    def set_raw_string(
+            self, text: bytes, sha: Optional[ObjectID] = None) -> None:
         """Set the contents of this object from a serialized string."""
         if not isinstance(text, bytes):
             raise TypeError("Expected bytes for text, got %r" % text)
         self.set_raw_chunks([text], sha)
 
-    def set_raw_chunks(self, chunks, sha=None):
+    def set_raw_chunks(
+            self, chunks: List[bytes],
+            sha: Optional[ObjectID] = None) -> None:
         """Set the contents of this object from a list of chunks."""
         self._chunked_text = chunks
         self._deserialize(chunks)
         if sha is None:
             self._sha = None
         else:
-            self._sha = FixedSha(sha)
+            self._sha = FixedSha(sha)  # type: ignore
         self._needs_serialization = False
 
     @staticmethod
@@ -370,7 +384,7 @@ class ShaFile(object):
             raise ObjectFormatException("Not a known type %d" % num_type)
         return obj_class()
 
-    def _parse_object(self, map):
+    def _parse_object(self, map) -> None:
         """Parse a new style object, setting self._text."""
         # skip type and size; type must have already been determined, and
         # we trust zlib to fail if it's otherwise corrupted
@@ -383,7 +397,7 @@ class ShaFile(object):
         self.set_raw_string(_decompress(raw))
 
     @classmethod
-    def _is_legacy_object(cls, magic):
+    def _is_legacy_object(cls, magic: bytes) -> bool:
         b0 = ord(magic[0:1])
         b1 = ord(magic[1:2])
         word = (b0 << 8) + b1
@@ -445,7 +459,9 @@ class ShaFile(object):
         return obj
 
     @staticmethod
-    def from_raw_chunks(type_num, chunks, sha=None):
+    def from_raw_chunks(
+            type_num: int, chunks: List[bytes],
+            sha: Optional[ObjectID] = None):
         """Creates an object of the indicated type from the raw chunks given.
 
         Args:
@@ -453,7 +469,10 @@ class ShaFile(object):
           chunks: An iterable of the raw uncompressed contents.
           sha: Optional known sha for the object
         """
-        obj = object_class(type_num)()
+        cls = object_class(type_num)
+        if cls is None:
+            raise AssertionError("unsupported class type num: %d" % type_num)
+        obj = cls()
         obj.set_raw_chunks(chunks, sha)
         return obj
 
@@ -477,7 +496,7 @@ class ShaFile(object):
         if getattr(self, member, None) is None:
             raise ObjectFormatException(error_msg)
 
-    def check(self):
+    def check(self) -> None:
         """Check this object for internal consistency.
 
         Raises:
@@ -500,9 +519,9 @@ class ShaFile(object):
             raise ChecksumMismatch(new_sha, old_sha)
 
     def _header(self):
-        return object_header(self.type, self.raw_length())
+        return object_header(self.type_num, self.raw_length())
 
-    def raw_length(self):
+    def raw_length(self) -> int:
         """Returns the length of the raw string of this object."""
         ret = 0
         for chunk in self.as_raw_chunks():
@@ -522,25 +541,14 @@ class ShaFile(object):
 
     def copy(self):
         """Create a new copy of this SHA1 object from its raw string"""
-        obj_class = object_class(self.get_type())
-        return obj_class.from_raw_string(self.get_type(), self.as_raw_string(), self.id)
+        obj_class = object_class(self.type_num)
+        return obj_class.from_raw_string(self.type_num, self.as_raw_string(), self.id)
 
     @property
     def id(self):
         """The hex SHA of this object."""
         return self.sha().hexdigest().encode("ascii")
 
-    def get_type(self):
-        """Return the type number for this object class."""
-        return self.type_num
-
-    def set_type(self, type):
-        """Set the type number for this object class."""
-        self.type_num = type
-
-    # DEPRECATED: use type_num or type_name as needed.
-    type = property(get_type, set_type)
-
     def __repr__(self):
         return "<%s %s>" % (self.__class__.__name__, self.id)
 
@@ -621,7 +629,7 @@ class Blob(ShaFile):
         """
         super(Blob, self).check()
 
-    def splitlines(self):
+    def splitlines(self) -> List[bytes]:
         """Return list of lines in this blob.
 
         This preserves the original line endings.
@@ -649,7 +657,7 @@ class Blob(ShaFile):
         return ret
 
 
-def _parse_message(chunks):
+def _parse_message(chunks: Iterable[bytes]):
     """Parse a message with a list of fields and a body.
 
     Args:
@@ -660,7 +668,7 @@ def _parse_message(chunks):
     """
     f = BytesIO(b"".join(chunks))
     k = None
-    v = ""
+    v = b""
     eof = False
 
     def _strip_last_newline(value):
@@ -1596,7 +1604,7 @@ OBJECT_CLASSES = (
     Tag,
 )
 
-_TYPE_MAP = {}  # type: Dict[Union[bytes, int], Type[ShaFile]]
+_TYPE_MAP: Dict[Union[bytes, int], Type[ShaFile]] = {}
 
 for cls in OBJECT_CLASSES:
     _TYPE_MAP[cls.type_name] = cls
blob - 026fdca3d5fc6d3c592623312d18bb7ee5e29c78
blob + 91159b95a15c816fe9543a63c1ddb9cd7f9098de
--- dulwich/refs.py
+++ dulwich/refs.py
@@ -34,12 +34,14 @@ from dulwich.objects import (
     valid_hexsha,
     ZERO_SHA,
     Tag,
+    ObjectID,
 )
 from dulwich.file import (
     GitFile,
     ensure_dir_exists,
 )
 
+Ref = bytes
 
 HEADREF = b"HEAD"
 SYMREF = b"ref: "
@@ -69,7 +71,7 @@ def parse_symref_value(contents):
     raise ValueError(contents)
 
 
-def check_ref_format(refname):
+def check_ref_format(refname: Ref):
     """Check if a refname is correctly formatted.
 
     Implements all the same rules as git-check-ref-format[1].
@@ -166,8 +168,8 @@ class RefsContainer(object):
 
     def import_refs(
         self,
-        base: bytes,
-        other: Dict[bytes, bytes],
+        base: Ref,
+        other: Dict[Ref, ObjectID],
         committer: Optional[bytes] = None,
         timestamp: Optional[bytes] = None,
         timezone: Optional[bytes] = None,
@@ -455,8 +457,8 @@ class DictRefsContainer(RefsContainer):
 
     def set_symbolic_ref(
         self,
-        name,
-        other,
+        name: Ref,
+        other: Ref,
         committer=None,
         timestamp=None,
         timezone=None,
@@ -507,8 +509,8 @@ class DictRefsContainer(RefsContainer):
 
     def add_if_new(
         self,
-        name: bytes,
-        ref: bytes,
+        name: Ref,
+        ref: ObjectID,
         committer=None,
         timestamp=None,
         timezone=None,
blob - 0fa4b1fd483b2f59fa583f53a414f04a50323967
blob + 89439b4b22e7fee40a30893f00779eca2eb6a6db
--- dulwich/repo.py
+++ dulwich/repo.py
@@ -741,7 +741,8 @@ class BaseRepo(object):
             return cached
         return self.object_store.peel_sha(self.refs[ref]).id
 
-    def get_walker(self, include=None, *args, **kwargs):
+    def get_walker(self, include: Optional[List[bytes]] = None,
+                   *args, **kwargs):
         """Obtain a walker for this repository.
 
         Args:
@@ -771,8 +772,6 @@ class BaseRepo(object):
 
         if include is None:
             include = [self.head()]
-        if isinstance(include, str):
-            include = [include]
 
         kwargs["get_parents"] = lambda commit: self.get_parents(commit.id, commit)
 
blob - 45a0f38f40453ac36b72f30f375448a68a0bc57d
blob + 5a38f71770b56e3f191b5c3d79cb731fbf473130
--- dulwich/walk.py
+++ dulwich/walk.py
@@ -24,6 +24,7 @@
 import collections
 import heapq
 from itertools import chain
+from typing import List, Tuple, Set
 
 from dulwich.diff_tree import (
     RENAME_CHANGE_TYPES,
@@ -35,7 +36,9 @@ from dulwich.errors import (
     MissingCommitError,
 )
 from dulwich.objects import (
+    Commit,
     Tag,
+    ObjectID,
 )
 
 ORDER_DATE = "date"
@@ -128,15 +131,15 @@ class WalkEntry(object):
 class _CommitTimeQueue(object):
     """Priority queue of WalkEntry objects by commit time."""
 
-    def __init__(self, walker):
+    def __init__(self, walker: "Walker"):
         self._walker = walker
         self._store = walker.store
         self._get_parents = walker.get_parents
         self._excluded = walker.excluded
-        self._pq = []
-        self._pq_set = set()
-        self._seen = set()
-        self._done = set()
+        self._pq: List[Tuple[int, Commit]] = []
+        self._pq_set: Set[ObjectID] = set()
+        self._seen: Set[ObjectID] = set()
+        self._done: Set[ObjectID] = set()
         self._min_time = walker.since
         self._last = None
         self._extra_commits_left = _MAX_EXTRA_COMMITS
@@ -145,7 +148,7 @@ class _CommitTimeQueue(object):
         for commit_id in chain(walker.include, walker.excluded):
             self._push(commit_id)
 
-    def _push(self, object_id):
+    def _push(self, object_id: bytes):
         try:
             obj = self._store[object_id]
         except KeyError as exc: