commit f40a60968957e3982eee437f04b3e515bb2f5493 from: Jelmer Vernooij via: GitHub date: Fri Jan 13 20:22:45 2023 UTC Merge pull request #1113 from jelmer/pack-refactor Various bits of pack refactoring; add more typing commit - a96e356d1793b5a0fb4fb79679a194095fc2097e commit + f40a60968957e3982eee437f04b3e515bb2f5493 blob - a70755dd8c1a14357cc221578d15374a9c35c6b8 blob + 660e98925d9e6af12a6d56522f8d41d2bec52645 --- dulwich/client.py +++ dulwich/client.py @@ -1515,7 +1515,7 @@ class LocalGitClient(GitClient): pack_data, progress=None, depth=None, - ): + ) -> FetchPackResult: """Retrieve a pack from a git smart server. Args: @@ -1543,7 +1543,7 @@ class LocalGitClient(GitClient): # Note that the client still expects a 0-object pack in most cases. if objects_iter is None: return FetchPackResult(None, symrefs, agent) - write_pack_objects(pack_data, objects_iter) + write_pack_objects(pack_data, objects_iter, reuse_pack=r.object_store) return FetchPackResult(r.get_refs(), symrefs, agent) def get_refs(self, path): blob - a182b4c820f25ae8f5dabdd2f3e555117048f43d blob + e6fa896832f5911b5d0f0e9465b8e15bd56369f9 --- dulwich/fastexport.py +++ dulwich/fastexport.py @@ -30,6 +30,9 @@ from dulwich.objects import ( Tag, ZERO_SHA, ) +from dulwich.object_store import ( + iter_tree_contents, +) from fastimport import ( commands, errors as fastimport_errors, @@ -232,7 +235,7 @@ class GitImportProcessor(processor.ImportProcessor): path, mode, hexsha, - ) in self.repo.object_store.iter_tree_contents(tree_id): + ) in iter_tree_contents(self.repo.object_store, tree_id): self._contents[path] = (mode, hexsha) def reset_handler(self, cmd): blob - bf7396d286010e88d61dc0ccf41c79b2168961fc blob + 2bbb5e19fa548a4639fb279e18f7f24aa396a55b --- dulwich/greenthreads.py +++ dulwich/greenthreads.py @@ -31,12 +31,13 @@ from dulwich.objects import ( ) from dulwich.object_store import ( MissingObjectFinder, + _collect_ancestors, _collect_filetree_revs, ObjectStoreIterator, ) -def _split_commits_and_tags(obj_store, lst, ignore_unknown=False, pool=None): +def _split_commits_and_tags(obj_store, lst, *, ignore_unknown=False, pool=None): """Split object id list into two list with commit SHA1s and tag SHA1s. 
Same implementation as object_store._split_commits_and_tags @@ -90,11 +91,11 @@ class GreenThreadsMissingObjectFinder(MissingObjectFin self.object_store = object_store p = pool.Pool(size=concurrency) - have_commits, have_tags = _split_commits_and_tags(object_store, haves, True, p) - want_commits, want_tags = _split_commits_and_tags(object_store, wants, False, p) - all_ancestors = object_store._collect_ancestors(have_commits)[0] - missing_commits, common_commits = object_store._collect_ancestors( - want_commits, all_ancestors + have_commits, have_tags = _split_commits_and_tags(object_store, haves, ignore_unknown=True, pool=p) + want_commits, want_tags = _split_commits_and_tags(object_store, wants, ignore_unknown=False, pool=p) + all_ancestors = _collect_ancestors(object_store, have_commits)[0] + missing_commits, common_commits = _collect_ancestors( + object_store, want_commits, all_ancestors ) self.sha_done = set() blob - d875b4851077035a92c82421f2e8f1fc460ea433 blob + 4b13df9eb1147da3efc5e3906ceffad403b82581 --- dulwich/index.py +++ dulwich/index.py @@ -32,16 +32,12 @@ from typing import ( Dict, List, Optional, - TYPE_CHECKING, Iterable, Iterator, Tuple, Union, ) -if TYPE_CHECKING: - from dulwich.object_store import BaseObjectStore - from dulwich.file import GitFile from dulwich.objects import ( Blob, @@ -52,9 +48,11 @@ from dulwich.objects import ( sha_to_hex, ObjectID, ) +from dulwich.object_store import iter_tree_contents from dulwich.pack import ( SHA1Reader, SHA1Writer, + ObjectContainer, ) @@ -451,7 +449,7 @@ class Index: def commit_tree( - object_store: "BaseObjectStore", blobs: Iterable[Tuple[bytes, bytes, int]] + object_store: ObjectContainer, blobs: Iterable[Tuple[bytes, bytes, int]] ) -> bytes: """Commit a new tree. @@ -494,7 +492,7 @@ def commit_tree( return build_tree(b"") -def commit_index(object_store: "BaseObjectStore", index: Index) -> bytes: +def commit_index(object_store: ObjectContainer, index: Index) -> bytes: """Create a new tree from an index. 
Args: @@ -509,7 +507,7 @@ def commit_index(object_store: "BaseObjectStore", inde def changes_from_tree( names: Iterable[bytes], lookup_entry: Callable[[bytes], Tuple[bytes, int]], - object_store: "BaseObjectStore", + object_store: ObjectContainer, tree: Optional[bytes], want_unchanged=False, ) -> Iterable[ @@ -535,7 +533,7 @@ def changes_from_tree( other_names = set(names) if tree is not None: - for (name, mode, sha) in object_store.iter_tree_contents(tree): + for (name, mode, sha) in iter_tree_contents(object_store, tree): try: (other_sha, other_mode) = lookup_entry(name) except KeyError: @@ -686,7 +684,7 @@ def validate_path(path: bytes, def build_index_from_tree( root_path: Union[str, bytes], index_path: Union[str, bytes], - object_store: "BaseObjectStore", + object_store: ObjectContainer, tree_id: bytes, honor_filemode: bool = True, validate_path_element=validate_path_element_default, @@ -711,7 +709,7 @@ def build_index_from_tree( if not isinstance(root_path, bytes): root_path = os.fsencode(root_path) - for entry in object_store.iter_tree_contents(tree_id): + for entry in iter_tree_contents(object_store, tree_id): if not validate_path(entry.path, validate_path_element): continue full_path = _tree_to_fs_path(root_path, entry.path) @@ -727,6 +725,7 @@ def build_index_from_tree( # TODO(jelmer): record and return submodule paths else: obj = object_store[entry.sha] + assert isinstance(obj, Blob) st = build_file_from_blob( obj, entry.mode, full_path, honor_filemode=honor_filemode, @@ -927,7 +926,7 @@ def index_entry_from_directory(st, path: bytes) -> Opt def index_entry_from_path( - path: bytes, object_store: Optional["BaseObjectStore"] = None + path: bytes, object_store: Optional[ObjectContainer] = None ) -> Optional[IndexEntry]: """Create an index from a filesystem path. @@ -957,7 +956,7 @@ def index_entry_from_path( def iter_fresh_entries( paths: Iterable[bytes], root_path: bytes, - object_store: Optional["BaseObjectStore"] = None + object_store: Optional[ObjectContainer] = None ) -> Iterator[Tuple[bytes, Optional[IndexEntry]]]: """Iterate over current versions of index entries on disk. 
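The dulwich/index.py hunks above swap the string-quoted "BaseObjectStore" annotations for the new structural ObjectContainer protocol imported from dulwich.pack, so any object offering add_object/add_objects/__contains__/__getitem__ now satisfies the annotations. A minimal sketch of what that means for callers, not part of the commit itself; commit_tree, MemoryObjectStore and Blob are existing dulwich APIs, while make_demo_tree is a hypothetical helper name:

    from dulwich.index import commit_tree
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob
    from dulwich.pack import ObjectContainer


    def make_demo_tree(store: ObjectContainer) -> bytes:
        # Any store satisfying the protocol works here: MemoryObjectStore,
        # DiskObjectStore, or a user-supplied duck-typed container.
        blob = Blob.from_string(b"hello\n")
        store.add_object(blob)
        # blobs are (path, hexsha, mode) tuples, unchanged by the refactor
        return commit_tree(store, [(b"hello.txt", blob.id, 0o100644)])


    print(make_demo_tree(MemoryObjectStore()))
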
blob - 7060795636f2266217fae6be5c69bffcdd7387de blob + 44a028dfd1c096769c1965e49fb8cca8f246cfdb --- dulwich/line_ending.py +++ dulwich/line_ending.py @@ -136,6 +136,7 @@ Sources: - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/ """ +from dulwich.object_store import iter_tree_contents from dulwich.objects import Blob from dulwich.patch import is_binary @@ -290,7 +291,7 @@ class TreeBlobNormalizer(BlobNormalizer): if tree: self.existing_paths = { name - for name, _, _ in object_store.iter_tree_contents(tree) + for name, _, _ in iter_tree_contents(object_store, tree) } else: self.existing_paths = set() blob - 9e2eb72057ef6f2f61542ab5530dcbf6cca4fabd blob + d91771e9710ddfbf1e9db3650492dccaaec683c0 --- dulwich/object_store.py +++ dulwich/object_store.py @@ -26,13 +26,10 @@ from io import BytesIO import os import stat import sys +import warnings -from typing import Callable, Dict, List, Optional, Tuple +from typing import Callable, Dict, List, Optional, Tuple, Protocol, Union, Iterator, Set -from dulwich.diff_tree import ( - tree_changes, - walk_trees, -) from dulwich.errors import ( NotTreeError, ) @@ -48,10 +45,12 @@ from dulwich.objects import ( sha_to_hex, hex_to_filename, S_ISGITLINK, + TreeEntry, object_class, valid_hexsha, ) from dulwich.pack import ( + ObjectContainer, Pack, PackData, PackInflater, @@ -79,6 +78,14 @@ PACKDIR = "pack" PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 +class PackContainer(Protocol): + + def add_pack( + self + ) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]: + """Add a new pack.""" + + class BaseObjectStore: """Object store interface.""" @@ -213,6 +220,8 @@ class BaseObjectStore: Returns: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) """ + + from dulwich.diff_tree import tree_changes for change in tree_changes( self, source, @@ -239,11 +248,10 @@ class BaseObjectStore: Returns: Iterator over TreeEntry namedtuples for all the objects in a tree. """ - for entry, _ in walk_trees(self, tree_id, None): - if ( - entry.mode is not None and not stat.S_ISDIR(entry.mode) - ) or include_trees: - yield entry + warnings.warn( + "Please use dulwich.object_store.iter_tree_contents", + DeprecationWarning, stacklevel=2) + return iter_tree_contents(self, tree_id, include_trees=include_trees) def find_missing_objects( self, @@ -334,47 +342,10 @@ class BaseObjectStore: intermediate tags; if the original ref does not point to a tag, this will equal the original SHA1. """ - obj = self[sha] - obj_class = object_class(obj.type_name) - while obj_class is Tag: - obj_class, sha = obj.object - obj = self[sha] - return obj - - def _collect_ancestors( - self, - heads, - common=frozenset(), - shallow=frozenset(), - get_parents=lambda commit: commit.parents, - ): - """Collect all ancestors of heads up to (excluding) those in common. - - Args: - heads: commits to start from - common: commits to end at, or empty set to walk repository - completely - get_parents: Optional function for getting the parents of a - commit. 
- Returns: a tuple (A, B) where A - all commits reachable - from heads but not present in common, B - common (shared) elements - that are directly reachable from heads - """ - bases = set() - commits = set() - queue = [] - queue.extend(heads) - while queue: - e = queue.pop(0) - if e in common: - bases.add(e) - elif e not in commits: - commits.add(e) - if e in shallow: - continue - cmt = self[e] - queue.extend(get_parents(cmt)) - return (commits, bases) + warnings.warn( + "Please use dulwich.object_store.peel_sha()", + DeprecationWarning, stacklevel=2) + return peel_sha(self, sha) def _get_depth( self, head, get_parents=lambda commit: commit.parents, max_depth=None, @@ -588,6 +559,46 @@ class PackBasedObjectStore(BaseObjectStore): for alternate in self.alternates: try: return alternate.get_raw(hexsha) + except KeyError: + pass + raise KeyError(hexsha) + + def get_raw_unresolved(self, name: bytes) -> Tuple[int, Union[bytes, None], List[bytes]]: + """Obtain the unresolved data for an object. + + Args: + name: sha for the object. + """ + if name == ZERO_SHA: + raise KeyError(name) + if len(name) == 40: + sha = hex_to_sha(name) + hexsha = name + elif len(name) == 20: + sha = name + hexsha = None + else: + raise AssertionError("Invalid object name {!r}".format(name)) + for pack in self._iter_cached_packs(): + try: + return pack.get_raw_unresolved(sha) + except (KeyError, PackFileDisappeared): + pass + if hexsha is None: + hexsha = sha_to_hex(name) + ret = self._get_loose_object(hexsha) + if ret is not None: + return ret.type_num, None, ret.as_raw_chunks() + # Maybe something else has added a pack with the object + # in the mean time? + for pack in self._update_pack_cache(): + try: + return pack.get_raw_unresolved(sha) + except KeyError: + pass + for alternate in self.alternates: + try: + return alternate.get_raw_unresolved(hexsha) except KeyError: pass raise KeyError(hexsha) @@ -1083,10 +1094,10 @@ class MemoryObjectStore(BaseObjectStore): commit() -class ObjectIterator: +class ObjectIterator(Protocol): """Interface for iterating over objects.""" - def iterobjects(self): + def iterobjects(self) -> Iterator[ShaFile]: raise NotImplementedError(self.iterobjects) @@ -1178,7 +1189,7 @@ def tree_lookup_path(lookup_obj, root_sha, path): return tree.lookup_path(lookup_obj, path) -def _collect_filetree_revs(obj_store, tree_sha, kset): +def _collect_filetree_revs(obj_store: ObjectContainer, tree_sha: ObjectID, kset: Set[ObjectID]) -> None: """Collect SHA1s of files and directories for specified tree. Args: @@ -1187,6 +1198,7 @@ def _collect_filetree_revs(obj_store, tree_sha, kset): kset: set to fill with references to files and directories """ filetree = obj_store[tree_sha] + assert isinstance(filetree, Tree) for name, mode, sha in filetree.iteritems(): if not S_ISGITLINK(mode) and sha not in kset: kset.add(sha) @@ -1194,7 +1206,7 @@ def _collect_filetree_revs(obj_store, tree_sha, kset): _collect_filetree_revs(obj_store, sha, kset) -def _split_commits_and_tags(obj_store, lst, ignore_unknown=False): +def _split_commits_and_tags(obj_store: ObjectContainer, lst, *, ignore_unknown=False) -> Tuple[Set[bytes], Set[bytes], Set[bytes]]: """Split object id list into three lists with commit, tag, and other SHAs. Commits referenced by tags are included into commits @@ -1209,9 +1221,9 @@ def _split_commits_and_tags(obj_store, lst, ignore_unk silently. 
Returns: A tuple of (commits, tags, others) SHA1s """ - commits = set() - tags = set() - others = set() + commits: Set[bytes] = set() + tags: Set[bytes] = set() + others: Set[bytes] = set() for e in lst: try: o = obj_store[e] @@ -1224,12 +1236,12 @@ def _split_commits_and_tags(obj_store, lst, ignore_unk elif isinstance(o, Tag): tags.add(e) tagged = o.object[1] - c, t, o = _split_commits_and_tags( + c, t, os = _split_commits_and_tags( obj_store, [tagged], ignore_unknown=ignore_unknown ) commits |= c tags |= t - others |= o + others |= os else: others.add(e) return (commits, tags, others) @@ -1270,20 +1282,22 @@ class MissingObjectFinder: # wants shall list only known SHAs, and otherwise # _split_commits_and_tags fails with KeyError have_commits, have_tags, have_others = _split_commits_and_tags( - object_store, haves, True + object_store, haves, ignore_unknown=True ) want_commits, want_tags, want_others = _split_commits_and_tags( - object_store, wants, False + object_store, wants, ignore_unknown=False ) # all_ancestors is a set of commits that shall not be sent # (complete repository up to 'haves') - all_ancestors = object_store._collect_ancestors( + all_ancestors = _collect_ancestors( + object_store, have_commits, shallow=shallow, get_parents=self._get_parents )[0] # all_missing - complete set of commits between haves and wants # common - commits from all_ancestors we hit into while # traversing parent hierarchy of wants - missing_commits, common_commits = object_store._collect_ancestors( + missing_commits, common_commits = _collect_ancestors( + object_store, want_commits, all_ancestors, shallow=shallow, @@ -1606,3 +1620,85 @@ class BucketBasedObjectStore(PackBasedObjectStore): return final_pack return pf, commit, pf.close + + +def _collect_ancestors( + store: ObjectContainer, + heads, + common=frozenset(), + shallow=frozenset(), + get_parents=lambda commit: commit.parents, +): + """Collect all ancestors of heads up to (excluding) those in common. + + Args: + heads: commits to start from + common: commits to end at, or empty set to walk repository + completely + get_parents: Optional function for getting the parents of a + commit. + Returns: a tuple (A, B) where A - all commits reachable + from heads but not present in common, B - common (shared) elements + that are directly reachable from heads + """ + bases = set() + commits = set() + queue = [] + queue.extend(heads) + while queue: + e = queue.pop(0) + if e in common: + bases.add(e) + elif e not in commits: + commits.add(e) + if e in shallow: + continue + cmt = store[e] + queue.extend(get_parents(cmt)) + return (commits, bases) + + +def iter_tree_contents( + store: ObjectContainer, tree_id: bytes, *, include_trees: bool = False): + """Iterate the contents of a tree and all subtrees. + + Iteration is depth-first pre-order, as in e.g. os.walk. + + Args: + tree_id: SHA1 of the tree. + include_trees: If True, include tree objects in the iteration. + Returns: Iterator over TreeEntry namedtuples for all the objects in a + tree. + """ + # This could be fairly easily generalized to >2 trees if we find a use + # case. 
+ todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] + while todo: + entry = todo.pop() + if stat.S_ISDIR(entry.mode): + extra = [] + tree = store[entry.sha] + assert isinstance(tree, Tree) + for subentry in tree.iteritems(name_order=True): + extra.append(subentry.in_path(entry.path)) + todo.extend(reversed(extra)) + if not stat.S_ISDIR(entry.mode) or include_trees: + yield entry + + +def peel_sha(store: ObjectContainer, sha: bytes) -> ShaFile: + """Peel all tags from a SHA. + + Args: + sha: The object SHA to peel. + Returns: The fully-peeled SHA1 of a tag object, after peeling all + intermediate tags; if the original ref does not point to a tag, + this will equal the original SHA1. + """ + obj = store[sha] + obj_class = object_class(obj.type_name) + while obj_class is Tag: + assert isinstance(obj, Tag) + obj_class, sha = obj.object + obj = store[sha] + return obj blob - ee15fa1377a0669cab618f931812671b6c291338 blob + 1fd4feb6ac3adacc2e41fb828b530cd6a13028e5 --- dulwich/pack.py +++ dulwich/pack.py @@ -49,7 +49,7 @@ from itertools import chain import os import sys -from typing import Optional, Callable, Tuple, List, Deque, Union +from typing import Optional, Callable, Tuple, List, Deque, Union, Protocol, Iterable, Iterator import warnings from hashlib import sha1 @@ -94,6 +94,34 @@ DELTA_TYPES = (OFS_DELTA, REF_DELTA) DEFAULT_PACK_DELTA_WINDOW_SIZE = 10 + + +class ObjectContainer(Protocol): + + def add_object(self, obj: ShaFile) -> None: + """Add a single object to this object store.""" + + def add_objects( + self, objects: Iterable[Tuple[ShaFile, Optional[str]]], + progress: Optional[Callable[[str], None]] = None) -> None: + """Add a set of objects to this object store. + + Args: + objects: Iterable over a list of (object, path) tuples + """ + + def __contains__(self, sha1: bytes) -> bool: + """Check if a hex sha is present.""" + + def __getitem__(self, sha1: bytes) -> ShaFile: + """Retrieve an object.""" + + +class PackedObjectContainer(ObjectContainer): + + def get_raw_unresolved(self, sha1: bytes) -> Tuple[int, Union[bytes, None], List[bytes]]: + """Get a raw unresolved object.""" + raise NotImplementedError(self.get_raw_unresolved) def take_msb_bytes(read: Callable[[int], bytes], crc32: Optional[int] = None) -> Tuple[List[int], Optional[int]]: @@ -513,7 +541,7 @@ class FilePackIndex(PackIndex): self._contents, self._size = (contents, size) @property - def path(self): + def path(self) -> str: return self._filename def __eq__(self, other): @@ -526,16 +554,16 @@ class FilePackIndex(PackIndex): return super().__eq__(other) - def close(self): + def close(self) -> None: self._file.close() if getattr(self._contents, "close", None) is not None: self._contents.close() - def __len__(self): + def __len__(self) -> int: """Return the number of entries in this pack index.""" return self._fan_out_table[-1] - def _unpack_entry(self, i): + def _unpack_entry(self, i: int) -> Tuple[bytes, int, Optional[int]]: """Unpack the i-th entry in the index file. Returns: Tuple with object name (SHA), offset in pack file and CRC32 @@ -555,11 +583,11 @@ class FilePackIndex(PackIndex): """Unpack the crc32 checksum for the ith object from the index file.""" raise NotImplementedError(self._unpack_crc32_checksum) - def _itersha(self): + def _itersha(self) -> Iterator[bytes]: for i in range(len(self)): yield self._unpack_name(i) - def iterentries(self): + def iterentries(self) -> Iterator[Tuple[bytes, int, Optional[int]]]: """Iterate over the entries in this pack index. 
Returns: iterator over tuples with object name, offset in packfile and @@ -568,7 +596,7 @@ class FilePackIndex(PackIndex): for i in range(len(self)): yield self._unpack_entry(i) - def _read_fan_out_table(self, start_offset): + def _read_fan_out_table(self, start_offset: int): ret = [] for i in range(0x100): fanout_entry = self._contents[ @@ -577,35 +605,35 @@ class FilePackIndex(PackIndex): ret.append(struct.unpack(">L", fanout_entry)[0]) return ret - def check(self): + def check(self) -> None: """Check that the stored checksum matches the actual checksum.""" actual = self.calculate_checksum() stored = self.get_stored_checksum() if actual != stored: raise ChecksumMismatch(stored, actual) - def calculate_checksum(self): + def calculate_checksum(self) -> bytes: """Calculate the SHA1 checksum over this pack index. Returns: This is a 20-byte binary digest """ return sha1(self._contents[:-20]).digest() - def get_pack_checksum(self): + def get_pack_checksum(self) -> bytes: """Return the SHA1 checksum stored for the corresponding packfile. Returns: 20-byte binary digest """ return bytes(self._contents[-40:-20]) - def get_stored_checksum(self): + def get_stored_checksum(self) -> bytes: """Return the SHA1 checksum stored for this index. Returns: 20-byte binary digest """ return bytes(self._contents[-20:]) - def object_index(self, sha): + def object_index(self, sha: bytes) -> int: """Return the index in to the corresponding packfile for the object. Given the name of an object it will return the offset that object @@ -644,7 +672,7 @@ class FilePackIndex(PackIndex): class PackIndex1(FilePackIndex): """Version 1 Pack Index file.""" - def __init__(self, filename, file=None, contents=None, size=None): + def __init__(self, filename: str, file=None, contents=None, size=None): super().__init__(filename, file, contents, size) self.version = 1 self._fan_out_table = self._read_fan_out_table(0) @@ -669,7 +697,7 @@ class PackIndex1(FilePackIndex): class PackIndex2(FilePackIndex): """Version 2 Pack Index file.""" - def __init__(self, filename, file=None, contents=None, size=None): + def __init__(self, filename: str, file=None, contents=None, size=None): super().__init__(filename, file, contents, size) if self._contents[:4] != b"\377tOc": raise AssertionError("Not a v2 pack index file") @@ -707,7 +735,7 @@ class PackIndex2(FilePackIndex): return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0] -def read_pack_header(read): +def read_pack_header(read) -> Tuple[Optional[int], Optional[int]]: """Read the header of a pack file. Args: @@ -727,7 +755,7 @@ def read_pack_header(read): return (version, num_objects) -def chunks_length(chunks): +def chunks_length(chunks: Union[bytes, Iterable[bytes]]) -> int: if isinstance(chunks, bytes): return len(chunks) else: @@ -740,7 +768,7 @@ def unpack_object( compute_crc32=False, include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE, -): +) -> Tuple[UnpackedObject, bytes]: """Unpack a Git object. Args: @@ -1596,12 +1624,13 @@ def write_pack_object(write, type, object, sha=None, c def write_pack( - filename, - objects, - deltify=None, - delta_window_size=None, - compression_level=-1, -): + filename, + objects, + *, + deltify: Optional[bool] = None, + delta_window_size: Optional[int] = None, + compression_level: int = -1, + reuse_pack: Optional[PackedObjectContainer] = None): """Write a new pack data file. 
Args: @@ -1619,6 +1648,7 @@ def write_pack( delta_window_size=delta_window_size, deltify=deltify, compression_level=compression_level, + reuse_pack=reuse_pack, ) entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()]) with GitFile(filename + ".idx", "wb") as f: @@ -1643,7 +1673,10 @@ def write_pack_header(write, num_objects): write(chunk) -def deltify_pack_objects(objects, window_size: Optional[int] = None, reuse_pack=None): +def deltify_pack_objects( + objects: Iterable[Tuple[ShaFile, str]], + window_size: Optional[int] = None, + reuse_pack: Optional[PackedObjectContainer] = None): """Generate deltas for pack objects. Args: @@ -1685,7 +1718,7 @@ def deltify_pack_objects(objects, window_size: Optiona magic.append((obj.type_num, path, -obj.raw_length(), obj)) magic.sort() - possible_bases: Deque[Tuple[bytes, int, bytes]] = deque() + possible_bases: Deque[Tuple[bytes, int, List[bytes]]] = deque() for type_num, path, neg_length, o in magic: raw = o.as_raw_chunks() @@ -1712,7 +1745,11 @@ def deltify_pack_objects(objects, window_size: Optiona possible_bases.pop() -def pack_objects_to_data(objects): +def pack_objects_to_data( + objects, + delta_window_size: Optional[int] = None, + deltify: Optional[bool] = None, + reuse_pack: Optional[PackedObjectContainer] = None): """Create pack data from objects Args: @@ -1720,17 +1757,30 @@ def pack_objects_to_data(objects): Returns: Tuples with (type_num, hexdigest, delta base, object chunks) """ count = len(objects) - return ( - count, - ( - (o.type_num, o.sha().digest(), None, o.as_raw_chunks()) - for (o, path) in objects - ), - ) + if deltify is None: + # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too + # slow at the moment. + deltify = False + if deltify: + pack_contents = deltify_pack_objects( + objects, window_size=delta_window_size, reuse_pack=reuse_pack) + return (count, pack_contents) + else: + return ( + count, + ( + (o.type_num, o.sha().digest(), None, o.as_raw_chunks()) + for (o, path) in objects + ), + ) def write_pack_objects( - write, objects, delta_window_size=None, deltify=None, reuse_pack=None, compression_level=-1 + write, objects, + delta_window_size: Optional[int] = None, + deltify: Optional[bool] = None, + reuse_pack: Optional[PackedObjectContainer] = None, + compression_level: int = -1 ): """Write a new pack data file. @@ -1751,16 +1801,10 @@ def write_pack_objects( DeprecationWarning, stacklevel=2) write = write.write - if deltify is None: - # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too - # slow at the moment. - deltify = False - if deltify: - pack_contents = deltify_pack_objects( - objects, window_size=delta_window_size, reuse_pack=reuse_pack) - pack_contents_count = len(objects) - else: - pack_contents_count, pack_contents = pack_objects_to_data(objects) + pack_contents_count, pack_contents = pack_objects_to_data( + objects, delta_window_size=delta_window_size, + deltify=deltify, + reuse_pack=reuse_pack) return write_pack_data( write, blob - bcae70e32166efc77b675b9fa5167f8c376101f1 blob + 3652e33424e0937c1a3cb683458755689c7d5951 --- dulwich/patch.py +++ dulwich/patch.py @@ -34,6 +34,7 @@ from dulwich.objects import ( Commit, S_ISGITLINK, ) +from dulwich.pack import ObjectContainer FIRST_FEW_BYTES = 8000 @@ -192,7 +193,7 @@ def patch_filename(p, root): return root + b"/" + p -def write_object_diff(f, store, old_file, new_file, diff_binary=False): +def write_object_diff(f, store: ObjectContainer, old_file, new_file, diff_binary=False): """Write the diff for an object. 
Args: blob - 850747722e33b4d032f087e7c1c18f0d22a6ff78 blob + 31d590f8a356e3a7829dfdf267e04c359bac0dcc --- dulwich/refs.py +++ dulwich/refs.py @@ -37,6 +37,7 @@ from dulwich.objects import ( Tag, ObjectID, ) +from dulwich.pack import ObjectContainer from dulwich.file import ( GitFile, ensure_dir_exists, @@ -1150,8 +1151,10 @@ def read_info_refs(f): return ret -def write_info_refs(refs, store): +def write_info_refs(refs, store: ObjectContainer): """Generate info refs.""" + # Avoid recursive import :( + from dulwich.object_store import peel_sha for name, sha in sorted(refs.items()): # get_refs() includes HEAD as a special case, but we don't want to # advertise it @@ -1161,7 +1164,7 @@ def write_info_refs(refs, store): o = store[sha] except KeyError: continue - peeled = store.peel_sha(sha) + peeled = peel_sha(store, sha) yield o.id + b"\t" + name + b"\n" if o.id != peeled.id: yield peeled.id + b"\t" + name + ANNOTATED_TAG_SUFFIX + b"\n" blob - f8ccfeedd838706b5d182b6eb7b18de95abe58f3 blob + 53d889c29e74271cc59631a4ae5bab62f8254bb0 --- dulwich/repo.py +++ dulwich/repo.py @@ -72,6 +72,7 @@ from dulwich.object_store import ( MemoryObjectStore, BaseObjectStore, ObjectStoreGraphWalker, + peel_sha, ) from dulwich.objects import ( check_hexsha, @@ -757,7 +758,7 @@ class BaseRepo: cached = self.refs.get_peeled(ref) if cached is not None: return cached - return self.object_store.peel_sha(self.refs[ref]).id + return peel_sha(self.object_store, self.refs[ref]).id def get_walker(self, include: Optional[List[bytes]] = None, *args, **kwargs): blob - 43b23fe1012a34267186a6935608f68b40cfd746 blob + 53376d3a5859dbed1a819b3d1f69ec089c1d1b01 --- dulwich/server.py +++ dulwich/server.py @@ -47,7 +47,7 @@ import os import socket import sys import time -from typing import List, Tuple, Dict, Optional, Iterable +from typing import List, Tuple, Dict, Optional, Iterable, Set import zlib import socketserver @@ -66,9 +66,13 @@ from dulwich import log_utils from dulwich.objects import ( Commit, valid_hexsha, +) +from dulwich.object_store import ( + peel_sha, ) from dulwich.pack import ( write_pack_objects, + ObjectContainer, ) from dulwich.protocol import ( BufferedPktLineWriter, @@ -456,7 +460,7 @@ def _split_proto_line(line, allowed): raise GitProtocolError("Received invalid line from client: %r" % line) -def _find_shallow(store, heads, depth): +def _find_shallow(store: ObjectContainer, heads, depth): """Find shallow commits according to a given depth. Args: @@ -468,7 +472,7 @@ def _find_shallow(store, heads, depth): considered shallow and unshallow according to the arguments. Note that these sets may overlap if a commit is reachable along multiple paths. 
""" - parents = {} + parents: Dict[bytes, List[bytes]] = {} def get_parents(sha): result = parents.get(sha, None) @@ -479,7 +483,7 @@ def _find_shallow(store, heads, depth): todo = [] # stack of (sha, depth) for head_sha in heads: - obj = store.peel_sha(head_sha) + obj = peel_sha(store, head_sha) if isinstance(obj, Commit): todo.append((obj.id, 1)) @@ -497,7 +501,7 @@ def _find_shallow(store, heads, depth): return shallow, not_shallow -def _want_satisfied(store, haves, want, earliest): +def _want_satisfied(store: ObjectContainer, haves, want, earliest): o = store[want] pending = collections.deque([o]) known = {want} @@ -505,7 +509,7 @@ def _want_satisfied(store, haves, want, earliest): commit = pending.popleft() if commit.id in haves: return True - if commit.type_name != b"commit": + if not isinstance(commit, Commit): # non-commit wants are assumed to be satisfied continue for parent in commit.parents: @@ -513,13 +517,14 @@ def _want_satisfied(store, haves, want, earliest): continue known.add(parent) parent_obj = store[parent] + assert isinstance(parent_obj, Commit) # TODO: handle parents with later commit times than children if parent_obj.commit_time >= earliest: pending.append(parent_obj) return False -def _all_wants_satisfied(store, haves, wants): +def _all_wants_satisfied(store: ObjectContainer, haves, wants): """Check whether all the current wants are satisfied by a set of haves. Args: @@ -531,7 +536,8 @@ def _all_wants_satisfied(store, haves, wants): """ haves = set(haves) if haves: - earliest = min([store[h].commit_time for h in haves]) + have_objs = [store[h] for h in haves] + earliest = min([h.commit_time for h in have_objs if isinstance(h, Commit)]) else: earliest = 0 for want in wants: @@ -555,20 +561,20 @@ class _ProtocolGraphWalker: any calls to next() or ack() are made. 
""" - def __init__(self, handler, object_store, get_peeled, get_symrefs): + def __init__(self, handler, object_store: ObjectContainer, get_peeled, get_symrefs): self.handler = handler - self.store = object_store + self.store: ObjectContainer = object_store self.get_peeled = get_peeled self.get_symrefs = get_symrefs self.proto = handler.proto self.stateless_rpc = handler.stateless_rpc self.advertise_refs = handler.advertise_refs - self._wants = [] - self.shallow = set() - self.client_shallow = set() - self.unshallow = set() + self._wants: List[bytes] = [] + self.shallow: Set[bytes] = set() + self.client_shallow: Set[bytes] = set() + self.unshallow: Set[bytes] = set() self._cached = False - self._cache = [] + self._cache: List[bytes] = [] self._cache_index = 0 self._impl = None @@ -1104,7 +1110,7 @@ class UploadArchiveHandler(Handler): prefix = b"" format = "tar" i = 0 - store = self.repo.object_store + store: ObjectContainer = self.repo.object_store while i < len(arguments): argument = arguments[i] if argument == b"--prefix": blob - dc98d8792450299ff187445bbf197eda758a6580 blob + 5bc44c27cdd444437b8d31b0fba3b9b086b65ed5 --- dulwich/submodule.py +++ dulwich/submodule.py @@ -22,6 +22,7 @@ """ from typing import Iterator, Tuple +from .object_store import iter_tree_contents from .objects import S_ISGITLINK @@ -35,6 +36,6 @@ def iter_cached_submodules(store, root_tree_id: bytes) Returns: Iterator over over (path, sha) tuples """ - for entry in store.iter_tree_contents(root_tree_id): + for entry in iter_tree_contents(store, root_tree_id): if S_ISGITLINK(entry.mode): yield entry.path, entry.sha blob - be067b5110c14e61c9bd63d26e469b5346930641 blob + e4f8c537dfcdf5f82ca0d9786cc77897e55fda70 --- dulwich/tests/test_object_store.py +++ dulwich/tests/test_object_store.py @@ -51,6 +51,8 @@ from dulwich.object_store import ( OverlayObjectStore, ObjectStoreGraphWalker, commit_tree_changes, + iter_tree_contents, + peel_sha, read_packs_file, tree_lookup_path, ) @@ -219,7 +221,7 @@ class ObjectStoreTests: tree_id = commit_tree(self.store, blobs) self.assertEqual( [TreeEntry(p, m, h) for (p, h, m) in blobs], - list(self.store.iter_tree_contents(tree_id)), + list(iter_tree_contents(self.store, tree_id)), ) def test_iter_tree_contents_include_trees(self): @@ -247,7 +249,7 @@ class ObjectStoreTests: TreeEntry(b"ad/bd", 0o040000, tree_bd.id), TreeEntry(b"ad/bd/c", 0o100755, blob_c.id), ] - actual = self.store.iter_tree_contents(tree_id, include_trees=True) + actual = iter_tree_contents(self.store, tree_id, include_trees=True) self.assertEqual(expected, list(actual)) def make_tag(self, name, obj): @@ -261,7 +263,7 @@ class ObjectStoreTests: tag2 = self.make_tag(b"2", testobject) tag3 = self.make_tag(b"3", testobject) for obj in [testobject, tag1, tag2, tag3]: - self.assertEqual(testobject, self.store.peel_sha(obj.id)) + self.assertEqual(testobject, peel_sha(self.store, obj.id)) def test_get_raw(self): self.store.add_object(testobject)