commit - d58e9cf00fa278b47713604710c3fd40c7676008
commit + c18863d65ee8d1e668f14bea3e8a48978b364353
blob - 6bcc9918cc886bf27d15dd95282277a824ac17a6
blob + a3edd9f5fce0bb37afa8a474925239e96b2485a3
--- .github/workflows/pythontest.yml
+++ .github/workflows/pythontest.yml
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install -U pip coverage flake8 fastimport paramiko urllib3
+ pip install -U ".[fastimport,paramiko,https]"
- name: Install gpg on supported platforms
- run: pip install -U gpg
+ run: pip install -U ".[pgp]"
if: "matrix.os != 'windows-latest' && matrix.python-version != 'pypy3'"
- - name: Install mypy
- run: |
- pip install -U mypy types-paramiko types-requests
- if: "matrix.python-version != 'pypy3'"
- name: Style checks
run: |
+ pip install -U flake8
python -m flake8
- name: Typing checks
run: |
+ pip install -U mypy types-paramiko types-requests
python -m mypy dulwich
if: "matrix.python-version != 'pypy3'"
- name: Build
python setup.py build_ext -i
- name: Coverage test suite run
run: |
+ pip install -U coverage
python -m coverage run -p -m unittest dulwich.tests.test_suite
blob - 046fa71df671632ecde0c64a7f23b8f4be0429dc
blob + bfef77b97ba93e33a3bc24a861ee1e1dc3001fde
--- NEWS
+++ NEWS
-0.20.51 UNRELEASED
+0.21.0 UNRELEASED
+
+ * Pack internals have been significantly refactored, including
+   major low-level API changes.
+
+ As a consequence of this, Dulwich now reuses pack deltas
+ when communicating with remote servers, which brings a
+ big boost to network performance.
+ (Jelmer Vernooij)
0.20.50 2022-10-30
blob - 94fd0a480d08527d185cbb3443f5f520a1a07ef7
blob + 65d0f03cdbe4cd204a096ea2902074a7c4a88398
--- docs/tutorial/remote.txt
+++ docs/tutorial/remote.txt
>>> class DummyGraphWalker(object):
... def ack(self, sha): pass
+ ... def nak(self): pass
... def next(self): pass
... def __next__(self): pass
blob - ca08b38b030d51253ea59c74f53726a7400c4c9d
blob + 4f3b573cadc1a2233d17d7a2d65979c3267ccda1
--- dulwich/__init__.py
+++ dulwich/__init__.py
"""Python implementation of the Git file formats and protocols."""
-__version__ = (0, 20, 50)
+__version__ = (0, 21, 0)
blob - 7e9e94897cc6f45f0764ffe94fcb74c9b57b57d9
blob + 52dd47408048903e9b085e5e14b4c1d20491bf35
--- dulwich/bundle.py
+++ dulwich/bundle.py
references: Dict[str, bytes] = {}
pack_data: Union[PackData, Sequence[bytes]] = []
+ def __repr__(self):
+ return (f"<{type(self).__name__}(version={self.version}, "
+ f"capabilities={self.capabilities}, "
+ f"prerequisites={self.prerequisites}, "
+ f"references={self.references})>")
+
def __eq__(self, other):
if not isinstance(other, type(self)):
return False
for ref, obj_id in bundle.references.items():
f.write(b"%s %s\n" % (obj_id, ref))
f.write(b"\n")
- write_pack_data(f.write, records=bundle.pack_data)
+ write_pack_data(f.write, num_records=len(bundle.pack_data), records=bundle.pack_data.iter_unpacked())
blob - 1f2e12fdc323b7f4c0176729bef23e3def6ca4ed
blob + 2595a2d28d8420bfdb25de6670bc1d282fc41ca1
--- dulwich/cli.py
+++ dulwich/cli.py
from typing import Dict, Type, Optional
from dulwich import porcelain
-from dulwich.client import get_transport_and_path
+from dulwich.client import get_transport_and_path, GitProtocolError
from dulwich.errors import ApplyDeltaError
from dulwich.index import Index
from dulwich.objectspec import parse_commit
else:
target = None
- porcelain.clone(source, target, bare=options.bare, depth=options.depth,
- branch=options.branch)
+ try:
+ porcelain.clone(source, target, bare=options.bare, depth=options.depth,
+ branch=options.branch)
+ except GitProtocolError as e:
+ print("%s" % e)
class cmd_commit(Command):
blob - 660e98925d9e6af12a6d56522f8d41d2bec52645
blob + 19122b8b7f23662036bce5954a323c9df203e90f
--- dulwich/client.py
+++ dulwich/client.py
Callable,
Dict,
List,
+ Iterable,
+ Iterator,
Optional,
Set,
Tuple,
pkt_line,
)
from dulwich.pack import (
- write_pack_objects,
+ write_pack_from_container,
+ UnpackedObject,
PackChunkGenerator,
)
from dulwich.refs import (
yield None
-def _read_side_band64k_data(pkt_seq, channel_callbacks):
+def _read_side_band64k_data(pkt_seq: Iterable[bytes], channel_callbacks: Dict[int, Callable[[bytes], None]]) -> None:
"""Read per-channel data.
This requires the side-band-64k capability.
def _handle_upload_pack_tail(
proto,
- capabilities,
+ capabilities: Set[bytes],
graph_walker,
- pack_data,
+ pack_data: Callable[[bytes], None],
progress=None,
rbufsize=_RBUFSIZE,
):
parts = pkt.rstrip(b"\n").split(b" ")
if parts[0] == b"ACK":
graph_walker.ack(parts[1])
+ if parts[0] == b"NAK":
+ graph_walker.nak()
if len(parts) < 3 or parts[2] not in (
b"ready",
b"continue",
"""
raise NotImplementedError(cls.from_parsedurl)
- def send_pack(self, path, update_refs, generate_pack_data, progress=None):
+ def send_pack(self, path, update_refs, generate_pack_data: Callable[[Set[bytes], Set[bytes], bool], Tuple[int, Iterator[UnpackedObject]]], progress=None):
"""Upload a pack to a remote repository.
Args:
determine_wants,
graph_walker,
pack_data,
+ *,
progress=None,
depth=None,
):
header_handler.have,
header_handler.want,
ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities),
+ progress=progress,
)
if self._should_send_pack(new_refs):
- for chunk in PackChunkGenerator(pack_data_count, pack_data):
+ for chunk in PackChunkGenerator(pack_data_count, pack_data, progress=progress):
proto.write(chunk)
ref_status = self._handle_receive_pack_tail(
"""
with self._open_repo(path) as r:
- objects_iter = r.fetch_objects(
+ missing_objects = r.find_missing_objects(
determine_wants, graph_walker, progress=progress, depth=depth
)
+ other_haves = missing_objects.get_remote_has()
+ object_ids = list(missing_objects)
symrefs = r.refs.get_symrefs()
agent = agent_string()
# Did the process short-circuit (e.g. in a stateless RPC call)?
# Note that the client still expects a 0-object pack in most cases.
- if objects_iter is None:
+ if object_ids is None:
return FetchPackResult(None, symrefs, agent)
- write_pack_objects(pack_data, objects_iter, reuse_pack=r.object_store)
+ write_pack_from_container(pack_data, r.object_store, object_ids, other_haves=other_haves)
return FetchPackResult(r.get_refs(), symrefs, agent)
def get_refs(self, path):
blob - d97e2374e3d0cf591f20cb0d0914c7ddbd51315c
blob + e2f9625a256c98145d615e4932a54ebc082a9d2a
--- dulwich/contrib/swift.py
+++ dulwich/contrib/swift.py
from dulwich.greenthreads import (
GreenThreadsMissingObjectFinder,
- GreenThreadsObjectStoreIterator,
)
from dulwich.lru_cache import LRUSizeCache
"""
-class PackInfoObjectStoreIterator(GreenThreadsObjectStoreIterator):
- def __len__(self):
- while self.finder.objects_to_send:
- for _ in range(0, len(self.finder.objects_to_send)):
- sha = self.finder.next()
- self._shas.append(sha)
- return len(self._shas)
-
-
class PackInfoMissingObjectFinder(GreenThreadsMissingObjectFinder):
def next(self):
while True:
"""Loose objects are not supported by this repository"""
return []
- def iter_shas(self, finder):
- """An iterator over pack's ObjectStore.
-
- Returns: a `ObjectStoreIterator` or `GreenThreadsObjectStoreIterator`
- instance if gevent is enabled
- """
- shas = iter(finder.next, None)
- return PackInfoObjectStoreIterator(self, shas, finder, self.scon.concurrency)
-
- def find_missing_objects(self, *args, **kwargs):
- kwargs["concurrency"] = self.scon.concurrency
- return PackInfoMissingObjectFinder(self, *args, **kwargs)
-
def pack_info_get(self, sha):
for pack in self.packs:
if sha in pack:
blob - 2bbb5e19fa548a4639fb279e18f7f24aa396a55b
blob + 4a56f46ef02872dde132dd716563f9041644c6d1
--- dulwich/greenthreads.py
+++ dulwich/greenthreads.py
MissingObjectFinder,
_collect_ancestors,
_collect_filetree_revs,
- ObjectStoreIterator,
)
else:
self.progress = progress
self._tagged = get_tagged and get_tagged() or {}
-
-
-class GreenThreadsObjectStoreIterator(ObjectStoreIterator):
- """ObjectIterator that works on top of an ObjectStore.
-
- Same implementation as object_store.ObjectStoreIterator
- except we use gevent to parallelize object retrieval.
- """
-
- def __init__(self, store, shas, finder, concurrency=1):
- self.finder = finder
- self.p = pool.Pool(size=concurrency)
- super().__init__(store, shas)
-
- def retrieve(self, args):
- sha, path = args
- return self.store[sha], path
-
- def __iter__(self):
- yield from self.p.imap_unordered(self.retrieve, self.itershas())
-
- def __len__(self):
- if len(self._shas) > 0:
- return len(self._shas)
- while self.finder.objects_to_send:
- jobs = []
- for _ in range(0, len(self.finder.objects_to_send)):
- jobs.append(self.p.spawn(self.finder.next))
- gevent.joinall(jobs)
- for j in jobs:
- if j.value is not None:
- self._shas.append(j.value)
- return len(self._shas)
blob - 1701cf2d7f1c8a91bcd73b64a42f1ccd38eaa62a
blob + 9201aefc4bdd2ebbc00dd0cc2a47bad55083510c
--- dulwich/object_store.py
+++ dulwich/object_store.py
import sys
import warnings
-from typing import Callable, Dict, List, Optional, Tuple, Protocol, Union, Iterator, Set
+from typing import Callable, Dict, List, Optional, Tuple, Iterator, Set, Iterable, Sequence, cast
+try:
+ from typing import Protocol
+except ImportError: # python << 3.8
+ from typing_extensions import Protocol # type: ignore
+
from dulwich.errors import (
NotTreeError,
)
ShaFile,
Tag,
Tree,
+ Blob,
ZERO_SHA,
hex_to_sha,
sha_to_hex,
ObjectContainer,
Pack,
PackData,
+ PackHint,
PackInflater,
PackFileDisappeared,
+ UnpackedObject,
load_pack_index_file,
iter_sha1,
+ full_unpacked_object,
+ generate_unpacked_objects,
pack_objects_to_data,
write_pack_header,
write_pack_index_v2,
compute_file_sha,
PackIndexer,
PackStreamCopier,
+ PackedObjectContainer,
)
from dulwich.protocol import DEPTH_INFINITE
from dulwich.refs import ANNOTATED_TAG_SUFFIX, Ref
and not sha == ZERO_SHA
]
- def iter_shas(self, shas):
- """Iterate over the objects for the specified shas.
-
- Args:
- shas: Iterable object with SHAs
- Returns: Object iterator
- """
- return ObjectStoreIterator(self, shas)
-
def contains_loose(self, sha):
"""Check if a particular object is present by SHA1 and is loose."""
raise NotImplementedError(self.contains_loose)
- def contains_packed(self, sha):
- """Check if a particular object is present by SHA1 and is packed."""
- raise NotImplementedError(self.contains_packed)
-
- def __contains__(self, sha):
+ def __contains__(self, sha1: bytes) -> bool:
"""Check if a particular object is present by SHA1.
This method makes no distinction between loose and packed objects.
"""
- return self.contains_packed(sha) or self.contains_loose(sha)
+ return self.contains_loose(sha1)
@property
def packs(self):
"""
raise NotImplementedError(self.get_raw)
- def __getitem__(self, sha: ObjectID):
+ def __getitem__(self, sha1: ObjectID) -> ShaFile:
"""Obtain an object by SHA1."""
- type_num, uncomp = self.get_raw(sha)
- return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
+ type_num, uncomp = self.get_raw(sha1)
+ return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
def __iter__(self):
"""Iterate over the SHAs that are present in this store."""
raise NotImplementedError(self.__iter__)
- def add_pack(
- self
- ) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]:
- """Add a new pack to this object store."""
- raise NotImplementedError(self.add_pack)
-
def add_object(self, obj):
"""Add a single object to this object store."""
raise NotImplementedError(self.add_object)
"""
raise NotImplementedError(self.add_objects)
- def add_pack_data(self, count, pack_data, progress=None):
- """Add pack data to this object store.
-
- Args:
- count: Number of items to add
- pack_data: Iterator over pack data tuples
- """
- if count == 0:
- # Don't bother writing an empty pack file
- return
- f, commit, abort = self.add_pack()
- try:
- write_pack_data(
- f.write,
- count,
- pack_data,
- progress,
- compression_level=self.pack_compression_level,
- )
- except BaseException:
- abort()
- raise
- else:
- return commit()
-
def tree_changes(
self,
source,
DeprecationWarning, stacklevel=2)
return iter_tree_contents(self, tree_id, include_trees=include_trees)
- def find_missing_objects(
- self,
- haves,
- wants,
- shallow=None,
- progress=None,
- get_tagged=None,
- get_parents=lambda commit: commit.parents,
- ):
- """Find the missing objects required for a set of revisions.
+ def iterobjects_subset(self, shas: Iterable[bytes], *, allow_missing: bool = False) -> Iterator[ShaFile]:
+ for sha in shas:
+ try:
+ yield self[sha]
+ except KeyError:
+ if not allow_missing:
+ raise
- Args:
- haves: Iterable over SHAs already in common.
- wants: Iterable over SHAs of objects to fetch.
- shallow: Set of shallow commit SHA1s to skip
- progress: Simple progress function that will be called with
- updated progress strings.
- get_tagged: Function that returns a dict of pointed-to sha ->
- tag sha for including tags.
- get_parents: Optional function for getting the parents of a
- commit.
- Returns: Iterator over (sha, path) pairs.
- """
- warnings.warn(
- 'Please use MissingObjectFinder(store)', DeprecationWarning)
- finder = MissingObjectFinder(
- self,
- haves=haves,
- wants=wants,
- shallow=shallow,
- progress=progress,
- get_tagged=get_tagged,
- get_parents=get_parents,
- )
- return iter(finder)
-
def find_common_revisions(self, graphwalker):
"""Find which revisions this store has in common using graphwalker.
sha = next(graphwalker)
return haves
- def generate_pack_contents(self, have, want, shallow=None, progress=None):
- """Iterate over the contents of a pack file.
-
- Args:
- have: List of SHA1s of objects that should not be sent
- want: List of SHA1s of objects that should be sent
- shallow: Set of shallow commit SHA1s to skip
- progress: Optional progress reporting method
- """
- missing = MissingObjectFinder(
- self, haves=have, wants=want, shallow=shallow, progress=progress)
- return self.iter_shas(missing)
-
def generate_pack_data(
- self, have, want, shallow=None, progress=None, ofs_delta=True
- ):
+ self, have, want, shallow=None, progress=None,
+ ofs_delta=True
+ ) -> Tuple[int, Iterator[UnpackedObject]]:
"""Generate pack data objects for a set of wants/haves.
Args:
ofs_delta: Whether OFS deltas can be included
progress: Optional progress reporting method
"""
- # TODO(jelmer): More efficient implementation
+ # Note that the pack-specific implementation below is more efficient,
+ # as it reuses deltas
+ missing_objects = MissingObjectFinder(
+ self, haves=have, wants=want, shallow=shallow, progress=progress)
+ object_ids = list(missing_objects)
return pack_objects_to_data(
- self.generate_pack_contents(have, want, shallow, progress)
- )
+ [(self[oid], path) for oid, path in object_ids], ofs_delta=ofs_delta,
+ progress=progress)
def peel_sha(self, sha):
"""Peel all tags from a SHA.
self._pack_cache = {}
self.pack_compression_level = pack_compression_level
+ def add_pack(
+ self
+ ) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]:
+ """Add a new pack to this object store."""
+ raise NotImplementedError(self.add_pack)
+
+ def add_pack_data(self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None) -> None:
+ """Add pack data to this object store.
+
+        Args:
+            count: Number of items to add
+            unpacked_objects: Iterator over unpacked objects to add
+ """
+ if count == 0:
+ # Don't bother writing an empty pack file
+ return
+ f, commit, abort = self.add_pack()
+ try:
+ write_pack_data(
+ f.write,
+ unpacked_objects,
+ num_records=count,
+ progress=progress,
+ compression_level=self.pack_compression_level,
+ )
+ except BaseException:
+ abort()
+ raise
+ else:
+ return commit()
+
@property
def alternates(self):
return []
if prev_pack:
prev_pack.close()
+ def generate_pack_data(
+ self, have, want, shallow=None, progress=None,
+ ofs_delta=True
+ ) -> Tuple[int, Iterator[UnpackedObject]]:
+ """Generate pack data objects for a set of wants/haves.
+
+ Args:
+ have: List of SHA1s of objects that should not be sent
+ want: List of SHA1s of objects that should be sent
+ shallow: Set of shallow commit SHA1s to skip
+ ofs_delta: Whether OFS deltas can be included
+ progress: Optional progress reporting method
+ """
+ missing_objects = MissingObjectFinder(
+ self, haves=have, wants=want, shallow=shallow, progress=progress)
+ remote_has = missing_objects.get_remote_has()
+ object_ids = list(missing_objects)
+ return len(object_ids), generate_unpacked_objects(
+ cast(PackedObjectContainer, self),
+ object_ids,
+ progress=progress,
+ ofs_delta=ofs_delta,
+ other_haves=remote_has)
+
def _clear_cached_packs(self):
pack_cache = self._pack_cache
self._pack_cache = {}
pass
raise KeyError(hexsha)
- def get_raw_unresolved(self, name: bytes) -> Tuple[int, Union[bytes, None], List[bytes]]:
- """Obtain the unresolved data for an object.
+ def iter_unpacked_subset(self, shas, *, include_comp=False, allow_missing: bool = False, convert_ofs_delta: bool = True) -> Iterator[ShaFile]:
+ todo: Set[bytes] = set(shas)
+ for p in self._iter_cached_packs():
+ for unpacked in p.iter_unpacked_subset(todo, include_comp=include_comp, allow_missing=True, convert_ofs_delta=convert_ofs_delta):
+ yield unpacked
+ hexsha = sha_to_hex(unpacked.sha())
+ todo.remove(hexsha)
+ # Maybe something else has added a pack with the object
+ # in the mean time?
+ for p in self._update_pack_cache():
+ for unpacked in p.iter_unpacked_subset(todo, include_comp=include_comp, allow_missing=True, convert_ofs_delta=convert_ofs_delta):
+ yield unpacked
+ hexsha = sha_to_hex(unpacked.sha())
+ todo.remove(hexsha)
+ for alternate in self.alternates:
+ for unpacked in alternate.iter_unpacked_subset(todo, include_comp=include_comp, allow_missing=True, convert_ofs_delta=convert_ofs_delta):
+ yield unpacked
+ hexsha = sha_to_hex(unpacked.sha())
+ todo.remove(hexsha)
- Args:
- name: sha for the object.
+ def iterobjects_subset(self, shas: Iterable[bytes], *, allow_missing: bool = False) -> Iterator[ShaFile]:
+ todo: Set[bytes] = set(shas)
+ for p in self._iter_cached_packs():
+ for o in p.iterobjects_subset(todo, allow_missing=True):
+ yield o
+ todo.remove(o.id)
+ # Maybe something else has added a pack with the object
+ # in the mean time?
+ for p in self._update_pack_cache():
+ for o in p.iterobjects_subset(todo, allow_missing=True):
+ yield o
+ todo.remove(o.id)
+ for alternate in self.alternates:
+ for o in alternate.iterobjects_subset(todo, allow_missing=True):
+ yield o
+ todo.remove(o.id)
+ for oid in todo:
+ o = self._get_loose_object(oid)
+ if o is not None:
+ yield o
+ elif not allow_missing:
+ raise KeyError(oid)
+
+ def get_unpacked_object(self, sha1: bytes, *, include_comp: bool = False) -> UnpackedObject:
+ """Obtain the unpacked object.
+
+ Args:
+ sha1: sha for the object.
"""
- if name == ZERO_SHA:
- raise KeyError(name)
- if len(name) == 40:
- sha = hex_to_sha(name)
- hexsha = name
- elif len(name) == 20:
- sha = name
+ if sha1 == ZERO_SHA:
+ raise KeyError(sha1)
+ if len(sha1) == 40:
+ sha = hex_to_sha(sha1)
+ hexsha = sha1
+ elif len(sha1) == 20:
+ sha = sha1
hexsha = None
else:
- raise AssertionError("Invalid object name {!r}".format(name))
+ raise AssertionError("Invalid object sha1 {!r}".format(sha1))
for pack in self._iter_cached_packs():
try:
- return pack.get_raw_unresolved(sha)
+ return pack.get_unpacked_object(sha, include_comp=include_comp)
except (KeyError, PackFileDisappeared):
pass
if hexsha is None:
- hexsha = sha_to_hex(name)
- ret = self._get_loose_object(hexsha)
- if ret is not None:
- return ret.type_num, None, ret.as_raw_chunks()
+ hexsha = sha_to_hex(sha1)
# Maybe something else has added a pack with the object
# in the mean time?
for pack in self._update_pack_cache():
try:
- return pack.get_raw_unresolved(sha)
+ return pack.get_unpacked_object(sha, include_comp=include_comp)
except KeyError:
pass
for alternate in self.alternates:
try:
- return alternate.get_raw_unresolved(hexsha)
+ return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
except KeyError:
pass
raise KeyError(hexsha)
- def add_objects(self, objects, progress=None):
+ def add_objects(
+ self, objects: Sequence[Tuple[ShaFile, Optional[str]]],
+ progress: Optional[Callable[[str], None]] = None) -> None:
"""Add a set of objects to this object store.
Args:
__len__.
Returns: Pack object of the objects written.
"""
- return self.add_pack_data(*pack_objects_to_data(objects), progress=progress)
+ count = len(objects)
+ record_iter = (full_unpacked_object(o) for (o, p) in objects)
+ return self.add_pack_data(count, record_iter, progress=progress)
class DiskObjectStore(PackBasedObjectStore):
def iterobjects(self) -> Iterator[ShaFile]:
raise NotImplementedError(self.iterobjects)
-
-
-class ObjectStoreIterator(ObjectIterator):
- """ObjectIterator that works on top of an ObjectStore."""
-
- def __init__(self, store, sha_iter):
- """Create a new ObjectIterator.
-
- Args:
- store: Object store to retrieve from
- sha_iter: Iterator over (sha, path) tuples
- """
- self.store = store
- self.sha_iter = sha_iter
- self._shas = []
-
- def __iter__(self):
- """Yield tuple with next object and path."""
- for sha, path in self.itershas():
- yield self.store[sha], path
-
- def iterobjects(self):
- """Iterate over just the objects."""
- for o, path in self:
- yield o
-
- def itershas(self):
- """Iterate over the SHAs."""
- for sha in self._shas:
- yield sha
- for sha in self.sha_iter:
- self._shas.append(sha)
- yield sha
-
- def __contains__(self, needle):
- """Check if an object is present.
-
- Note: This checks if the object is present in
- the underlying object store, not if it would
- be yielded by the iterator.
-
- Args:
- needle: SHA1 of the object to check for
- """
- if needle == ZERO_SHA:
- return False
- return needle in self.store
-
- def __getitem__(self, key):
- """Find an object by SHA1.
-
- Note: This retrieves the object from the underlying
- object store. It will also succeed if the object would
- not be returned by the iterator.
- """
- return self.store[key]
- def __len__(self):
- """Return the number of objects."""
- return len(list(self.itershas()))
- def _empty(self):
- it = self.itershas()
- try:
- next(it)
- except StopIteration:
- return True
- else:
- return False
-
- def __bool__(self):
- """Indicate whether this object has contents."""
- return not self._empty()
-
-
def tree_lookup_path(lookup_obj, root_sha, path):
"""Look up an object in a Git tree.
shallow=shallow,
get_parents=self._get_parents,
)
- self.sha_done = set()
+ self.remote_has: Set[bytes] = set()
# Now, fill sha_done with commits and revisions of
# files and directories known to be both locally
# and on target. Thus these commits and files
# won't get selected for fetch
for h in common_commits:
- self.sha_done.add(h)
+ self.remote_has.add(h)
cmt = object_store[h]
- _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+ _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
# record tags we have as visited, too
for t in have_tags:
- self.sha_done.add(t)
+ self.remote_has.add(t)
+ self.sha_done = set(self.remote_has)
- missing_tags = want_tags.difference(have_tags)
- missing_others = want_others.difference(have_others)
# in fact, what we 'want' is commits, tags, and others
# we've found missing
- wants = missing_commits.union(missing_tags)
- wants = wants.union(missing_others)
+ self.objects_to_send = {
+ (w, None, Commit.type_num, False)
+ for w in missing_commits}
+ missing_tags = want_tags.difference(have_tags)
+ self.objects_to_send.update(
+ {(w, None, Tag.type_num, False)
+ for w in missing_tags})
+ missing_others = want_others.difference(have_others)
+ self.objects_to_send.update(
+ {(w, None, None, False)
+ for w in missing_others})
- self.objects_to_send = {(w, None, False) for w in wants}
-
if progress is None:
self.progress = lambda x: None
else:
self.progress = progress
self._tagged = get_tagged and get_tagged() or {}
- def add_todo(self, entries):
+ def get_remote_has(self):
+ return self.remote_has
+
+ def add_todo(self, entries: Iterable[Tuple[ObjectID, Optional[bytes], Optional[int], bool]]):
self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
- def __next__(self):
+ def __next__(self) -> Tuple[bytes, PackHint]:
while True:
if not self.objects_to_send:
- return None
- (sha, name, leaf) = self.objects_to_send.pop()
+ self.progress(("counting objects: %d, done.\n" % len(self.sha_done)).encode("ascii"))
+ raise StopIteration
+ (sha, name, type_num, leaf) = self.objects_to_send.pop()
if sha not in self.sha_done:
break
if not leaf:
o = self.object_store[sha]
if isinstance(o, Commit):
- self.add_todo([(o.tree, b"", False)])
+ self.add_todo([(o.tree, b"", Tree.type_num, False)])
elif isinstance(o, Tree):
self.add_todo(
[
- (s, n, not stat.S_ISDIR(m))
+ (s, n, (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
+ not stat.S_ISDIR(m))
for n, m, s in o.iteritems()
if not S_ISGITLINK(m)
]
)
elif isinstance(o, Tag):
- self.add_todo([(o.object[1], None, False)])
+ self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
if sha in self._tagged:
- self.add_todo([(self._tagged[sha], None, True)])
+ self.add_todo([(self._tagged[sha], None, None, True)])
self.sha_done.add(sha)
- self.progress(("counting objects: %d\r" % len(self.sha_done)).encode("ascii"))
- return (sha, name)
+ if len(self.sha_done) % 1000 == 0:
+ self.progress(("counting objects: %d\r" % len(self.sha_done)).encode("ascii"))
+ return (sha, (type_num, name))
def __iter__(self):
- return iter(self.__next__, None)
+ return self
class ObjectStoreGraphWalker:
if shallow is None:
shallow = set()
self.shallow = shallow
+
+ def nak(self):
+ """Nothing in common was found."""
def ack(self, sha):
"""Ack that a revision and its ancestors are present in the source."""
yield o_id
done.add(o_id)
+ def iterobjects_subset(self, shas: Iterable[bytes], *, allow_missing: bool = False) -> Iterator[ShaFile]:
+ todo = set(shas)
+ for b in self.bases:
+ for o in b.iterobjects_subset(todo, allow_missing=True):
+ yield o
+ todo.remove(o.id)
+ if todo and not allow_missing:
+ raise KeyError(o.id)
+
+ def iter_unpacked_subset(self, shas: Iterable[bytes], *, include_comp=False, allow_missing: bool = False, convert_ofs_delta=True) -> Iterator[ShaFile]:
+ todo = set(shas)
+ for b in self.bases:
+ for o in b.iter_unpacked_subset(todo, include_comp=include_comp, allow_missing=True, convert_ofs_delta=convert_ofs_delta):
+ yield o
+ todo.remove(o.id)
+ if todo and not allow_missing:
+ raise KeyError(o.id)
+
def get_raw(self, sha_id):
for b in self.bases:
try:
def iter_tree_contents(
- store: ObjectContainer, tree_id: bytes, *, include_trees: bool = False):
+ store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False):
"""Iterate the contents of a tree and all subtrees.
Iteration is depth-first pre-order, as in e.g. os.walk.
Returns: Iterator over TreeEntry namedtuples for all the objects in a
tree.
"""
+ if tree_id is None:
+ return
# This could be fairly easily generalized to >2 trees if we find a use
# case.
todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
blob - 06b12ee7bcbf16b49fe1766c76d73659dcaf185d
blob + 0c16e802f09bc0885d5dc552ee5e51b2c8704f1f
--- dulwich/objects.py
+++ dulwich/objects.py
def raw_length(self) -> int:
"""Returns the length of the raw string of this object."""
- ret = 0
- for chunk in self.as_raw_chunks():
- ret += len(chunk)
- return ret
+ return sum(map(len, self.as_raw_chunks()))
def sha(self):
"""The SHA1 object that is the name of this object."""
blob - 1fd4feb6ac3adacc2e41fb828b530cd6a13028e5
blob + 9dcfa31e67776df61b6c2d09f37f968c35955084
--- dulwich/pack.py
+++ dulwich/pack.py
from collections import defaultdict
import binascii
+from contextlib import suppress
from io import BytesIO, UnsupportedOperation
from collections import (
deque,
import os
import sys
-from typing import Optional, Callable, Tuple, List, Deque, Union, Protocol, Iterable, Iterator
+from typing import Optional, Callable, Tuple, List, Deque, Union, Iterable, Iterator, Dict, TypeVar, Generic, Sequence, Set
+
+try:
+ from typing import Protocol
+except ImportError: # python << 3.8
+ from typing_extensions import Protocol # type: ignore
+
import warnings
from hashlib import sha1
)
from dulwich.objects import (
ShaFile,
+ ObjectID,
hex_to_sha,
sha_to_hex,
object_header,
DEFAULT_PACK_DELTA_WINDOW_SIZE = 10
+OldUnpackedObject = Union[Tuple[Union[bytes, int], List[bytes]], List[bytes]]
+ResolveExtRefFn = Callable[[bytes], Tuple[int, OldUnpackedObject]]
+ProgressFn = Callable[[int, str], None]
+PackHint = Tuple[int, Optional[bytes]]
+
+
class ObjectContainer(Protocol):
def add_object(self, obj: ShaFile) -> None:
"""Add a single object to this object store."""
def add_objects(
- self, objects: Iterable[Tuple[ShaFile, Optional[str]]],
+ self, objects: Sequence[Tuple[ShaFile, Optional[str]]],
progress: Optional[Callable[[str], None]] = None) -> None:
"""Add a set of objects to this object store.
class PackedObjectContainer(ObjectContainer):
- def get_raw_unresolved(self, sha1: bytes) -> Tuple[int, Union[bytes, None], List[bytes]]:
+ def get_unpacked_object(self, sha1: bytes, *, include_comp: bool = False) -> "UnpackedObject":
"""Get a raw unresolved object."""
- raise NotImplementedError(self.get_raw_unresolved)
+ raise NotImplementedError(self.get_unpacked_object)
+ def iterobjects_subset(self, shas: Iterable[bytes], *, allow_missing: bool = False) -> Iterator[ShaFile]:
+ raise NotImplementedError(self.iterobjects_subset)
+ def iter_unpacked_subset(
+ self, shas: Set[bytes], include_comp: bool = False, allow_missing: bool = False,
+ convert_ofs_delta: bool = True) -> Iterator["UnpackedObject"]:
+ raise NotImplementedError(self.iter_unpacked_subset)
+
+
+class UnpackedObjectStream:
+
+ def __iter__(self) -> Iterator["UnpackedObject"]:
+ raise NotImplementedError(self.__iter__)
+
+ def __len__(self) -> int:
+ raise NotImplementedError(self.__len__)
+
+
def take_msb_bytes(read: Callable[[int], bytes], crc32: Optional[int] = None) -> Tuple[List[int], Optional[int]]:
"""Read bytes marked with most significant bit.
obj_type_num: Optional[int]
obj_chunks: Optional[List[bytes]]
+ delta_base: Union[None, bytes, int]
+ decomp_chunks: List[bytes]
# TODO(dborowitz): read_zlib_chunks and unpack_object could very well be
# methods of this object.
- def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
- self.offset = None
- self._sha = None
+ def __init__(self, pack_type_num, *, delta_base=None, decomp_len=None, crc32=None, sha=None, decomp_chunks=None, offset=None):
+ self.offset = offset
+ self._sha = sha
self.pack_type_num = pack_type_num
self.delta_base = delta_base
self.comp_chunks = None
- self.decomp_chunks: List[bytes] = []
- self.decomp_len = decomp_len
+ self.decomp_chunks: List[bytes] = decomp_chunks or []
+ if decomp_chunks is not None and decomp_len is None:
+ self.decomp_len = sum(map(len, decomp_chunks))
+ else:
+ self.decomp_len = decomp_len
self.crc32 = crc32
if pack_type_num in DELTA_TYPES:
# Only provided for backwards compatibility with code that expects either
# chunks or a delta tuple.
- def _obj(self):
+ def _obj(self) -> OldUnpackedObject:
"""Return the decompressed chunks, or (delta base, delta chunks)."""
if self.pack_type_num in DELTA_TYPES:
+ assert isinstance(self.delta_base, (bytes, int))
return (self.delta_base, self.decomp_chunks)
else:
return self.decomp_chunks
return None
+PackIndexEntry = Tuple[bytes, int, Optional[int]]
+
+
class PackIndex:
"""An index in to a packfile.
def __ne__(self, other):
return not self.__eq__(other)
- def __len__(self):
+ def __len__(self) -> int:
"""Return the number of entries in this pack index."""
raise NotImplementedError(self.__len__)
- def __iter__(self):
+ def __iter__(self) -> Iterator[bytes]:
"""Iterate over the SHAs in this pack."""
return map(sha_to_hex, self._itersha())
- def iterentries(self):
+ def iterentries(self) -> Iterator[PackIndexEntry]:
"""Iterate over the entries in this pack index.
Returns: iterator over tuples with object name, offset in packfile and
"""
raise NotImplementedError(self.iterentries)
- def get_pack_checksum(self):
+ def get_pack_checksum(self) -> bytes:
"""Return the SHA1 checksum stored for the corresponding packfile.
Returns: 20-byte binary digest
"""
raise NotImplementedError(self.get_pack_checksum)
- def object_index(self, sha):
- """Return the index in to the corresponding packfile for the object.
+ def object_index(self, sha: bytes) -> int:
+ warnings.warn('Please use object_offset instead', DeprecationWarning, stacklevel=2)
+ return self.object_offset(sha)
+ def object_offset(self, sha: bytes) -> int:
+ """Return the offset in to the corresponding packfile for the object.
+
Given the name of an object it will return the offset that object
lives at within the corresponding pack file. If the pack file doesn't
have the object then None will be returned.
"""
- raise NotImplementedError(self.object_index)
+ raise NotImplementedError(self.object_offset)
- def object_sha1(self, index):
+ def object_sha1(self, index: int) -> bytes:
"""Return the SHA1 corresponding to the index in the pack file."""
- # PERFORMANCE/TODO(jelmer): Avoid scanning entire index
for (name, offset, crc32) in self.iterentries():
if offset == index:
return name
else:
raise KeyError(index)
- def _object_index(self, sha):
- """See object_index.
+ def _object_offset(self, sha: bytes) -> int:
+ """See object_offset.
Args:
sha: A *binary* SHA string. (20 characters long)_
"""
- raise NotImplementedError(self._object_index)
+ raise NotImplementedError(self._object_offset)
- def objects_sha1(self):
+ def objects_sha1(self) -> bytes:
"""Return the hex SHA1 over all the shas of all objects in this pack.
Note: This is used for the filename of the pack.
"""
return iter_sha1(self._itersha())
- def _itersha(self):
+ def _itersha(self) -> Iterator[bytes]:
"""Yield all the SHA1's of the objects in the index, sorted."""
raise NotImplementedError(self._itersha)
def close(self):
pass
+ def check(self) -> None:
+ pass
+
class MemoryPackIndex(PackIndex):
"""Pack index that is stored entirely in memory."""
pack_checksum: Optional pack checksum
"""
self._by_sha = {}
- self._by_index = {}
- for name, idx, crc32 in entries:
- self._by_sha[name] = idx
- self._by_index[idx] = name
+ self._by_offset = {}
+ for name, offset, crc32 in entries:
+ self._by_sha[name] = offset
+ self._by_offset[offset] = name
self._entries = entries
self._pack_checksum = pack_checksum
def __len__(self):
return len(self._entries)
- def object_index(self, sha):
+ def object_offset(self, sha):
if len(sha) == 40:
sha = hex_to_sha(sha)
- return self._by_sha[sha][0]
+ return self._by_sha[sha]
- def object_sha1(self, index):
- return self._by_index[index]
+ def object_sha1(self, offset):
+ return self._by_offset[offset]
def _itersha(self):
return iter(self._by_sha)
def iterentries(self):
return iter(self._entries)
+ @classmethod
+ def for_pack(cls, pack):
+ return MemoryPackIndex(pack.sorted_entries(), pack.calculate_checksum())
+ @classmethod
+ def clone(cls, other_index):
+ return cls(other_index.iterentries(), other_index.get_pack_checksum())
+
+
class FilePackIndex(PackIndex):
"""Pack index that is based on a file.
"""Return the number of entries in this pack index."""
return self._fan_out_table[-1]
- def _unpack_entry(self, i: int) -> Tuple[bytes, int, Optional[int]]:
+ def _unpack_entry(self, i: int) -> PackIndexEntry:
"""Unpack the i-th entry in the index file.
Returns: Tuple with object name (SHA), offset in pack file and CRC32
for i in range(len(self)):
yield self._unpack_name(i)
- def iterentries(self) -> Iterator[Tuple[bytes, int, Optional[int]]]:
+ def iterentries(self) -> Iterator[PackIndexEntry]:
"""Iterate over the entries in this pack index.
Returns: iterator over tuples with object name, offset in packfile and
"""
return bytes(self._contents[-20:])
- def object_index(self, sha: bytes) -> int:
- """Return the index in to the corresponding packfile for the object.
+ def object_offset(self, sha: bytes) -> int:
+ """Return the offset in to the corresponding packfile for the object.
Given the name of an object it will return the offset that object
lives at within the corresponding pack file. If the pack file doesn't
if len(sha) == 40:
sha = hex_to_sha(sha)
try:
- return self._object_index(sha)
+ return self._object_offset(sha)
except ValueError as exc:
closed = getattr(self._contents, "closed", None)
if closed in (None, True):
raise PackFileDisappeared(self) from exc
raise
- def _object_index(self, sha):
- """See object_index.
+ def _object_offset(self, sha: bytes) -> int:
+ """See object_offset.
Args:
sha: A *binary* SHA string. (20 characters long)_
return unpack_from(">L", self._contents, self._crc32_table_offset + i * 4)[0]
-def read_pack_header(read) -> Tuple[Optional[int], Optional[int]]:
+def read_pack_header(read) -> Tuple[int, int]:
"""Read the header of a pack file.
Args:
"""
header = read(12)
if not header:
- return None, None
+ raise AssertionError("file too short to contain pack")
if header[:4] != b"PACK":
raise AssertionError("Invalid pack header %r" % header)
(version,) = unpack_from(b">L", header, 4)
else:
delta_base = None
- unpacked = UnpackedObject(type_num, delta_base, size, crc32)
+ unpacked = UnpackedObject(type_num, delta_base=delta_base, decomp_len=size, crc32=crc32)
unused = read_zlib_chunks(
read_some,
unpacked,
def __len__(self):
return self._num_objects
- def read_objects(self, compute_crc32=False):
+ def read_objects(self, compute_crc32=False) -> Iterator[UnpackedObject]:
"""Read the objects in this pack file.
Args:
# read buffer and (20 - N) come from the wire.
self.read(20)
- pack_sha = bytearray(self._trailer)
+ pack_sha = bytearray(self._trailer) # type: ignore
if pack_sha != self.sha.digest():
raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())
def __eq__(self, other):
if isinstance(other, PackData):
return self.get_stored_checksum() == other.get_stored_checksum()
- if isinstance(other, list):
- if len(self) != len(other):
- return False
- for o1, o2 in zip(self.iterobjects(), other):
- if o1 != o2:
- return False
- return True
return False
def _get_size(self):
"""
return compute_file_sha(self._file, end_ofs=-20).digest()
- def iterobjects(self, progress=None, compute_crc32=True):
+ def iter_unpacked(self, *, include_comp: bool = False):
self._file.seek(self._header_size)
- for i in range(1, self._num_objects + 1):
- offset = self._file.tell()
- unpacked, unused = unpack_object(
- self._file.read, compute_crc32=compute_crc32
- )
- if progress is not None:
- progress(i, self._num_objects)
- yield (
- offset,
- unpacked.pack_type_num,
- unpacked._obj(),
- unpacked.crc32,
- )
- # Back up over unused data.
- self._file.seek(-len(unused), SEEK_CUR)
- def _iter_unpacked(self):
- # TODO(dborowitz): Merge this with iterobjects, if we can change its
- # return type.
- self._file.seek(self._header_size)
-
if self._num_objects is None:
return
for _ in range(self._num_objects):
offset = self._file.tell()
- unpacked, unused = unpack_object(self._file.read, compute_crc32=False)
+ unpacked, unused = unpack_object(self._file.read, compute_crc32=False, include_comp=include_comp)
unpacked.offset = offset
yield unpacked
# Back up over unused data.
self._file.seek(-len(unused), SEEK_CUR)
- def iterentries(self, progress=None, resolve_ext_ref=None):
+ def iterentries(self, progress: Optional[ProgressFn] = None, resolve_ext_ref: Optional[ResolveExtRefFn] = None):
"""Yield entries summarizing the contents of this pack.
Args:
progress(i, num_objects)
yield result
- def sorted_entries(self, progress=None, resolve_ext_ref=None):
+ def sorted_entries(self, progress: Optional[ProgressFn] = None, resolve_ext_ref: Optional[ResolveExtRefFn] = None):
"""Return entries in this pack, sorted by SHA.
Args:
if actual != stored:
raise ChecksumMismatch(stored, actual)
- def get_compressed_data_at(self, offset):
- """Given offset in the packfile return compressed data that is there.
-
- Using the associated index the location of an object can be looked up,
- and then the packfile can be asked directly for that object using this
- function.
+ def get_unpacked_object_at(self, offset: int, *, include_comp: bool = False) -> UnpackedObject:
+ """Given offset in the packfile return a UnpackedObject.
"""
assert offset >= self._header_size
self._file.seek(offset)
- unpacked, _ = unpack_object(self._file.read, include_comp=True)
- return (
- unpacked.pack_type_num,
- unpacked.delta_base,
- unpacked.comp_chunks,
- )
+ unpacked, _ = unpack_object(self._file.read, include_comp=include_comp)
+ unpacked.offset = offset
+ return unpacked
- def get_decompressed_data_at(self, offset):
- """Given an offset in the packfile, decompress the data that is there.
-
- Using the associated index the location of an object can be looked up,
- and then the packfile can be asked directly for that object using this
- function.
- """
- assert offset >= self._header_size
- self._file.seek(offset)
- unpacked, _ = unpack_object(self._file.read, include_comp=False)
- return (
- unpacked.pack_type_num,
- unpacked.delta_base,
- unpacked.decomp_chunks,
- )
-
- def get_object_at(self, offset):
+ def get_object_at(self, offset: int) -> Tuple[int, OldUnpackedObject]:
"""Given an offset in to the packfile return the object that is there.
Using the associated index the location of an object can be looked up,
return self._offset_cache[offset]
except KeyError:
pass
- assert offset >= self._header_size
- self._file.seek(offset)
- unpacked, _ = unpack_object(self._file.read)
+ unpacked = self.get_unpacked_object_at(offset, include_comp=False)
return (unpacked.pack_type_num, unpacked._obj())
-class DeltaChainIterator:
+T = TypeVar('T')
+
+
+class DeltaChainIterator(Generic[T]):
"""Abstract iterator over pack data based on delta chains.
Each object in the pack is guaranteed to be inflated exactly once,
_compute_crc32 = False
_include_comp = False
- def __init__(self, file_obj, resolve_ext_ref=None):
+ def __init__(self, file_obj, *, resolve_ext_ref=None) -> None:
self._file = file_obj
self._resolve_ext_ref = resolve_ext_ref
- self._pending_ofs = defaultdict(list)
- self._pending_ref = defaultdict(list)
- self._full_ofs = []
- self._shas = {}
- self._ext_refs = []
+ self._pending_ofs: Dict[int, List[int]] = defaultdict(list)
+ self._pending_ref: Dict[bytes, List[int]] = defaultdict(list)
+ self._full_ofs: List[Tuple[int, int]] = []
+ self._ext_refs: List[bytes] = []
@classmethod
- def for_pack_data(cls, pack_data, resolve_ext_ref=None):
+ def for_pack_data(cls, pack_data: PackData, resolve_ext_ref=None):
walker = cls(None, resolve_ext_ref=resolve_ext_ref)
walker.set_pack_data(pack_data)
- for unpacked in pack_data._iter_unpacked():
+ for unpacked in pack_data.iter_unpacked(include_comp=False):
walker.record(unpacked)
return walker
- def record(self, unpacked):
+ @classmethod
+ def for_pack_subset(
+ cls, pack: "Pack", shas: Iterable[bytes], *,
+ allow_missing: bool = False, resolve_ext_ref=None):
+ walker = cls(None, resolve_ext_ref=resolve_ext_ref)
+ walker.set_pack_data(pack.data)
+ todo = set()
+ for sha in shas:
+ assert isinstance(sha, bytes)
+ try:
+ off = pack.index.object_offset(sha)
+ except KeyError:
+ if not allow_missing:
+ raise
+ todo.add(off)
+ done = set()
+ while todo:
+ off = todo.pop()
+ unpacked = pack.data.get_unpacked_object_at(off)
+ walker.record(unpacked)
+ done.add(off)
+ base_ofs = None
+ if unpacked.pack_type_num == OFS_DELTA:
+ base_ofs = unpacked.offset - unpacked.delta_base
+ elif unpacked.pack_type_num == REF_DELTA:
+ with suppress(KeyError):
+ assert isinstance(unpacked.delta_base, bytes)
+ base_ofs = pack.index.object_index(unpacked.delta_base)
+ if base_ofs is not None and base_ofs not in done:
+ todo.add(base_ofs)
+ return walker
+
+ def record(self, unpacked: UnpackedObject) -> None:
type_num = unpacked.pack_type_num
offset = unpacked.offset
if type_num == OFS_DELTA:
base_offset = offset - unpacked.delta_base
self._pending_ofs[base_offset].append(offset)
elif type_num == REF_DELTA:
+ assert isinstance(unpacked.delta_base, bytes)
self._pending_ref[unpacked.delta_base].append(offset)
else:
self._full_ofs.append((offset, type_num))
- def set_pack_data(self, pack_data):
+ def set_pack_data(self, pack_data: PackData) -> None:
self._file = pack_data._file
def _walk_all_chains(self):
for offset, type_num in self._full_ofs:
yield from self._follow_chain(offset, type_num, None)
yield from self._walk_ref_chains()
- assert not self._pending_ofs
+ assert not self._pending_ofs, repr(self._pending_ofs)
- def _ensure_no_pending(self):
+ def _ensure_no_pending(self) -> None:
if self._pending_ref:
raise KeyError([sha_to_hex(s) for s in self._pending_ref])
self._ensure_no_pending()
- def _result(self, unpacked):
- return unpacked
+ def _result(self, unpacked: UnpackedObject) -> T:
+ raise NotImplementedError
- def _resolve_object(self, offset, obj_type_num, base_chunks):
+ def _resolve_object(self, offset: int, obj_type_num: int, base_chunks: List[bytes]) -> UnpackedObject:
self._file.seek(offset)
unpacked, _ = unpack_object(
self._file.read,
unpacked.obj_chunks = apply_delta(base_chunks, unpacked.decomp_chunks)
return unpacked
- def _follow_chain(self, offset, obj_type_num, base_chunks):
+ def _follow_chain(self, offset: int, obj_type_num: int, base_chunks: List[bytes]):
# Unlike PackData.get_object_at, there is no need to cache offsets as
# this approach by design inflates each object exactly once.
todo = [(offset, obj_type_num, base_chunks)]
self._pending_ref.pop(unpacked.sha(), []),
)
todo.extend(
- (new_offset, unpacked.obj_type_num, unpacked.obj_chunks)
+ (new_offset, unpacked.obj_type_num, unpacked.obj_chunks) # type: ignore
for new_offset in unblocked
)
- def __iter__(self):
+ def __iter__(self) -> Iterator[T]:
return self._walk_all_chains()
def ext_refs(self):
return self._ext_refs
-class PackIndexer(DeltaChainIterator):
class UnpackedObjectIterator(DeltaChainIterator[UnpackedObject]):
    """Delta chain iterator that yields ``UnpackedObject`` entries as-is."""

    def _result(self, unpacked):
        """Pass the resolved object through unchanged."""
        return unpacked
+class PackIndexer(DeltaChainIterator[PackIndexEntry]):
"""Delta chain iterator that yields index entries."""
_compute_crc32 = True
return unpacked.sha(), unpacked.offset, unpacked.crc32
-class PackInflater(DeltaChainIterator):
+class PackInflater(DeltaChainIterator[ShaFile]):
"""Delta chain iterator that yields ShaFile objects."""
def _result(self, unpacked):
compression_level: the zlib compression level
Returns: Tuple with offset at which the object was written, and crc32
"""
- if hasattr(write, 'write'):
- warnings.warn(
- 'write_pack_object() now takes a write rather than file argument',
- DeprecationWarning, stacklevel=2)
- write = write.write
crc32 = 0
for chunk in pack_object_chunks(
type, object, compression_level=compression_level):
def write_pack(
filename,
- objects,
+ objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
*,
deltify: Optional[bool] = None,
delta_window_size: Optional[int] = None,
- compression_level: int = -1,
- reuse_pack: Optional[PackedObjectContainer] = None):
+ compression_level: int = -1):
"""Write a new pack data file.
Args:
filename: Path to the new pack file (without .pack extension)
- objects: (object, path) tuple iterable to write. Should provide __len__
+ container: PackedObjectContainer
+ entries: Sequence of (object_id, path) tuples to write
delta_window_size: Delta window size
deltify: Whether to deltify pack objects
compression_level: the zlib compression level
delta_window_size=delta_window_size,
deltify=deltify,
compression_level=compression_level,
- reuse_pack=reuse_pack,
)
entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
with GitFile(filename + ".idx", "wb") as f:
write(chunk)
def find_reusable_deltas(
        container: "PackedObjectContainer",
        object_ids: Set[bytes],
        *, other_haves: Optional[Set[bytes]] = None,
        progress=None) -> Iterator["UnpackedObject"]:
    """Scan *container* for deltas that can be copied into a new pack.

    A delta is reusable when its base will also be in the new pack
    (``object_ids``) or is already available on the receiving side
    (``other_haves``).

    Args:
      container: Pack container to scan
      object_ids: Hex SHAs of the objects that will be written
      other_haves: Hex SHAs the remote side already has
      progress: Optional callback taking a bytes progress message
    Returns: Iterator over reusable ``UnpackedObject`` deltas
    """
    if other_haves is None:
        other_haves = set()
    found = 0
    subset = container.iter_unpacked_subset(
        object_ids, allow_missing=True, convert_ofs_delta=True)
    for i, unpacked in enumerate(subset):
        if progress is not None and i % 1000 == 0:
            progress(("checking for reusable deltas: %d/%d\r" % (i, len(object_ids))).encode('utf-8'))
        if unpacked.pack_type_num != REF_DELTA:
            continue
        hexsha = sha_to_hex(unpacked.delta_base)
        if hexsha in object_ids or hexsha in other_haves:
            found += 1
            yield unpacked
    if progress is not None:
        progress(("found %d deltas to reuse\n" % (found, )).encode('utf-8'))
+
def deltify_pack_objects(
        objects: "Union[Iterator[ShaFile], Iterator[Tuple[ShaFile, Optional[bytes]]]]",
        *, window_size: Optional[int] = None,
        progress=None) -> Iterator["UnpackedObject"]:
    """Generate deltas for pack objects.

    Args:
      objects: An iterable of ``ShaFile`` objects, or of
        (object, path) tuples, to deltify.
      window_size: Delta window size; None for default
      progress: Optional callback taking a bytes progress message
    Returns: Iterator over ``UnpackedObject``s; ``delta_base`` is None
      for full-text entries
    """
    def objects_with_hints():
        # Normalize both accepted input shapes into (object, hint) pairs,
        # where hint is (type_num, path).
        for e in objects:
            if isinstance(e, ShaFile):
                yield (e, (e.type_num, None))
            else:
                yield (e[0], (e[0].type_num, e[1]))
    yield from deltas_from_sorted_objects(
        sort_objects_for_delta(objects_with_hints()),
        window_size=window_size,
        progress=progress)
+
def sort_objects_for_delta(
        objects: "Union[Iterator[ShaFile], Iterator[Tuple[ShaFile, Optional[PackHint]]]]",
) -> "Iterator[ShaFile]":
    """Order objects to maximize the chance of finding good delta bases.

    Args:
      objects: Iterable of ``ShaFile``s, each optionally paired with a
        ``PackHint`` of (type_num, path).
    Returns: Iterator over the same objects, sorted by (type, path,
      decreasing size) so similar objects end up adjacent.
    """
    magic = []
    for entry in objects:
        # Reset the hint fields every iteration: a bare (non-tuple) entry
        # must not inherit stale values from a previous one, and previously
        # hit a NameError on the first iteration.
        type_num = None
        path = None
        if isinstance(entry, tuple):
            obj, hint = entry
            if hint is not None:
                (type_num, path) = hint
        else:
            obj = entry
        magic.append((type_num, path, -obj.raw_length(), obj))
    # Build a list of objects ordered by the magic Linus heuristic
    # This helps us find good objects to diff against us
    magic.sort()
    return (x[3] for x in magic)
- possible_bases: Deque[Tuple[bytes, int, List[bytes]]] = deque()
- for type_num, path, neg_length, o in magic:
+def deltas_from_sorted_objects(objects, window_size: Optional[int] = None, progress=None):
+ # TODO(jelmer): Use threads
+ if window_size is None:
+ window_size = DEFAULT_PACK_DELTA_WINDOW_SIZE
+
+ possible_bases: Deque[Tuple[bytes, int, List[bytes]]] = deque()
+ for i, o in enumerate(objects):
+ if progress is not None and i % 1000 == 0:
+ progress(("generating deltas: %d\r" % (i, )).encode('utf-8'))
raw = o.as_raw_chunks()
winner = raw
winner_len = sum(map(len, winner))
winner_base = None
for base_id, base_type_num, base in possible_bases:
- if base_type_num != type_num:
+ if base_type_num != o.type_num:
continue
delta_len = 0
delta = []
winner_base = base_id
winner = delta
winner_len = sum(map(len, winner))
- yield type_num, o.sha().digest(), winner_base, winner
- possible_bases.appendleft((o.sha().digest(), type_num, raw))
+ yield UnpackedObject(o.type_num, sha=o.sha().digest(), delta_base=winner_base, decomp_len=winner_len, decomp_chunks=winner)
+ possible_bases.appendleft((o.sha().digest(), o.type_num, raw))
while len(possible_bases) > window_size:
possible_bases.pop()
def pack_objects_to_data(
- objects,
- delta_window_size: Optional[int] = None,
+ objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
+ *,
deltify: Optional[bool] = None,
- reuse_pack: Optional[PackedObjectContainer] = None):
+ delta_window_size: Optional[int] = None,
+ ofs_delta: bool = True,
+ progress=None) -> Tuple[int, Iterator[UnpackedObject]]:
"""Create pack data from objects
Args:
objects: Pack objects
Returns: Tuples with (type_num, hexdigest, delta base, object chunks)
"""
+ # TODO(jelmer): support deltaifying
count = len(objects)
if deltify is None:
# PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
# slow at the moment.
deltify = False
if deltify:
- pack_contents = deltify_pack_objects(
- objects, window_size=delta_window_size, reuse_pack=reuse_pack)
- return (count, pack_contents)
+ return (
+ count,
+ deltify_pack_objects(iter(objects), window_size=delta_window_size, progress=progress)) # type: ignore
else:
+ def iter_without_path():
+ for o in objects:
+ if isinstance(o, tuple):
+ yield full_unpacked_object(o[0])
+ else:
+ yield full_unpacked_object(o)
return (
count,
- (
- (o.type_num, o.sha().digest(), None, o.as_raw_chunks())
- for (o, path) in objects
- ),
+ iter_without_path()
)
def generate_unpacked_objects(
        container: "PackedObjectContainer",
        object_ids: "Sequence[Tuple[ObjectID, Optional[PackHint]]]",
        delta_window_size: Optional[int] = None,
        deltify: Optional[bool] = None,
        reuse_deltas: bool = True,
        ofs_delta: bool = True,
        other_haves: Optional[Set[bytes]] = None,
        progress=None) -> Iterator["UnpackedObject"]:
    """Create pack data from objects.

    Args:
      container: Container holding the objects to pack
      object_ids: Sequence of (hex object id, optional pack hint) tuples
      delta_window_size: Delta window size; None for default
      deltify: Whether to generate new deltas; None means use the default
        (currently False)
      reuse_deltas: Whether to reuse existing deltas from *container*
      ofs_delta: Whether offset deltas are permitted (currently unused
        here — TODO confirm downstream handling)
      other_haves: Hex SHAs the receiving side already has
      progress: Optional callback taking a bytes progress message
    Returns: Iterator over ``UnpackedObject``s
    """
    todo = dict(object_ids)
    if reuse_deltas:
        for unpack in find_reusable_deltas(container, set(todo), other_haves=other_haves, progress=progress):
            # Objects covered by a reused delta need no further work.
            del todo[sha_to_hex(unpack.sha())]
            yield unpack
    if deltify is None:
        # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
        # slow at the moment.
        deltify = False
    if deltify:
        objects_to_delta = container.iterobjects_subset(todo.keys(), allow_missing=False)
        yield from deltas_from_sorted_objects(
            sort_objects_for_delta(
                (o, todo[o.id])
                for o in objects_to_delta),
            window_size=delta_window_size,
            progress=progress)
    else:
        for oid in todo:
            yield full_unpacked_object(container[oid])
+
+
def full_unpacked_object(o: "ShaFile") -> "UnpackedObject":
    """Wrap a ``ShaFile`` as a full-text (non-delta) ``UnpackedObject``."""
    chunks = o.as_raw_chunks()
    return UnpackedObject(
        o.type_num,
        delta_base=None,
        crc32=None,
        decomp_chunks=chunks,
        sha=o.sha().digest(),
    )
+
def write_pack_from_container(
        write,
        container: "PackedObjectContainer",
        object_ids: "Sequence[Tuple[ObjectID, Optional[PackHint]]]",
        delta_window_size: Optional[int] = None,
        deltify: Optional[bool] = None,
        reuse_deltas: bool = True,
        compression_level: int = -1,
        other_haves: Optional[Set[bytes]] = None
):
    """Write a new pack data file.

    Args:
      write: write function to use
      container: PackedObjectContainer to read the objects from
      object_ids: Sequence of (object id, optional pack hint) tuples to write
      delta_window_size: Sliding window size for searching for deltas;
        Set to None for default window size.
      deltify: Whether to deltify objects
      reuse_deltas: Whether to reuse existing deltas from *container*
      compression_level: the zlib compression level to use
      other_haves: Hex SHAs the receiving side already has
    Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
    """
    pack_contents_count = len(object_ids)
    pack_contents = generate_unpacked_objects(
        container, object_ids, delta_window_size=delta_window_size,
        deltify=deltify,
        reuse_deltas=reuse_deltas,
        other_haves=other_haves)

    return write_pack_data(
        write,
        pack_contents,
        num_records=pack_contents_count,
        compression_level=compression_level,
    )
+
+
def write_pack_objects(
- write, objects,
+ write,
+ objects: Union[Sequence[ShaFile], Sequence[Tuple[ShaFile, Optional[bytes]]]],
+ *,
delta_window_size: Optional[int] = None,
deltify: Optional[bool] = None,
- reuse_pack: Optional[PackedObjectContainer] = None,
compression_level: int = -1
):
"""Write a new pack data file.
Args:
write: write function to use
- objects: Iterable of (object, path) tuples to write. Should provide
- __len__
+ objects: Sequence of (object, path) tuples to write
delta_window_size: Sliding window size for searching for deltas;
Set to None for default window size.
deltify: Whether to deltify objects
- reuse_pack: Pack object we can search for objects to reuse
compression_level: the zlib compression level to use
Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
"""
- if hasattr(write, 'write'):
- warnings.warn(
- 'write_pack_objects() now takes a write rather than file argument',
- DeprecationWarning, stacklevel=2)
- write = write.write
-
pack_contents_count, pack_contents = pack_objects_to_data(
- objects, delta_window_size=delta_window_size,
- deltify=deltify,
- reuse_pack=reuse_pack)
+ objects, deltify=deltify)
return write_pack_data(
write,
- pack_contents_count,
pack_contents,
+ num_records=pack_contents_count,
compression_level=compression_level,
)
class PackChunkGenerator:
- def __init__(self, num_records=None, records=None, progress=None, compression_level=-1):
+ def __init__(self, num_records=None, records=None, progress=None, compression_level=-1, reuse_compressed=True):
self.cs = sha1(b"")
self.entries = {}
self._it = self._pack_data_chunks(
- num_records=num_records, records=records, progress=progress, compression_level=compression_level)
+ num_records=num_records, records=records, progress=progress, compression_level=compression_level, reuse_compressed=reuse_compressed)
def sha1digest(self):
return self.cs.digest()
def __iter__(self):
return self._it
- def _pack_data_chunks(self, num_records=None, records=None, progress=None, compression_level=-1):
- """Iterate pack data file chunks..
+ def _pack_data_chunks(self, records: Iterator[UnpackedObject], *, num_records=None, progress=None, compression_level: int = -1, reuse_compressed: bool = True) -> Iterator[bytes]:
+ """Iterate pack data file chunks.
Args:
- num_records: Number of records (defaults to len(records) if None)
- records: Iterator over type_num, object_id, delta_base, raw
+ records: Iterator over UnpackedObject
+ num_records: Number of records (defaults to len(records) if not specified)
progress: Function to report progress to
compression_level: the zlib compression level
Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
"""
# Write the pack
if num_records is None:
- num_records = len(records)
+ num_records = len(records) # type: ignore
offset = 0
for chunk in pack_header_chunks(num_records):
yield chunk
self.cs.update(chunk)
offset += len(chunk)
actual_num_records = 0
- for i, (type_num, object_id, delta_base, raw) in enumerate(records):
- if progress is not None:
+ for i, unpacked in enumerate(records):
+ type_num = unpacked.pack_type_num
+ if progress is not None and i % 1000 == 0:
progress(("writing pack data: %d/%d\r" % (i, num_records)).encode("ascii"))
- if delta_base is not None:
+ raw: Union[List[bytes], Tuple[int, List[bytes]], Tuple[bytes, List[bytes]]]
+ if unpacked.delta_base is not None:
try:
- base_offset, base_crc32 = self.entries[delta_base]
+ base_offset, base_crc32 = self.entries[unpacked.delta_base]
except KeyError:
type_num = REF_DELTA
- raw = (delta_base, raw)
+ assert isinstance(unpacked.delta_base, bytes)
+ raw = (unpacked.delta_base, unpacked.decomp_chunks)
else:
type_num = OFS_DELTA
- raw = (offset - base_offset, raw)
+ raw = (offset - base_offset, unpacked.decomp_chunks)
+ else:
+ raw = unpacked.decomp_chunks
+ if unpacked.comp_chunks is not None and reuse_compressed:
+ chunks = unpacked.comp_chunks
+ else:
+ chunks = pack_object_chunks(type_num, raw, compression_level=compression_level)
crc32 = 0
object_size = 0
- for chunk in pack_object_chunks(type_num, raw, compression_level=compression_level):
+ for chunk in chunks:
yield chunk
crc32 = binascii.crc32(chunk, crc32)
self.cs.update(chunk)
object_size += len(chunk)
actual_num_records += 1
- self.entries[object_id] = (offset, crc32)
+ self.entries[unpacked.sha()] = (offset, crc32)
offset += object_size
if actual_num_records != num_records:
raise AssertionError(
yield self.cs.digest()
-def write_pack_data(write, num_records=None, records=None, progress=None, compression_level=-1):
+def write_pack_data(write, records: Iterator[UnpackedObject], *, num_records=None, progress=None, compression_level=-1):
"""Write a new pack data file.
Args:
compression_level: the zlib compression level
Returns: Dict mapping id -> (offset, crc32 checksum), pack checksum
"""
- if hasattr(write, 'write'):
- warnings.warn(
- 'write_pack_data() now takes a write rather than file argument',
- DeprecationWarning, stacklevel=2)
- write = write.write
chunk_generator = PackChunkGenerator(
num_records=num_records, records=records, progress=progress,
compression_level=compression_level)
return out
-def write_pack_index_v2(f, entries, pack_checksum):
+def write_pack_index_v2(
+ f, entries: Iterable[PackIndexEntry], pack_checksum: bytes) -> bytes:
"""Write a new pack index file.
Args:
f = SHA1Writer(f)
f.write(b"\377tOc") # Magic!
f.write(struct.pack(">L", 2))
- fan_out_table = defaultdict(lambda: 0)
+ fan_out_table: Dict[int, int] = defaultdict(lambda: 0)
for (name, offset, entry_checksum) in entries:
fan_out_table[ord(name[:1])] += 1
# Fan-out table
write_pack_index = write_pack_index_v2
-class _PackTupleIterable:
- """Helper for Pack.pack_tuples."""
-
- def __init__(self, iterobjects, length):
- self._iterobjects = iterobjects
- self._length = length
-
- def __len__(self):
- return self._length
-
- def __iter__(self):
- return ((o, None) for o in self._iterobjects())
-
-
class Pack:
"""A Git pack object."""
_data: Optional[PackData]
_idx: Optional[PackIndex]
- def __init__(self, basename, resolve_ext_ref: Optional[
- Callable[[bytes], Tuple[int, UnpackedObject]]] = None):
+ def __init__(self, basename, resolve_ext_ref: Optional[ResolveExtRefFn] = None):
self._basename = basename
self._data = None
self._idx = None
"""Iterate over all the sha1s of the objects in this pack."""
return iter(self.index)
- def check_length_and_checksum(self):
+ def check_length_and_checksum(self) -> None:
"""Sanity check the length and checksum of the pack index and data."""
assert len(self.index) == len(self.data)
idx_stored_checksum = self.index.get_pack_checksum()
sha_to_hex(data_stored_checksum),
)
- def check(self):
+ def check(self) -> None:
"""Check the integrity of this pack.
Raises:
obj.check()
# TODO: object connectivity checks
- def get_stored_checksum(self):
+ def get_stored_checksum(self) -> bytes:
return self.data.get_stored_checksum()
- def __contains__(self, sha1):
+ def pack_tuples(self):
+ return [(o, None) for o in self.iterobjects()]
+
+ def __contains__(self, sha1: bytes) -> bool:
"""Check whether this pack contains a particular SHA1."""
try:
- self.index.object_index(sha1)
+ self.index.object_offset(sha1)
return True
except KeyError:
return False
- def get_raw_unresolved(self, sha1):
- """Get raw unresolved data for a SHA.
-
- Args:
- sha1: SHA to return data for
- Returns: Tuple with pack object type, delta base (if applicable),
- list of data chunks
- """
- offset = self.index.object_index(sha1)
- (obj_type, delta_base, chunks) = self.data.get_compressed_data_at(offset)
- if obj_type == OFS_DELTA:
- delta_base = sha_to_hex(self.index.object_sha1(offset - delta_base))
- obj_type = REF_DELTA
- return (obj_type, delta_base, chunks)
-
- def get_raw(self, sha1):
- offset = self.index.object_index(sha1)
+ def get_raw(self, sha1: bytes) -> Tuple[int, bytes]:
+ offset = self.index.object_offset(sha1)
obj_type, obj = self.data.get_object_at(offset)
type_num, chunks = self.resolve_object(offset, obj_type, obj)
return type_num, b"".join(chunks)
- def __getitem__(self, sha1):
+ def __getitem__(self, sha1: bytes) -> bytes:
"""Retrieve the specified SHA1."""
type, uncomp = self.get_raw(sha1)
return ShaFile.from_raw_string(type, uncomp, sha=sha1)
- def iterobjects(self):
+ def iterobjects(self) -> Iterator[ShaFile]:
"""Iterate over the objects in this pack."""
return iter(
PackInflater.for_pack_data(self.data, resolve_ext_ref=self.resolve_ext_ref)
)
- def pack_tuples(self):
- """Provide an iterable for use with write_pack_objects.
+ def iterobjects_subset(self, shas, *, allow_missing: bool = False) -> Iterator[ShaFile]:
+ return (
+ uo
+ for uo in
+ PackInflater.for_pack_subset(
+ self, shas, allow_missing=allow_missing,
+ resolve_ext_ref=self.resolve_ext_ref)
+ if uo.sha() in shas)
- Returns: Object that can iterate over (object, path) tuples
- and provides __len__
- """
+ def iter_unpacked_subset(self, shas, *, include_comp: bool = False, allow_missing: bool = False, convert_ofs_delta: bool = False) -> Iterator[UnpackedObject]:
+ ofs_pending: Dict[int, List[UnpackedObject]] = defaultdict(list)
+ ofs: Dict[bytes, int] = {}
+ todo = set(shas)
+ for unpacked in self.iter_unpacked(include_comp=include_comp):
+ sha = unpacked.sha()
+ ofs[unpacked.offset] = sha
+ hexsha = sha_to_hex(sha)
+ if hexsha in todo:
+ if unpacked.pack_type_num == OFS_DELTA:
+ assert isinstance(unpacked.delta_base, int)
+ base_offset = unpacked.offset - unpacked.delta_base
+ try:
+ unpacked.delta_base = ofs[base_offset]
+ except KeyError:
+ ofs_pending[base_offset].append(unpacked)
+ continue
+ else:
+ unpacked.pack_type_num = REF_DELTA
+ yield unpacked
+ todo.remove(hexsha)
+ for child in ofs_pending.pop(unpacked.offset, []):
+ child.pack_type_num = REF_DELTA
+ child.delta_base = sha
+ yield child
+ assert not ofs_pending
+ if not allow_missing and todo:
+ raise KeyError(todo.pop())
- return _PackTupleIterable(self.iterobjects, len(self))
+ def iter_unpacked(self, include_comp=False):
+ ofs_to_entries = {ofs: (sha, crc32) for (sha, ofs, crc32) in self.index.iterentries()}
+ for unpacked in self.data.iter_unpacked(include_comp=include_comp):
+ (sha, crc32) = ofs_to_entries[unpacked.offset]
+ unpacked._sha = sha
+ unpacked.crc32 = crc32
+ yield unpacked
- def keep(self, msg=None):
+ def keep(self, msg: Optional[bytes] = None) -> str:
"""Add a .keep file for the pack, preventing git from garbage collecting it.
Args:
keepfile.write(b"\n")
return keepfile_name
- def get_ref(self, sha) -> Tuple[int, int, UnpackedObject]:
+ def get_ref(self, sha: bytes) -> Tuple[Optional[int], int, OldUnpackedObject]:
"""Get the object for a ref SHA, only looking in this pack."""
# TODO: cache these results
try:
- offset = self.index.object_index(sha)
+ offset = self.index.object_offset(sha)
except KeyError:
offset = None
if offset:
raise KeyError(sha)
return offset, type, obj
- def resolve_object(self, offset, type, obj, get_ref=None):
+ def resolve_object(self, offset: int, type: int, obj, get_ref=None) -> Tuple[int, Iterable[bytes]]:
"""Resolve an object, possibly resolving deltas when necessary.
Returns: Tuple with object type and contents.
self.data._offset_cache[prev_offset] = base_type, chunks
return base_type, chunks
- def entries(self, progress=None):
+ def entries(self, progress: Optional[ProgressFn] = None) -> Iterator[PackIndexEntry]:
"""Yield entries summarizing the contents of this pack.
Args:
return self.data.iterentries(
progress=progress, resolve_ext_ref=self.resolve_ext_ref)
- def sorted_entries(self, progress=None):
+ def sorted_entries(self, progress: Optional[ProgressFn] = None) -> Iterator[PackIndexEntry]:
"""Return entries in this pack, sorted by SHA.
Args:
return self.data.sorted_entries(
progress=progress, resolve_ext_ref=self.resolve_ext_ref)
+ def get_unpacked_object(self, sha: bytes, *, include_comp: bool = False, convert_ofs_delta: bool = True) -> UnpackedObject:
+ """Get the unpacked object for a sha.
+ Args:
+ sha: SHA of object to fetch
+ include_comp: Whether to include compression data in UnpackedObject
+ """
+ offset = self.index.object_offset(sha)
+ unpacked = self.data.get_unpacked_object_at(offset, include_comp=include_comp)
+ if unpacked.pack_type_num == OFS_DELTA and convert_ofs_delta:
+ assert isinstance(unpacked.delta_base, int)
+ unpacked.delta_base = self.index.object_sha1(offset - unpacked.delta_base)
+ unpacked.pack_type_num = REF_DELTA
+ return unpacked
+
+
try:
from dulwich._pack import ( # type: ignore # noqa: F811
apply_delta,
blob - 1bccd43efce625263be54fbeea89023a04055fa8
blob + daf6b6297fe85abdfeab3a58f4592559aa8b2514
--- dulwich/porcelain.py
+++ dulwich/porcelain.py
)
from dulwich.pack import (
write_pack_index,
- write_pack_objects,
+ write_pack_from_container,
)
from dulwich.patch import write_tree_diff
from dulwich.protocol import (
"""
with open_repo_closing(repo) as r:
r.object_store.pack_loose_objects()
-
-
-def find_pack_for_reuse(repo):
- reuse_pack = None
- max_pack_len = 0
- # The pack file which contains the largest number of objects
- # will be most suitable for object reuse.
- for p in repo.object_store.packs:
- if len(p) > max_pack_len:
- reuse_pack = p
- max_pack_len = len(reuse_pack)
- return reuse_pack
def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None, deltify=None, reuse_deltas=True):
reuse_deltas: Allow reuse of existing deltas while deltifying
"""
with open_repo_closing(repo) as r:
- reuse_pack = None
- if deltify and reuse_deltas:
- reuse_pack = find_pack_for_reuse(r)
- entries, data_sum = write_pack_objects(
+ entries, data_sum = write_pack_from_container(
packf.write,
- r.object_store.iter_shas((oid, None) for oid in object_ids),
+ r.object_store,
+ [(oid, None) for oid in object_ids],
deltify=deltify,
delta_window_size=delta_window_size,
- reuse_pack=reuse_pack
+ reuse_deltas=reuse_deltas,
)
if idxf is not None:
entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
blob - 23912c85cfb8baf084b62829820e7667a99c43ce
blob + 3cd323f5a44bb429b2c8fd3d7c13d18c6f61d887
--- dulwich/repo.py
+++ dulwich/repo.py
Callable,
Tuple,
TYPE_CHECKING,
+ FrozenSet,
List,
Dict,
Union,
from dulwich.object_store import (
DiskObjectStore,
MemoryObjectStore,
- BaseObjectStore,
+ MissingObjectFinder,
+ PackBasedObjectStore,
ObjectStoreGraphWalker,
peel_sha,
- MissingObjectFinder,
)
from dulwich.objects import (
check_hexsha,
ObjectID,
)
from dulwich.pack import (
- pack_objects_to_data,
+ generate_unpacked_objects
)
from dulwich.hooks import (
repository
"""
- def __init__(self, object_store: BaseObjectStore, refs: RefsContainer):
+ def __init__(self, object_store: PackBasedObjectStore, refs: RefsContainer):
"""Open a repository.
This shouldn't be called directly, but rather through one of the
depth: Shallow fetch depth
Returns: count and iterator over pack data
"""
- # TODO(jelmer): Fetch pack data directly, don't create objects first.
- objects = self.fetch_objects(
+ missing_objects = self.find_missing_objects(
determine_wants, graph_walker, progress, get_tagged, depth=depth
)
- return pack_objects_to_data(objects)
+ remote_has = missing_objects.get_remote_has()
+ object_ids = list(missing_objects)
+ return len(object_ids), generate_unpacked_objects(
+ self.object_store, object_ids, progress=progress,
+ other_haves=remote_has)
- def fetch_objects(
+ def find_missing_objects(
self,
determine_wants,
graph_walker,
progress,
get_tagged=None,
depth=None,
- ):
+ ) -> Optional[MissingObjectFinder]:
"""Fetch the missing objects required for a set of revisions.
Args:
if not isinstance(wants, list):
raise TypeError("determine_wants() did not return a list")
- shallows = getattr(graph_walker, "shallow", frozenset())
- unshallows = getattr(graph_walker, "unshallow", frozenset())
+ shallows: FrozenSet[ObjectID] = getattr(graph_walker, "shallow", frozenset())
+ unshallows: FrozenSet[ObjectID] = getattr(graph_walker, "unshallow", frozenset())
if wants == []:
# TODO(dborowitz): find a way to short-circuit that doesn't change
# Do not send a pack in shallow short-circuit path
return None
- return []
+ class DummyMissingObjectFinder:
+
+ def get_remote_has(self):
+ return None
+
+ def __len__(self):
+ return 0
+ def __iter__(self):
+ yield from []
+
+ return DummyMissingObjectFinder() # type: ignore
+
# If the graph walker is set up with an implementation that can
# ACK/NAK to the wire, it will write data to the client through
# this call as a side-effect.
def get_parents(commit):
return parents_provider.get_parents(commit.id, commit)
- return self.object_store.iter_shas(
- MissingObjectFinder(
- self.object_store,
- haves=haves,
- wants=wants,
- shallow=self.get_shallow(),
- progress=progress,
- get_tagged=get_tagged,
- get_parents=get_parents,
- )
- )
+ return MissingObjectFinder(
+ self.object_store,
+ haves=haves,
+ wants=wants,
+ shallow=self.get_shallow(),
+ progress=progress,
+ get_tagged=get_tagged,
+ get_parents=get_parents)
def generate_pack_data(self, have: List[ObjectID], want: List[ObjectID],
progress: Optional[Callable[[str], None]] = None,
def __init__(
self,
root: str,
- object_store: Optional[BaseObjectStore] = None,
+ object_store: Optional[PackBasedObjectStore] = None,
bare: Optional[bool] = None
) -> None:
self.symlink_fn = None
for fs_path in fs_paths:
tree_path = _fs_to_tree_path(fs_path)
try:
- tree_entry = self.object_store[tree_id].lookup_path(
+ tree = self.object_store[tree_id]
+ assert isinstance(tree, Tree)
+ tree_entry = tree.lookup_path(
self.object_store.__getitem__, tree_path)
except KeyError:
# if tree_entry didn't exist, this file was being added, so
blob - 53376d3a5859dbed1a819b3d1f69ec089c1d1b01
blob + 9b045eeddbada1386098f6c3630b629a693cb6ff
--- dulwich/server.py
+++ dulwich/server.py
# server.py -- Implementation of the server side git protocols
# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
-# Coprygith (C) 2011-2012 Jelmer Vernooij <jelmer@jelmer.uk>
+# Copyright (C) 2011-2012 Jelmer Vernooij <jelmer@jelmer.uk>
#
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as public by the Free Software Foundation; version 2.0
"""
import collections
+from functools import partial
import os
import socket
import sys
import time
from typing import List, Tuple, Dict, Optional, Iterable, Set
+
+try:
+ from typing import Protocol as TypingProtocol
+except ImportError: # python < 3.8
+ from typing_extensions import Protocol as TypingProtocol # type: ignore
+
import zlib
import socketserver
from dulwich import log_utils
from dulwich.objects import (
Commit,
+ ObjectID,
valid_hexsha,
)
from dulwich.object_store import (
peel_sha,
)
from dulwich.pack import (
- write_pack_objects,
+ write_pack_from_container,
ObjectContainer,
+ PackedObjectContainer,
)
from dulwich.protocol import (
BufferedPktLineWriter,
NAK_LINE,
)
from dulwich.refs import (
+ RefsContainer,
ANNOTATED_TAG_SUFFIX,
write_info_refs,
)
raise NotImplementedError(self.open_repository)
-class BackendRepo:
+class BackendRepo(TypingProtocol):
"""Repository abstraction used by the Git server.
The methods required here are a subset of those provided by
dulwich.repo.Repo.
"""
- object_store = None
- refs = None
+ object_store: PackedObjectContainer
+ refs: RefsContainer
def get_refs(self) -> Dict[bytes, bytes]:
"""
"""
return None
- def fetch_objects(self, determine_wants, graph_walker, progress, get_tagged=None):
+ def find_missing_objects(self, determine_wants, graph_walker, progress, get_tagged=None):
"""
Yield the objects required for a list of commits.
CAPABILITY_OFS_DELTA,
)
- def progress(self, message):
- if self.has_capability(CAPABILITY_NO_PROGRESS) or self._processing_have_lines:
- return
- self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, message)
+ def progress(self, message: bytes):
+ pass
- def get_tagged(self, refs=None, repo=None):
+ def _start_pack_send_phase(self):
+ if self.has_capability(CAPABILITY_SIDE_BAND_64K):
+ # The provided haves are processed, and it is safe to send side-
+ # band data now.
+ if not self.has_capability(CAPABILITY_NO_PROGRESS):
+ self.progress = partial(self.proto.write_sideband, SIDE_BAND_CHANNEL_PROGRESS)
+
+ self.write_pack_data = partial(self.proto.write_sideband, SIDE_BAND_CHANNEL_DATA)
+ else:
+ self.write_pack_data = self.proto.write
+
+ def get_tagged(self, refs=None, repo=None) -> Dict[ObjectID, ObjectID]:
"""Get a dict of peeled values of tags to their original tag shas.
Args:
# TODO: fix behavior when missing
return {}
# TODO(jelmer): Integrate this with the refs logic in
- # Repo.fetch_objects
+ # Repo.find_missing_objects
tagged = {}
for name, sha in refs.items():
peeled_sha = repo.get_peeled(name)
return tagged
def handle(self):
- def write(x):
- return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
+ # Note the fact that client is only processing responses related
+ # to the have lines it sent, and any other data (including side-
+        # band) will be considered a fatal error.
+ self._processing_have_lines = True
graph_walker = _ProtocolGraphWalker(
self,
wants.extend(graph_walker.determine_wants(refs, **kwargs))
return wants
- objects_iter = self.repo.fetch_objects(
+ missing_objects = self.repo.find_missing_objects(
wants_wrapper,
graph_walker,
self.progress,
get_tagged=self.get_tagged,
)
- # Note the fact that client is only processing responses related
- # to the have lines it sent, and any other data (including side-
- # band) will be be considered a fatal error.
- self._processing_have_lines = True
+ object_ids = list(missing_objects)
# Did the process short-circuit (e.g. in a stateless RPC call)? Note
# that the client still expects a 0-object pack in most cases.
if len(wants) == 0:
return
- # The provided haves are processed, and it is safe to send side-
- # band data now.
- self._processing_have_lines = False
-
if not graph_walker.handle_done(
not self.has_capability(CAPABILITY_NO_DONE), self._done_received
):
return
+ self._start_pack_send_phase()
self.progress(
- ("counting objects: %d, done.\n" % len(objects_iter)).encode("ascii")
+ ("counting objects: %d, done.\n" % len(object_ids)).encode("ascii")
)
- write_pack_objects(write, objects_iter)
+
+ write_pack_from_container(self.write_pack_data, self.repo.object_store, object_ids)
# we are done
self.proto.write_pkt_line(None)
peeled_sha = self.get_peeled(ref)
except KeyError:
# Skip refs that are inaccessible
- # TODO(jelmer): Integrate with Repo.fetch_objects refs
+ # TODO(jelmer): Integrate with Repo.find_missing_objects refs
# logic.
continue
if i == 0:
value = str(value).encode("ascii")
self.proto.unread_pkt_line(command + b" " + value)
+ def nak(self):
+ pass
+
def ack(self, have_ref):
if len(have_ref) != 40:
raise ValueError("invalid sha %r" % have_ref)
blob - 0c1d038f4acbc78b115600d8a518d6655d5ad441
blob + 05aa84ec2945c2580641492f4d5c20f171b91900
--- dulwich/tests/compat/test_client.py
+++ dulwich/tests/compat/test_client.py
sendrefs[b"refs/heads/abranch"] = b"00" * 20
del sendrefs[b"HEAD"]
- def gen_pack(have, want, ofs_delta=False):
+ def gen_pack(have, want, ofs_delta=False, progress=None):
return 0, []
c = self._client()
dest.refs[b"refs/heads/abranch"] = dummy_commit
sendrefs = {b"refs/heads/bbranch": dummy_commit}
- def gen_pack(have, want, ofs_delta=False):
+ def gen_pack(have, want, ofs_delta=False, progress=None):
return 0, []
c = self._client()
blob - 6f95ea3ac043dac8420bb5f86d2e384986665234
blob + 73c0a15a6370a40cddb1848ceb753bc42ad48a0a
--- dulwich/tests/compat/test_pack.py
+++ dulwich/tests/compat/test_pack.py
orig_blob = orig_pack[a_sha]
new_blob = Blob()
new_blob.data = orig_blob.data + b"x"
- all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
+ all_to_pack = [(o, None) for o in orig_pack.iterobjects()] + [(new_blob, None)]
pack_path = os.path.join(self._tempdir, "pack_with_deltas")
write_pack(pack_path, all_to_pack, deltify=True)
output = run_git_or_fail(["verify-pack", "-v", pack_path])
(new_blob, None),
(new_blob_2, None),
]
- pack_path = os.path.join(self._tempdir, "pack_with_deltas")
- write_pack(pack_path, all_to_pack, deltify=True)
+ pack_path = os.path.join(self._tempdir, "pack_with_deltas")
+ write_pack(pack_path, all_to_pack, deltify=True)
output = run_git_or_fail(["verify-pack", "-v", pack_path])
self.assertEqual(
{x[0].id for x in all_to_pack},
(new_blob, None),
(new_blob_2, None),
]
- pack_path = os.path.join(self._tempdir, "pack_with_deltas")
- write_pack(pack_path, all_to_pack, deltify=True)
+ pack_path = os.path.join(self._tempdir, "pack_with_deltas")
+ write_pack(pack_path, all_to_pack, deltify=True)
output = run_git_or_fail(["verify-pack", "-v", pack_path])
self.assertEqual(
{x[0].id for x in all_to_pack},
blob - 7a9f38af1ca37da13ea42a3496a3c23cc6106233
blob + 34edefbf4fb33d5c445db4648b5bf140782b8834
--- dulwich/tests/test_bundle.py
+++ dulwich/tests/test_bundle.py
"""Tests for bundle support."""
+from io import BytesIO
import os
import tempfile
read_bundle,
write_bundle,
)
+from dulwich.pack import (
+ PackData,
+ write_pack_objects,
+)
class BundleTests(TestCase):
origbundle.capabilities = {"foo": None}
origbundle.references = {b"refs/heads/master": b"ab" * 20}
origbundle.prerequisites = [(b"cc" * 20, "comment")]
+ b = BytesIO()
+ write_pack_objects(b.write, [])
+ b.seek(0)
+ origbundle.pack_data = PackData.from_file(b)
with tempfile.TemporaryDirectory() as td:
with open(os.path.join(td, "foo"), "wb") as f:
write_bundle(f, origbundle)
blob - 737ea11c93dbafa61f707f0f8844192c68567a36
blob + dacf1b4735487a03ffcf42319e3d5bb3e1993787
--- dulwich/tests/test_client.py
+++ dulwich/tests/test_client.py
self.assertEqual({}, ret.symrefs)
self.assertEqual(self.rout.getvalue(), b"0000")
- def test_send_pack_no_sideband64k_with_update_ref_error(self):
+ def test_send_pack_no_sideband64k_with_update_ref_error(self) -> None:
# No side-bank-64k reported by server shouldn't try to parse
# side band data
pkts = [
b"refs/foo/bar": commit.id,
}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return pack_objects_to_data(
[
(commit, None),
- (tree, ""),
+ (tree, b""),
]
)
def update_refs(refs):
return {b"refs/heads/master": b"310ca9477129b8586fa2afc779c1f57cf64bba6c"}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return 0, []
self.client.send_pack(b"/", update_refs, generate_pack_data)
def update_refs(refs):
return {b"refs/heads/master": b"0" * 40}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return 0, []
self.client.send_pack(b"/", update_refs, generate_pack_data)
def update_refs(refs):
return {b"refs/heads/master": b"0" * 40}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return 0, []
self.client.send_pack(b"/", update_refs, generate_pack_data)
b"refs/heads/master": b"310ca9477129b8586fa2afc779c1f57cf64bba6c",
}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return 0, []
f = BytesIO()
- write_pack_objects(f.write, {})
+ write_pack_objects(f.write, [])
self.client.send_pack("/", update_refs, generate_pack_data)
self.assertEqual(
self.rout.getvalue(),
b"refs/heads/master": b"310ca9477129b8586fa2afc779c1f57cf64bba6c",
}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return pack_objects_to_data(
[
(commit, None),
)
f = BytesIO()
- write_pack_data(f.write, *generate_pack_data(None, None))
+ count, records = generate_pack_data(None, None)
+ write_pack_data(f.write, records, num_records=count)
self.client.send_pack(b"/", update_refs, generate_pack_data)
self.assertEqual(
self.rout.getvalue(),
def update_refs(refs):
return {b"refs/heads/master": b"0" * 40}
- def generate_pack_data(have, want, ofs_delta=False):
+ def generate_pack_data(have, want, ofs_delta=False, progress=None):
return 0, []
result = self.client.send_pack(b"/", update_refs, generate_pack_data)
def test_fetch_into_empty(self):
c = LocalGitClient()
- t = MemoryRepo()
+ target = tempfile.mkdtemp()
+ self.addCleanup(shutil.rmtree, target)
+ t = Repo.init_bare(target)
s = open_repo("a.git")
self.addCleanup(tear_down_repo, s)
self.assertEqual(s.get_refs(), c.fetch(s.path, t).refs)
blob - 29ff7febf4ee0312c21ad3e3b41c4127c8b225b9
blob + 7f956d5785e08abe9fa3bf809e8a671d51bbe110
--- dulwich/tests/test_greenthreads.py
+++ dulwich/tests/test_greenthreads.py
)
from dulwich.object_store import (
MemoryObjectStore,
- MissingObjectFinder,
)
from dulwich.objects import (
Commit,
if gevent_support:
from dulwich.greenthreads import (
- GreenThreadsObjectStoreIterator,
GreenThreadsMissingObjectFinder,
)
@skipIf(not gevent_support, skipmsg)
-class TestGreenThreadsObjectStoreIterator(TestCase):
- def setUp(self):
- super().setUp()
- self.store = MemoryObjectStore()
- self.cmt_amount = 10
- self.objs = init_store(self.store, self.cmt_amount)
-
- def test_len(self):
- wants = [sha.id for sha in self.objs if isinstance(sha, Commit)]
- finder = MissingObjectFinder(self.store, (), wants)
- iterator = GreenThreadsObjectStoreIterator(
- self.store, iter(finder.next, None), finder
- )
- # One commit refers one tree and one blob
- self.assertEqual(len(iterator), self.cmt_amount * 3)
- haves = wants[0 : self.cmt_amount - 1]
- finder = MissingObjectFinder(self.store, haves, wants)
- iterator = GreenThreadsObjectStoreIterator(
- self.store, iter(finder.next, None), finder
- )
- self.assertEqual(len(iterator), 3)
-
- def test_iter(self):
- wants = [sha.id for sha in self.objs if isinstance(sha, Commit)]
- finder = MissingObjectFinder(self.store, (), wants)
- iterator = GreenThreadsObjectStoreIterator(
- self.store, iter(finder.next, None), finder
- )
- objs = []
- for sha, path in iterator:
- self.assertIn(sha, self.objs)
- objs.append(sha)
- self.assertEqual(len(objs), len(self.objs))
-
-
-@skipIf(not gevent_support, skipmsg)
class TestGreenThreadsMissingObjectFinder(TestCase):
def setUp(self):
super().setUp()
blob - e4f8c537dfcdf5f82ca0d9786cc77897e55fda70
blob + c03a11956bdd8f3a1c4abc67ae7c3fecdc52bd81
--- dulwich/tests/test_object_store.py
+++ dulwich/tests/test_object_store.py
[TreeEntry(p, m, h) for (p, h, m) in blobs],
list(iter_tree_contents(self.store, tree_id)),
)
+ self.assertEqual([], list(iter_tree_contents(self.store, None)))
def test_iter_tree_contents_include_trees(self):
blob_a = make_object(Blob, data=b"a")
def test_add_pack_emtpy(self):
o = MemoryObjectStore()
f, commit, abort = o.add_pack()
- commit()
+ self.assertRaises(AssertionError, commit)
def test_add_thin_pack(self):
o = MemoryObjectStore()
blob - 15f55785c88833e2ebd8acf9446fb6590f41f147
blob + be3579e940386a0d850b3d544bde8672e03471df
--- dulwich/tests/test_pack.py
+++ dulwich/tests/test_pack.py
class PackIndexTests(PackTests):
"""Class that tests the index of packfiles"""
- def test_object_index(self):
+ def test_object_offset(self):
"""Tests that the correct object offset is returned from the index."""
p = self.get_pack_index(pack1_sha)
- self.assertRaises(KeyError, p.object_index, pack1_sha)
- self.assertEqual(p.object_index(a_sha), 178)
- self.assertEqual(p.object_index(tree_sha), 138)
- self.assertEqual(p.object_index(commit_sha), 12)
+ self.assertRaises(KeyError, p.object_offset, pack1_sha)
+ self.assertEqual(p.object_offset(a_sha), 178)
+ self.assertEqual(p.object_offset(tree_sha), 138)
+ self.assertEqual(p.object_offset(commit_sha), 12)
def test_object_sha1(self):
"""Tests that the correct object offset is returned from the index."""
with self.get_pack_data(pack1_sha) as p:
self.assertSucceeds(p.check)
- def test_iterobjects(self):
+ def test_iter_unpacked(self):
with self.get_pack_data(pack1_sha) as p:
commit_data = (
b"tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n"
)
blob_sha = b"6f670c0fb53f9463760b7295fbb814e965fb20c8"
tree_data = b"100644 a\0" + hex_to_sha(blob_sha)
- actual = []
- for offset, type_num, chunks, crc32 in p.iterobjects():
- actual.append((offset, type_num, b"".join(chunks), crc32))
+ actual = list(p.iter_unpacked())
self.assertEqual(
[
- (12, 1, commit_data, 3775879613),
- (138, 2, tree_data, 912998690),
- (178, 3, b"test 1\n", 1373561701),
+ UnpackedObject(offset=12, pack_type_num=1, decomp_chunks=[commit_data], crc32=None),
+ UnpackedObject(offset=138, pack_type_num=2, decomp_chunks=[tree_data], crc32=None),
+ UnpackedObject(offset=178, pack_type_num=3, decomp_chunks=[b"test 1\n"], crc32=None),
],
actual,
)
with self.make_pack(True) as p:
self.assertEqual((3, b"foo1234"), p.get_raw(self.blobs[b"foo1234"].id))
- def test_get_raw_unresolved(self):
+ def test_get_unpacked_object(self):
+ self.maxDiff = None
with self.make_pack(False) as p:
- self.assertEqual(
- (
- 7,
- b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
- [b"x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l"],
- ),
- p.get_raw_unresolved(self.blobs[b"foo1234"].id),
+ expected = UnpackedObject(
+ 7,
+ delta_base=b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
+ decomp_chunks=[b'\x03\x07\x90\x03\x041234'],
)
+ expected.offset = 12
+ got = p.get_unpacked_object(self.blobs[b"foo1234"].id)
+ self.assertEqual(expected, got)
with self.make_pack(True) as p:
+ expected = UnpackedObject(
+ 7,
+ delta_base=b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
+ decomp_chunks=[b'\x03\x07\x90\x03\x041234'],
+ )
+ expected.offset = 12
+ got = p.get_unpacked_object(self.blobs[b"foo1234"].id)
self.assertEqual(
- (
- 7,
- b"\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c",
- [b"x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l"],
- ),
- p.get_raw_unresolved(self.blobs[b"foo1234"].id),
+ expected,
+ got,
)
def test_iterobjects(self):
def setUp(self):
super().setUp()
self.read = BytesIO(self.comp + self.extra).read
- self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
+ self.unpacked = UnpackedObject(Tree.type_num, decomp_len=len(self.decomp), crc32=0)
def test_decompress_size(self):
good_decomp_len = len(self.decomp)
self.assertRaises(zlib.error, read_zlib_chunks, read, self.unpacked)
def test_decompress_empty(self):
- unpacked = UnpackedObject(Tree.type_num, None, 0, None)
+ unpacked = UnpackedObject(Tree.type_num, decomp_len=0)
comp = zlib.compress(b"")
read = BytesIO(comp + self.extra).read
unused = read_zlib_chunks(read, unpacked)
def test_single(self):
b = Blob.from_string(b"foo")
self.assertEqual(
- [(b.type_num, b.sha().digest(), None, b.as_raw_chunks())],
+ [UnpackedObject(b.type_num, sha=b.sha().digest(), delta_base=None, decomp_chunks=b.as_raw_chunks())],
list(deltify_pack_objects([(b, b"")])),
)
delta = list(create_delta(b1.as_raw_chunks(), b2.as_raw_chunks()))
self.assertEqual(
[
- (b1.type_num, b1.sha().digest(), None, b1.as_raw_chunks()),
- (b2.type_num, b2.sha().digest(), b1.sha().digest(), delta),
+ UnpackedObject(b1.type_num, sha=b1.sha().digest(), delta_base=None, decomp_chunks=b1.as_raw_chunks()),
+ UnpackedObject(b2.type_num, sha=b2.sha().digest(), delta_base=b1.sha().digest(), decomp_chunks=delta),
],
list(deltify_pack_objects([(b1, b""), (b2, b"")])),
)
def test_read_objects_empty(self):
reader = PackStreamReader(BytesIO().read)
- self.assertEqual([], list(reader.read_objects()))
+ self.assertRaises(AssertionError, list, reader.read_objects())
class TestPackIterator(DeltaChainIterator):
resolve_ext_ref = thin and self.get_raw_no_repeat or None
data = PackData("test.pack", file=f)
return TestPackIterator.for_pack_data(data, resolve_ext_ref=resolve_ext_ref)
+
+ def make_pack_iter_subset(self, f, subset, thin=None):
+ if thin is None:
+ thin = bool(list(self.store))
+ resolve_ext_ref = thin and self.get_raw_no_repeat or None
+ data = PackData("test.pack", file=f)
+ assert data
+ index = MemoryPackIndex.for_pack(data)
+ pack = Pack.from_objects(data, index)
+ return TestPackIterator.for_pack_subset(pack, subset, resolve_ext_ref=resolve_ext_ref)
def assertEntriesMatch(self, expected_indexes, entries, pack_iter):
expected = [entries[i] for i in expected_indexes]
],
)
self.assertEntriesMatch([0, 1, 2], entries, self.make_pack_iter(f))
+ f.seek(0)
+ self.assertEntriesMatch([], entries, self.make_pack_iter_subset(f, []))
+ f.seek(0)
+ self.assertEntriesMatch([1, 0], entries, self.make_pack_iter_subset(f, [entries[0][3], entries[1][3]]))
def test_ofs_deltas(self):
f = BytesIO()
)
# Delta resolution changed to DFS
self.assertEntriesMatch([0, 2, 1], entries, self.make_pack_iter(f))
+ f.seek(0)
+ self.assertEntriesMatch(
+ [0, 2, 1], entries,
+ self.make_pack_iter_subset(f, [entries[1][3], entries[2][3]]))
def test_ofs_deltas_chain(self):
f = BytesIO()
blob - bb1186550917231d4ed89c0da475d34a5e9dc5bd
blob + 3ac257e3c14c50943354f8c2bfc78f4804baf370
--- dulwich/tests/test_repository.py
+++ dulwich/tests/test_repository.py
This test demonstrates that ``find_common_revisions()`` actually
returns common heads, not revisions; dulwich already uses
``find_common_revisions()`` in such a manner (see
- ``Repo.fetch_objects()``).
+        ``Repo.find_missing_objects()``).
"""
expected_shas = {b"60dacdc733de308bb77bb76ce0fb0f9b44c9769e"}
blob - c48890e38d6ddd98cb8c63122181a435a7554236
blob + 29802c6ae8956a97aa3b16e691df9d0d8edc4eaf
--- dulwich/tests/test_server.py
+++ dulwich/tests/test_server.py
class UploadPackHandlerTestCase(TestCase):
def setUp(self):
super().setUp()
- self._repo = MemoryRepo.init_bare([], {})
+ self.path = tempfile.mkdtemp()
+ self.addCleanup(shutil.rmtree, self.path)
+ self.repo = Repo.init(self.path)
+ self._repo = Repo.init_bare(self.path)
backend = DictBackend({b"/": self._repo})
self._handler = UploadPackHandler(
backend, [b"/", b"host=lolcathost"], TestProto()
def test_progress(self):
caps = self._handler.required_capabilities()
self._handler.set_client_capabilities(caps)
+ self._handler._start_pack_send_phase()
self._handler.progress(b"first message")
self._handler.progress(b"second message")
self.assertEqual(b"first message", self._handler.proto.get_received_line(2))
}
# repo needs to peel this object
self._repo.object_store.add_object(make_commit(id=FOUR))
- self._repo.refs._update(refs)
+ for name, sha in refs.items():
+ self._repo.refs[name] = sha
peeled = {
b"refs/tags/tag1": b"1234" * 10,
b"refs/tags/tag2": b"5678" * 10,
}
- self._repo.refs._update_peeled(peeled)
+ self._repo.refs._peeled_refs = peeled
+ self._repo.refs.add_packed_refs(refs)
caps = list(self._handler.required_capabilities()) + [b"include-tag"]
self._handler.set_client_capabilities(caps)
tree = Tree()
self._repo.object_store.add_object(tree)
self._repo.object_store.add_object(make_commit(id=ONE, tree=tree))
- self._repo.refs._update(refs)
+ for name, sha in refs.items():
+ self._repo.refs[name] = sha
self._handler.proto.set_output(
[
b"want " + ONE + b" side-band-64k thin-pack ofs-delta",
tree = Tree()
self._repo.object_store.add_object(tree)
self._repo.object_store.add_object(make_commit(id=ONE, tree=tree))
- self._repo.refs._update(refs)
+ for ref, sha in refs.items():
+ self._repo.refs[ref] = sha
self._handler.proto.set_output([None])
self._handler.handle()
# The server should not send a pack, since the client didn't ask for
blob - a80c6ec0c298a1a6551ed6777979566755518e12
blob + 7c72cc5921d925602cf52e7d75fd331cd1239e51
--- dulwich/walk.py
+++ dulwich/walk.py
import collections
import heapq
from itertools import chain
-from typing import List, Tuple, Set, Deque, Literal, Optional
+from typing import List, Tuple, Set, Deque, Optional
from dulwich.diff_tree import (
RENAME_CHANGE_TYPES,
store,
include: List[bytes],
exclude: Optional[List[bytes]] = None,
- order: Literal["date", "topo"] = 'date',
+ order: str = 'date',
reverse: bool = False,
max_entries: Optional[int] = None,
paths: Optional[List[bytes]] = None,
blob - ad428d2e55d87571932b8d89cd67348112e4edd5
blob + d539385f1a2425cdd47129a6a81b8e8fac8495e3
--- setup.cfg
+++ setup.cfg
include_package_data = True
install_requires =
urllib3>=1.25
+ typing_extensions;python_version<="3.7"
zip_safe = False
scripts =
bin/dul-receive-pack