commit - 9f12bdfe7238464eeaf4ae1535c2d20a3b7b3b84
commit + 8ccaacf7a3bb2401fa09ec42bf513ed344c38e04
blob - e71c3e2b132e7b47fd7fe7043c02fad7492d4448
blob + 438c60c4bff448b4949c1c989f0e8f1f777289d1
--- NEWS
+++ NEWS
* Fix handling of symrefs with protocol v2.
(Jelmer Vernooij, #1389)
+
+ * Add ``ObjectStore.iter_prefix``. (Jelmer Vernooij)
0.22.3 2024-10-15
blob - a28e141764774f3ac0f23d144c49588a87891fdb
blob + 37deae8cc77160b4de42eb1d14e80a8da475ce37
--- dulwich/object_store.py
+++ dulwich/object_store.py
"""Git object store interfaces and implementation."""
+import binascii
import os
import stat
import sys
"""Close any files opened by this object store."""
# Default implementation is a NO-OP
+ def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
+ """Iterate over all SHA1s that start with a given prefix.
+ The default implementation is a naive iteration over all objects.
+ However, subclasses may override this method with more efficient
+ implementations.
+ """
+ for sha in self:
+ if sha.startswith(prefix):
+ yield sha
+
+
class PackBasedObjectStore(BaseObjectStore):
def __init__(self, pack_compression_level=-1) -> None:
self._pack_cache: Dict[str, Pack] = {}
os.mkdir(os.path.join(path, PACKDIR))
return cls(path)
+ def iter_prefix(self, prefix):
+ if len(prefix) < 2:
+ yield from super().iter_prefix(prefix)
+ return
+ seen = set()
+ dir = prefix[:2].decode()
+ rest = prefix[2:].decode()
+ for name in os.listdir(os.path.join(self.path, dir)):
+ if name.startswith(rest):
+ sha = os.fsencode(dir + name)
+ if sha not in seen:
+ seen.add(sha)
+ yield sha
+ for p in self.packs:
+ bin_prefix = (
+ binascii.unhexlify(prefix)
+ if len(prefix) % 2 == 0
+ else binascii.unhexlify(prefix[:-1])
+ )
+ for sha in p.index.iter_prefix(bin_prefix):
+ sha = sha_to_hex(sha)
+ if sha.startswith(prefix) and sha not in seen:
+ seen.add(sha)
+ yield sha
+ for alternate in self.alternates:
+ for sha in alternate.iter_prefix(prefix):
+ if sha not in seen:
+ seen.add(sha)
+ yield sha
+
+
class MemoryObjectStore(BaseObjectStore):
"""Object store that keeps all objects in memory."""
blob - edaa5a43487acf0952dc4fe1a80aa1bf3b6ce890
blob + c342379119bebd45eddb6fa77495b0fbf738d15d
--- dulwich/objectspec.py
+++ dulwich/objectspec.py
from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union
+from .objects import Commit, ShaFile, Tree
+
if TYPE_CHECKING:
- from .objects import Commit, ShaFile, Tree
from .refs import Ref, RefsContainer
from .repo import Repo
self.options = options
-def scan_for_short_id(object_store, prefix):
+def scan_for_short_id(object_store, prefix, tp):
"""Scan an object store for a short id."""
- # TODO(jelmer): This could short-circuit looking for objects
- # starting with a certain prefix.
ret = []
- for object_id in object_store:
- if object_id.startswith(prefix):
- ret.append(object_store[object_id])
+ for object_id in object_store.iter_prefix(prefix):
+ o = object_store[object_id]
+ if isinstance(o, tp):
+ ret.append(o)
if not ret:
raise KeyError(prefix)
if len(ret) == 1:
pass
else:
try:
- return scan_for_short_id(repo.object_store, committish)
+ return scan_for_short_id(repo.object_store, committish, Commit)
except KeyError:
pass
raise KeyError(committish)
blob - 988f7a9dfe3f6f60c119ef13436627f88deef18d
blob + e815f4f6285efa1425603d0c01b3d64f6f80826c
--- dulwich/pack.py
+++ dulwich/pack.py
if i is None:
raise KeyError(sha)
return self._unpack_offset(i)
+
+ def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
+ """Iterate over all SHA1s with the given prefix."""
+ start = ord(prefix[:1])
+ if start == 0:
+ start = 0
+ else:
+ start = self._fan_out_table[start - 1]
+ end = ord(prefix[:1]) + 1
+ if end == 0x100:
+ end = len(self)
+ else:
+ end = self._fan_out_table[end]
+ assert start <= end
+ started = False
+ for i in range(start, end):
+ name = self._unpack_name(i)
+ if name.startswith(prefix):
+ yield name
+ started = True
+ elif started:
+ break
class PackIndex1(FilePackIndex):
blob - 313be5d38b7bc35de706d798614e55a88d7e96f6
blob + aba93f4f9469844bbd81e40a6a74da1ab04576fb
--- dulwich/tests/test_object_store.py
+++ dulwich/tests/test_object_store.py
self.store.add_object(testobject)
self.store.close()
+ def test_iter_prefix(self):
+ self.store.add_object(testobject)
+ self.assertEqual([testobject.id], list(self.store.iter_prefix(testobject.id)))
+ self.assertEqual(
+ [testobject.id], list(self.store.iter_prefix(testobject.id[:10]))
+ )
+ self.assertEqual(
+ [testobject.id], list(self.store.iter_prefix(testobject.id[:4]))
+ )
+ self.assertEqual([testobject.id], list(self.store.iter_prefix(b"")))
+
class PackBasedObjectStoreTests(ObjectStoreTests):
def tearDown(self):
for pack in self.store.packs:
blob - 811e2e1c2d83f656a40945b5ec409ea43169c167
blob + ed1acbeba9b113d2dcd65475224f6acbf4a1ee82
--- tests/test_pack.py
+++ tests/test_pack.py
self.assertEqual(p.object_sha1(178), hex_to_sha(a_sha))
self.assertEqual(p.object_sha1(138), hex_to_sha(tree_sha))
self.assertEqual(p.object_sha1(12), hex_to_sha(commit_sha))
+
+ def test_iter_prefix(self):
+ p = self.get_pack_index(pack1_sha)
+ self.assertEqual([p.object_sha1(178)], list(p.iter_prefix(hex_to_sha(a_sha))))
+ self.assertEqual(
+ [p.object_sha1(178)], list(p.iter_prefix(hex_to_sha(a_sha)[:5]))
+ )
+ self.assertEqual(
+ [p.object_sha1(178)], list(p.iter_prefix(hex_to_sha(a_sha)[:2]))
+ )
def test_index_len(self):
p = self.get_pack_index(pack1_sha)