commit c1e70fdc0b124da0c6992503d595fb0e2217ff93
from: Jelmer Vernooij
date: Sun Jan 15 20:31:42 2023 UTC

pass along progress function in a few more cases

commit - b94072aa350e688927640d587bd7148d3edda540
commit + c1e70fdc0b124da0c6992503d595fb0e2217ff93
blob - a0c511915329920dd6480206bf2efa55bb3756ea
blob + 02ca79adbc6f7f59941548d53a5a530b17806e04
--- dulwich/client.py
+++ dulwich/client.py
@@ -812,15 +812,13 @@ class GitClient:
         if determine_wants is None:
             determine_wants = target.object_store.determine_wants_all
         if CAPABILITY_THIN_PACK in self._fetch_capabilities:
-            # TODO(jelmer): Avoid reading entire file into memory and
-            # only processing it after the whole file has been fetched.
             from tempfile import SpooledTemporaryFile
             f: IO[bytes] = SpooledTemporaryFile()

             def commit():
                 if f.tell():
                     f.seek(0)
-                    target.object_store.add_thin_pack(f.read, None)
+                    target.object_store.add_thin_pack(f.read, None, progress=progress)
                 f.close()

             def abort():
blob - 9201aefc4bdd2ebbc00dd0cc2a47bad55083510c
blob + a55f157decb93b8afadb4d07fcfac1d2495b2d76
--- dulwich/object_store.py
+++ dulwich/object_store.py
@@ -804,7 +804,7 @@ class DiskObjectStore(PackBasedObjectStore):
             suffix = suffix.decode("ascii")
         return os.path.join(self.pack_dir, "pack-" + suffix)

-    def _complete_thin_pack(self, f, path, copier, indexer):
+    def _complete_thin_pack(self, f, path, copier, indexer, progress=None):
         """Move a specific file containing a pack into the pack directory.

         Note: The file should be on the same file system as the
@@ -816,36 +816,49 @@ class DiskObjectStore(PackBasedObjectStore):
           copier: A PackStreamCopier to use for writing pack data.
           indexer: A PackIndexer for indexing the pack.
         """
-        entries = list(indexer)
-
-        # Update the header with the new number of objects.
-        f.seek(0)
-        write_pack_header(f.write, len(entries) + len(indexer.ext_refs()))
+        entries = []
+        for i, entry in enumerate(indexer):
+            if progress is not None:
+                progress(("generating index: %d\r" % i).encode('ascii'))
+            entries.append(entry)

-        # Must flush before reading (http://bugs.python.org/issue3207)
-        f.flush()
+        ext_refs = indexer.ext_refs()
+
+        if ext_refs:
+            # Update the header with the new number of objects.
+            f.seek(0)
+            write_pack_header(f.write, len(entries) + len(ext_refs))

-        # Rescan the rest of the pack, computing the SHA with the new header.
-        new_sha = compute_file_sha(f, end_ofs=-20)
+            # Must flush before reading (http://bugs.python.org/issue3207)
+            f.flush()

-        # Must reposition before writing (http://bugs.python.org/issue3207)
-        f.seek(0, os.SEEK_CUR)
+            # Rescan the rest of the pack, computing the SHA with the new header.
+            new_sha = compute_file_sha(f, end_ofs=-20)

-        # Complete the pack.
-        for ext_sha in indexer.ext_refs():
-            assert len(ext_sha) == 20
-            type_num, data = self.get_raw(ext_sha)
-            offset = f.tell()
-            crc32 = write_pack_object(
-                f.write,
-                type_num,
-                data,
-                sha=new_sha,
-                compression_level=self.pack_compression_level,
-            )
-            entries.append((ext_sha, offset, crc32))
-        pack_sha = new_sha.digest()
-        f.write(pack_sha)
+            # Must reposition before writing (http://bugs.python.org/issue3207)
+            f.seek(0, os.SEEK_CUR)
+
+            # Complete the pack.
+            for i, ext_sha in enumerate(ext_refs):
+                if progress is not None:
+                    progress(("writing extra base objects: %d/%d\r" % (i, len(ext_refs))).encode("ascii"))
+                assert len(ext_sha) == 20
+                type_num, data = self.get_raw(ext_sha)
+                offset = f.tell()
+                crc32 = write_pack_object(
+                    f.write,
+                    type_num,
+                    data,
+                    sha=new_sha,
+                    compression_level=self.pack_compression_level,
+                )
+                entries.append((ext_sha, offset, crc32))
+            pack_sha = new_sha.digest()
+            f.write(pack_sha)
+        else:
+            f.seek(-20, os.SEEK_END)
+            pack_sha = f.read(20)
+
         f.close()

         # Move the pack in.
@@ -875,7 +888,7 @@ class DiskObjectStore(PackBasedObjectStore):
             self._add_cached_pack(pack_base_name, final_pack)
         return final_pack

-    def add_thin_pack(self, read_all, read_some):
+    def add_thin_pack(self, read_all, read_some, progress=None):
         """Add a new thin pack to this object store.

         Thin packs are packs that contain deltas with parents that exist
@@ -897,8 +910,8 @@ class DiskObjectStore(PackBasedObjectStore):
         os.chmod(path, PACK_MODE)
         indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
         copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
-        copier.verify()
-        return self._complete_thin_pack(f, path, copier, indexer)
+        copier.verify(progress=progress)
+        return self._complete_thin_pack(f, path, copier, indexer, progress=progress)

     def move_in_pack(self, path):
         """Move a specific file containing a pack into the pack directory.
@@ -1076,7 +1089,7 @@ class MemoryObjectStore(BaseObjectStore):

         return f, commit, abort

-    def _complete_thin_pack(self, f, indexer):
+    def _complete_thin_pack(self, f, indexer, progress=None):
         """Complete a thin pack by adding external references.

         Args:
@@ -1085,22 +1098,25 @@ class MemoryObjectStore(BaseObjectStore):
         """
         entries = list(indexer)

-        # Update the header with the new number of objects.
-        f.seek(0)
-        write_pack_header(f.write, len(entries) + len(indexer.ext_refs()))
+        ext_refs = indexer.ext_refs()

-        # Rescan the rest of the pack, computing the SHA with the new header.
-        new_sha = compute_file_sha(f, end_ofs=-20)
+        if ext_refs:
+            # Update the header with the new number of objects.
+            f.seek(0)
+            write_pack_header(f.write, len(entries) + len(ext_refs))

-        # Complete the pack.
-        for ext_sha in indexer.ext_refs():
-            assert len(ext_sha) == 20
-            type_num, data = self.get_raw(ext_sha)
-            write_pack_object(f.write, type_num, data, sha=new_sha)
-        pack_sha = new_sha.digest()
-        f.write(pack_sha)
+            # Rescan the rest of the pack, computing the SHA with the new header.
+            new_sha = compute_file_sha(f, end_ofs=-20)

-    def add_thin_pack(self, read_all, read_some):
+            # Complete the pack.
+            for ext_sha in indexer.ext_refs():
+                assert len(ext_sha) == 20
+                type_num, data = self.get_raw(ext_sha)
+                write_pack_object(f.write, type_num, data, sha=new_sha)
+            pack_sha = new_sha.digest()
+            f.write(pack_sha)
+
+    def add_thin_pack(self, read_all, read_some, progress=None):
         """Add a new thin pack to this object store.

         Thin packs are packs that contain deltas with parents that exist
@@ -1117,8 +1133,8 @@ class MemoryObjectStore(BaseObjectStore):
         try:
             indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
             copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
-            copier.verify()
-            self._complete_thin_pack(f, indexer)
+            copier.verify(progress=progress)
+            self._complete_thin_pack(f, indexer, progress=progress)
         except BaseException:
             abort()
             raise
blob - 9dcfa31e67776df61b6c2d09f37f968c35955084
blob + cf7cdd6f77a86dc371e7ef123ebb5ee7efc6032a
--- dulwich/pack.py
+++ dulwich/pack.py
@@ -1069,18 +1069,19 @@ class PackStreamCopier(PackStreamReader):
         self.outfile.write(data)
         return data

-    def verify(self):
+    def verify(self, progress=None):
         """Verify a pack stream and write it to the output file.

         See PackStreamReader.iterobjects for a list of exceptions this may
         throw.
         """
-        if self._delta_iter:
-            for unpacked in self.read_objects():
+        for i, unpacked in enumerate(self.read_objects()):
+            if self._delta_iter:
                 self._delta_iter.record(unpacked)
-        else:
-            for _ in self.read_objects():
-                pass
+            if progress is not None:
+                progress(("copying pack entries: %d/%d\r" % (i, len(self))).encode('ascii'))
+        if progress is not None:
+            progress(("copied %d pack entries\n" % i).encode('ascii'))


 def obj_sha(type, chunks):
@@ -2352,7 +2353,7 @@ class Pack:

     def check_length_and_checksum(self) -> None:
         """Sanity check the length and checksum of the pack index and data."""
-        assert len(self.index) == len(self.data)
+        assert len(self.index) == len(self.data), f"Length mismatch: {len(self.index)} (index) != {len(self.data)} (data)"
         idx_stored_checksum = self.index.get_pack_checksum()
         data_stored_checksum = self.data.get_stored_checksum()
         if idx_stored_checksum != data_stored_checksum:
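
The net effect of the commit is that a progress callback passed to GitClient.fetch() is now forwarded through add_thin_pack() and PackStreamCopier.verify(), so thin-pack completion emits "copying pack entries", "generating index", and "writing extra base objects" counters instead of going silent. Below is a minimal sketch of how a caller might consume these messages; the server URL, repository path, and local target directory are placeholder assumptions, not part of the commit:

    import sys

    from dulwich.client import HttpGitClient
    from dulwich.repo import Repo

    def progress(msg: bytes) -> None:
        # dulwich progress callbacks receive raw bytes; the counters added in
        # this commit end in "\r" so they overwrite one another on a terminal.
        sys.stderr.buffer.write(msg)
        sys.stderr.buffer.flush()

    client = HttpGitClient("https://example.com")  # placeholder server
    target = Repo("/tmp/target")  # assumes an existing local repository
    result = client.fetch("/repo.git", target, progress=progress)
    print(sorted(result.refs))

Since fetch() already accepted progress= for the transfer itself, the commit introduces no new API; it only threads the same callback into the indexing and pack-completion phases that previously ran without reporting.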