commit - 4f62e31e79cb1498c62ca45b0b9d9d72ef03a334
commit + fc5f2bb353e14a8a06d1cd6bda751d2e02ac8915
blob - f25600eaa4b77f39953dd90dd1e4ddecea853efa
blob + 30c91308f9baf38141fb6b5d5cb280aab835b020
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
elif len(missing_contents) > 0:
# If a subset of blobs is missing then a subset of trees are missing, too.
self.log.debug(
- "Searching for packed trees which are missing from the archive "
+ "Searching for packed trees which are missing from the archive..."
)
+ i = 0
+ last_p = -1
+ num_commits_total = len(root_tree_vertices)
for root_hash, tree_deps in iter_tree_deps(root_tree_vertices):
+ # Log progress at debug level whenever the integer percentage of processed commits changes (helps with very large datasets)
+ p = int(i * 100 / num_commits_total)
+ if p != last_p:
+ logger.debug(
+ "Searching for packed trees which are missing from the archive: "
+ f"{p}% of commits processed..."
+ )
+ last_p = p
+ i = i + 1
subtrees = [root_hash]
while len(subtrees) > 0:
tree_hash = subtrees.pop(0)
)
if len(missing_tree):
missing_directories.add(hashutil.hash_to_bytes(tree_hash))
+ p = int(i * 100 / num_commits_total)
+ if p != last_p:
+ logger.debug(
+ "Searching for packed trees which are missing from the archive: "
+ f"{p}% of commits processed."
+ )
+ last_p = p
self.log.debug(
f"Number of packed trees found missing: {len(missing_directories)}"
)