commit - b1e497a6d0bae8ab541b45d1ffcd4b58be2c5b25
commit + 607a92048c2b18c77d6930d181b8326ec9158074
blob - e78ebf760ea71e81d5e57ff1b70f68d2b3d7b405
blob + e4f2f184e4687f11df9528fd46539d7873359b12
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
# Show some debug progress output for very large datasets
p = int(i * 100 / num_commits_total)
if p != last_p:
- logger.debug(f"Adding trees and blobs: {p}% of commits processed...")
+ logger.debug(
+ f"Adding trees and blobs to the graph: {p}% of commits processed..."
+ )
last_p = p
i = i + 1
tree_hex = hashutil.hash_to_bytehex(root_tree_hash)
logger.debug("No objects to load")
return
+ logger.debug("Loading objects in topological order...")
+
if self.save_data_path:
self.save_data()
object_type=GitObjectType.TREE
)
blob_hashes: Set[bytes] = set()
+ logger.debug("Loading blob object IDs...")
+ i = 0
+ last_p = -1
for root_hash, tree_deps in iter_tree_deps(root_tree_vertices):
+ p = int(i * 100 / len(root_tree_vertices))
+ if p != last_p:
+ logger.debug(f"Loading blob object IDs: {p}% of commits processed...")
+ last_p = p
+ i = i + 1
try:
dep_hashes = get_recursive_tree_deps(
tree_deps, obj_type=GitObjectType.BLOB
except KeyError:
continue
+ p = int(i * 100 / len(root_tree_vertices))
+ if p != last_p:
+ logger.debug(f"Loading blob object IDs: {p}% of commits processed.")
+
# Find out which blobs are missing from the archive.
missing_contents = set(
self.storage.content_missing_per_sha1_git(list(blob_hashes))