commit - d90389e678229d944617785754abf30f2f31ce80
commit + d30fbeafa2087c29c558a7e1f539f6aae480a2c2
blob - f26d6c96a73198205002c075fbced564d61970bd
blob + a66297ed71f87b2d38f96647b63a6c473c0534da
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
traversed_trees = set()
seen_blobs = set()
submodule_mode = stat.S_IFDIR | stat.S_IFLNK
+ i = 0
for commit_hash, tree_hash in commits.items():
+ # Show some debug progress output for very large datasets
+ if i > 0 and i % 10000 == 0:
+ logger.debug(f"Finding trees and blobs: {i} commits processed...")
+ i = i + 1
subtrees = [tree_hash]
while len(subtrees) > 0:
tree_hash = subtrees.pop(0)
# add new vertices and edges in batches for performance reasons
if len(new_trees) + len(new_blobs) > 100000 or len(new_edges) > 100000:
+ logger.debug(
+ f"Found {len(new_trees)} new trees and {len(new_blobs)} new blobs "
+ f"and {len(new_edges)} new tree entries"
+ )
if len(new_trees) > 0:
add_vertices(new_trees, GitObjectType.TREE)
if len(new_blobs) > 0: