commit - 5a2803734b8c024d4ab4db8c4f7527547539775f
commit + c683aeaacfbbc2bac3b2e42f576a38d94dca218a
blob - 0dac21502f69d6cd4b68005d9403f4bea472b1b5
blob + 8ae4a5d93145b969b9664e6f0476392c7f159c0d
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
self.ref_object_types[obj.id] = SnapshotTargetType.REVISION
parents = [ref_object_hex]
+ i = 0
while len(parents) > 0:
commit_hex = parents.pop(0)
+ commit_hash = hashutil.bytehex_to_hash(commit_hex)
+ if commit_hash in commits.keys():
+ continue
try:
commit = self.pack[commit_hex]
except KeyError:
continue
- commit_hash = hashutil.bytehex_to_hash(commit_hex)
- if commit_hash in commits.keys():
- continue
+ # Show some debug progress output for very large datasets
+ if i > 0 and i % 100000 == 0:
+ logger.debug(f"{ref_name}: {i} commits processed...")
+ i = i + 1
tree_hash = hashutil.bytehex_to_hash(commit.tree)
commits[commit_hash] = tree_hash
for parent_hex in commit.parents:
- if parent_hex in self.pack:
- if parent_hex not in parents:
- parents.append(parent_hex)
- parent_hash = hashutil.bytehex_to_hash(parent_hex)
- if (commit_hash, parent_hash) not in commit_edges:
- commit_edges.append((commit_hash, parent_hash))
+ if parent_hex in parents:
+ continue
+ parent_hash = hashutil.bytehex_to_hash(parent_hex)
+ if parent_hash in commits.keys():
+ continue
+ parents.append(parent_hex)
+ commit_edges.append((commit_hash, parent_hash))
+ logger.debug(f"Found {len(tags)} tags and {len(commits)} commits")
+
archived_missing_objects = set(
self.storage.object_find_by_sha1_git(missing_objects).keys()
)