commit - 4ff67682be16c48c098e0412aee07cabd74a1f27
commit + e3840f7501a7dbea6e169bd9c0d0fa8a49476af0
blob - ab9b86ab3ca21b10c0676ff6d55f269df436634e
blob + bd5c4b65d0adb1db728a1c1dc33c23318d91840e
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
new_blobs = []
new_edges = []
traversed_trees = set()
+ seen_blobs = set()
submodule_mode = stat.S_IFDIR | stat.S_IFLNK
for commit_hash, tree_hash in commits.items():
subtrees = [tree_hash]
continue # ignore submodules
entry_hash = hashutil.bytehex_to_hash(entry_hex)
if mode & stat.S_IFDIR:
- new_trees.append(entry_hash)
- else:
+ if entry_hash not in traversed_trees:
+ new_trees.append(entry_hash)
+ subtrees.append(entry_hash)
+ elif entry_hash not in seen_blobs:
new_blobs.append(entry_hash)
+ seen_blobs.add(entry_hash)
new_edges.append((tree_hash, entry_hash))
- if mode & stat.S_IFDIR:
- try:
- tree = self.pack[entry_hex]
- subtrees.append(entry_hash)
- except KeyError:
- pass
+
# add new vertices and edges in batches for performance reasons
if len(new_trees) + len(new_blobs) > 100000 or len(new_edges) > 100000:
if len(new_trees) > 0: