commit - 2d1a69eeca244bc64d106fff5b1a4e25fa1e1728
commit + 310666ca1b675f5b9ea18bd71bbda18b9f0da97d
blob - d19645d3dafd42ba011a0f16fa7dfb189c115c9e
blob + dfc9bd15c4a5c3dba256baa055aadfa2b051ffdf
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
new_edges = []
submodule_mode = stat.S_IFDIR | stat.S_IFLNK
i = 0
+ last_p = -1
num_objects_found = 0
num_tree_entries = 0
+ num_commits_total = len(commits.items())
logger.info("Adding trees and blobs to the graph...")
- for commit_hash, tree_hash in commits.items():
- if tree_hash in self._tree_graphs.keys():
- # Keep recently used trees cached in memory.
- self._tree_graphs.move_to_end(tree_hash, last=False)
+ for commit_hash, root_tree_hash in commits.items():
+ # Show some debug progress output for very large datasets
+ p = int(i * 100 / num_commits_total)
+ if p != last_p:
+ logger.debug(f"Adding trees and blobs: {p}% of commits processed...")
+ last_p = p
+ i = i + 1
+ tree_hex = hashutil.hash_to_bytehex(root_tree_hash)
+ t = have_traversed(traversed_objects, tree_hex)
+ if t is None:
continue
- if tree_hash in self._swapped_graphs.keys():
+ if t is True:
+ try:
+ # Keep graphs for recently used trees cached in memory.
+ self._tree_graphs.move_to_end(root_tree_hash, last=False)
+ except KeyError:
+ pass
continue
if len(self._tree_graphs) >= self._max_trees_in_mem:
(other_tree_hash, other_tree_graph) = self._tree_graphs.popitem(
self._swapped_graphs[other_tree_hash] = swapper
swapper.swap_out()
tree_graph = Graph(directed=True)
- self._tree_graphs[tree_hash] = tree_graph
- self._tree_graphs.move_to_end(tree_hash, last=False)
- # Show some debug progress output for very large datasets
- if i > 0 and i % 10000 == 0:
- logger.debug(f"Adding trees and blobs: {i} commits processed...")
- i = i + 1
- new_trees.append(tree_hash)
- subtrees = [tree_hash]
+ self._tree_graphs[root_tree_hash] = tree_graph
+ self._tree_graphs.move_to_end(root_tree_hash, last=False)
+ new_trees.append(root_tree_hash)
+ subtrees = [root_tree_hash]
while len(subtrees) > 0:
tree_hash = subtrees.pop(0)
tree_hex = hashutil.hash_to_bytehex(tree_hash)