commit - f6c9bf1db2d2902e0c4f9ac99825e270f101a794
commit + 19231ab78f7566fb47c90eb83ccc54d4341bc176
blob - 2a84957be3a96df0fca926bc8625ad83ae9c8230
blob + eb13f37423fcf1b24f6257c832bcdc3e9282fb6d
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
# Find all commits and corresponding tree roots in the pack file
commits = {}
+ commit_edges = []
for ref_name, ref_object_hex in self.remote_refs.items():
if utils.ignore_branch_name(ref_name):
continue
except KeyError:
continue
logger.debug(f"Opened {obj}")
+ # Peel tags for now; consider adding them to the graph later
while obj.type_name == b"tag":
try:
- obj = self.pack[obj.object[1]]
+ ref_object_hex = obj.object[1]
+ obj = self.pack[ref_object_hex]
logger.debug(f"Opened {obj}")
except KeyError:
obj = None
if obj is None or obj.type_name != b"commit":
continue
- commit = obj
- tree_hash = hashutil.bytehex_to_hash(commit.tree)
- commit_hash = hashutil.bytehex_to_hash(ref_object_hex)
- commits[commit_hash] = tree_hash
+ parents = [ref_object_hex]
+ while len(parents) > 0:
+ commit_hex = parents.pop(0)
+ try:
+ commit = self.pack[commit_hex]
+ except KeyError:
+ logger.debug(f" pack is missing: {commit_hex}")
+ continue
+ tree_hash = hashutil.bytehex_to_hash(commit.tree)
+ commit_hash = hashutil.bytehex_to_hash(commit_hex)
+ commits[commit_hash] = tree_hash
+ for parent_hex in commit.parents:
+ if parent_hex in self.pack:
+ parents.append(parent_hex)
+ parent_hash = hashutil.bytehex_to_hash(parent_hex)
+ commit_edges.append((parent_hash, commit_hash))
- # Add root commits and trees to the graph
+ # Add commits and root trees to the graph
self._object_graph.add_vertices(list(commits.keys()))
self._object_graph.add_vertices(list(commits.values()))
self._object_graph.add_edges(zip(commits.keys(), commits.values()))
+ self._object_graph.add_edges(commit_edges)
# Populate the graph with trees and blobs
new_vertices = []