commit 19231ab78f7566fb47c90eb83ccc54d4341bc176
from: Stefan Sperling
date: Wed Sep 11 11:16:55 2024 UTC

add all commits found in pack file to the graph, not just tip commits

pointed out by olasd

commit - f6c9bf1db2d2902e0c4f9ac99825e270f101a794
commit + 19231ab78f7566fb47c90eb83ccc54d4341bc176
blob - 2a84957be3a96df0fca926bc8625ad83ae9c8230
blob + eb13f37423fcf1b24f6257c832bcdc3e9282fb6d
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
@@ -705,6 +705,7 @@ class GitLoader(BaseGitLoader):
 
         # Find all commits and corresponding tree roots in the pack file
         commits = {}
+        commit_edges = []
         for ref_name, ref_object_hex in self.remote_refs.items():
             if utils.ignore_branch_name(ref_name):
                 continue
@@ -713,9 +714,11 @@ class GitLoader(BaseGitLoader):
             except KeyError:
                 continue
             logger.debug(f"Opened {obj}")
+            # Peel tags for now; consider adding them to the graph later
             while obj.type_name == b"tag":
                 try:
-                    obj = self.pack[obj.object[1]]
+                    ref_object_hex = obj.object[1]
+                    obj = self.pack[ref_object_hex]
                     logger.debug(f"Opened {obj}")
                 except KeyError:
                     obj = None
@@ -723,15 +726,28 @@ class GitLoader(BaseGitLoader):
             if obj is None or obj.type_name != b"commit":
                 continue
 
-            commit = obj
-            tree_hash = hashutil.bytehex_to_hash(commit.tree)
-            commit_hash = hashutil.bytehex_to_hash(ref_object_hex)
-            commits[commit_hash] = tree_hash
+            parents = [ref_object_hex]
+            while len(parents) > 0:
+                commit_hex = parents.pop(0)
+                try:
+                    commit = self.pack[commit_hex]
+                except KeyError:
+                    logger.debug(f" pack is missing: {commit_hex}")
+                    continue
+                tree_hash = hashutil.bytehex_to_hash(commit.tree)
+                commit_hash = hashutil.bytehex_to_hash(commit_hex)
+                commits[commit_hash] = tree_hash
+                for parent_hex in commit.parents:
+                    if parent_hex in self.pack:
+                        parents.append(parent_hex)
+                        parent_hash = hashutil.bytehex_to_hash(parent_hex)
+                        commit_edges.append((parent_hash, commit_hash))
 
-        # Add root commits and trees to the graph
+        # Add commits and root trees to the graph
         self._object_graph.add_vertices(list(commits.keys()))
         self._object_graph.add_vertices(list(commits.values()))
         self._object_graph.add_edges(zip(commits.keys(), commits.values()))
+        self._object_graph.add_edges(commit_edges)
 
         # Populate the graph with trees and blobs
         new_vertices = []