Commit Diff


commit - f6c9bf1db2d2902e0c4f9ac99825e270f101a794
commit + 19231ab78f7566fb47c90eb83ccc54d4341bc176
blob - 2a84957be3a96df0fca926bc8625ad83ae9c8230
blob + eb13f37423fcf1b24f6257c832bcdc3e9282fb6d
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
@@ -705,6 +705,7 @@ class GitLoader(BaseGitLoader):
 
         # Find all commits and corresponding tree roots in the pack file
         commits = {}
+        commit_edges = []
         for ref_name, ref_object_hex in self.remote_refs.items():
             if utils.ignore_branch_name(ref_name):
                 continue
@@ -713,9 +714,11 @@ class GitLoader(BaseGitLoader):
             except KeyError:
                 continue
             logger.debug(f"Opened {obj}")
+            # Peel tags for now; consider adding them to the graph later
             while obj.type_name == b"tag":
                 try:
-                    obj = self.pack[obj.object[1]]
+                    ref_object_hex = obj.object[1]
+                    obj = self.pack[ref_object_hex]
                     logger.debug(f"Opened {obj}")
                 except KeyError:
                     obj = None
@@ -723,15 +726,28 @@ class GitLoader(BaseGitLoader):
             if obj is None or obj.type_name != b"commit":
                 continue
 
-            commit = obj
-            tree_hash = hashutil.bytehex_to_hash(commit.tree)
-            commit_hash = hashutil.bytehex_to_hash(ref_object_hex)
-            commits[commit_hash] = tree_hash
+            parents = [ref_object_hex]
+            while len(parents) > 0:
+                commit_hex = parents.pop(0)
+                try:
+                    commit = self.pack[commit_hex]
+                except KeyError:
+                    logger.debug(f"  pack is missing: {commit_hex}")
+                    continue
+                tree_hash = hashutil.bytehex_to_hash(commit.tree)
+                commit_hash = hashutil.bytehex_to_hash(commit_hex)
+                commits[commit_hash] = tree_hash
+                for parent_hex in commit.parents:
+                    if parent_hex in self.pack:
+                        parents.append(parent_hex)
+                        parent_hash = hashutil.bytehex_to_hash(parent_hex)
+                        commit_edges.append((parent_hash, commit_hash))
 
-        # Add root commits and trees to the graph
+        # Add commits and root trees to the graph
         self._object_graph.add_vertices(list(commits.keys()))
         self._object_graph.add_vertices(list(commits.values()))
         self._object_graph.add_edges(zip(commits.keys(), commits.values()))
+        self._object_graph.add_edges(commit_edges)
 
         # Populate the graph with trees and blobs
         new_vertices = []