Commit Diff


commit - 971c04d1b79f92a74c42df10efba38fddf2ad006
commit + 948ae59d4196b17d4098e50439b8cc7e728b308e
blob - 558384425de018b5bd4cb9814df33623677ae268
blob + 0f2285d498628de6a1612b2b81a48554344f3de2
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
@@ -693,8 +693,75 @@ class GitLoader(BaseGitLoader):
         packindex = load_pack_index(indexfile.name)
         self.pack = Pack.from_objects(packdata, packindex)
         self.pack.resolve_ext_ref = self._resolve_ext_ref
+
+        self.make_object_graph()
         return False
+
+    def make_object_graph(self):
+        logger.debug("Building packed objects graph")
 
+        self._object_graph = {}
+
+        # Find all commits and corresponding tree roots in the pack file
+        commits = {}
+        for ref_name, ref_object_hex in self.remote_refs.items():
+            if utils.ignore_branch_name(ref_name):
+                continue
+            try:
+                obj = self.pack[ref_object_hex]
+            except KeyError:
+                continue
+            logger.debug(f"Opened {obj}")
+            while obj.type_name == b"tag":
+                try:
+                    obj = self.pack[obj.object[1]]
+                    logger.debug(f"Opened {obj}")
+                except KeyError:
+                    obj = None
+                    break
+            if obj is None or obj.type_name != b"commit":
+                continue
+
+            commit = obj
+            tree_hash = hashutil.bytehex_to_hash(commit.tree)
+            commit_hash = hashutil.bytehex_to_hash(ref_object_hex)
+            commits[commit_hash] = tree_hash
+
+        for commit_hash, tree_hash in commits.items():
+            logger.debug(
+                f"commit {hashutil.hash_to_hex(commit_hash)} "
+                f"tree {hashutil.hash_to_hex(tree_hash)}"
+            )
+            self._object_graph[commit_hash] = list()
+            self._object_graph[commit_hash].append(tree_hash)
+            tree_hex = hashutil.hash_to_bytehex(tree_hash)
+            try:
+                tree = self.pack[tree_hex]
+                self._object_graph[tree_hash] = list()
+            except KeyError:
+                continue
+            subtrees = [tree]
+            while len(subtrees) > 0:
+                tree = subtrees.pop(0)
+                logger.debug(f"Entries of {tree}:")
+                for (name, mode, entry_hex) in tree.iteritems():
+                    logger.debug(f"  {name} {mode} {entry_hex}")
+                    entry_hash = hashutil.bytehex_to_hash(entry_hex)
+                    self._object_graph[tree_hash].append(entry_hash)
+                    if mode & stat.S_IFDIR:
+                        try:
+                            tree = self.pack[entry_hex]
+                            subtrees.append(tree)
+                            logger.debug(f"  present in pack: {entry_hex}")
+                        except KeyError:
+                            logger.debug(f"  pack is missing: {entry_hex}")
+                            pass
+
+        for (k, l) in self._object_graph.items():
+            logger.debug(f"object {hashutil.hash_to_hex(k)}")
+            for v in l:
+                logger.debug(f" child {hashutil.hash_to_hex(v)}")
+
     def save_data(self) -> None:
         """Store a pack for archival"""
         assert isinstance(self.visit_date, datetime.datetime)