commit - 971c04d1b79f92a74c42df10efba38fddf2ad006
commit + 948ae59d4196b17d4098e50439b8cc7e728b308e
blob - 558384425de018b5bd4cb9814df33623677ae268
blob + 0f2285d498628de6a1612b2b81a48554344f3de2
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
packindex = load_pack_index(indexfile.name)
self.pack = Pack.from_objects(packdata, packindex)
self.pack.resolve_ext_ref = self._resolve_ext_ref
+
+ self.make_object_graph()
return False
+
+ def make_object_graph(self):
+ logger.debug("Building packed objects graph")
+ self._object_graph = {}
+
+ # Find all commits and corresponding tree roots in the pack file
+ commits = {}
+ for ref_name, ref_object_hex in self.remote_refs.items():
+ if utils.ignore_branch_name(ref_name):
+ continue
+ try:
+ obj = self.pack[ref_object_hex]
+ except KeyError:
+ continue
+ logger.debug(f"Opened {obj}")
+ while obj.type_name == b"tag":
+ try:
+ obj = self.pack[obj.object[1]]
+ logger.debug(f"Opened {obj}")
+ except KeyError:
+ obj = None
+ break
+ if obj is None or obj.type_name != b"commit":
+ continue
+
+ commit = obj
+ tree_hash = hashutil.bytehex_to_hash(commit.tree)
+ commit_hash = hashutil.bytehex_to_hash(ref_object_hex)
+ commits[commit_hash] = tree_hash
+
+ for commit_hash, tree_hash in commits.items():
+ logger.debug(
+ f"commit {hashutil.hash_to_hex(commit_hash)} "
+ f"tree {hashutil.hash_to_hex(tree_hash)}"
+ )
+ self._object_graph[commit_hash] = list()
+ self._object_graph[commit_hash].append(tree_hash)
+ tree_hex = hashutil.hash_to_bytehex(tree_hash)
+ try:
+ tree = self.pack[tree_hex]
+ self._object_graph[tree_hash] = list()
+ except KeyError:
+ continue
+ subtrees = [tree]
+ while len(subtrees) > 0:
+ tree = subtrees.pop(0)
+ logger.debug(f"Entries of {tree}:")
+ for (name, mode, entry_hex) in tree.iteritems():
+ logger.debug(f" {name} {mode} {entry_hex}")
+ entry_hash = hashutil.bytehex_to_hash(entry_hex)
+ self._object_graph[tree_hash].append(entry_hash)
+ if mode & stat.S_IFDIR:
+ try:
+ tree = self.pack[entry_hex]
+ subtrees.append(tree)
+ logger.debug(f" present in pack: {entry_hex}")
+ except KeyError:
+ logger.debug(f" pack is missing: {entry_hex}")
+ pass
+
+ for (k, l) in self._object_graph.items():
+ logger.debug(f"object {hashutil.hash_to_hex(k)}")
+ for v in l:
+ logger.debug(f" child {hashutil.hash_to_hex(v)}")
+
def save_data(self) -> None:
"""Store a pack for archival"""
assert isinstance(self.visit_date, datetime.datetime)