Commit Diff


commit - cbe1c11028d08815f6840cc4b5fdaeafca82e9ca
commit + dc6d4f4cddb766959cc8d773a86d6e1cebc53265
blob - 034ae31642dea4c2774d19b1ecd4b3546d3967a8
blob + dd1d00020b1d912022cc7442c498611140c3e375
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
@@ -903,11 +903,14 @@ class GitLoader(BaseGitLoader):
                     "fetched pack file nor in local heads nor in the archive"
                 )
 
-        def get_dependencies(v):
+        def get_successors(v, object_type=None):
             vertices = [v]
             while len(vertices) > 0:
                 v = vertices.pop(0)
                 for s in self._object_graph.successors(v):
+                    if object_type is not None:
+                        if self._object_graph.vs[s]["object_type"] != object_type:
+                            continue
                     if s not in vertices:
                         vertices.append(s)
                     yield (
@@ -915,6 +918,16 @@ class GitLoader(BaseGitLoader):
                         self._object_graph.vs[s]["object_type"],
                     )
 
+        def get_neighbors(v, object_type=None):  # yields (name, object_type) per neighbor of v
+            for s in self._object_graph.neighbors(v):  # single pass: no transitive traversal
+                if object_type is not None:  # None disables the type filter
+                    if self._object_graph.vs[s]["object_type"] != object_type:
+                        continue  # neighbor has a different object type: skip it
+                yield (
+                    self._object_graph.vs[s]["name"],
+                    self._object_graph.vs[s]["object_type"],
+                )
+
         try:
             blob_vertices = self._object_graph.vs.select(object_type=GitObjectType.BLOB)
         except KeyError:
@@ -960,7 +973,7 @@ class GitLoader(BaseGitLoader):
                 if tree_hash in missing_directories:
                     continue
                 have_dep = False
-                for dep_hash, dep_type in get_dependencies(t):
+                for dep_hash, dep_type in get_successors(t):
                     have_dep = True
                     if dep_type == GitObjectType.BLOB and dep_hash in missing_contents:
                         # We can infer that the tree is also missing.
@@ -975,6 +988,10 @@ class GitLoader(BaseGitLoader):
                     if len(missing_empty_tree):
                         missing_directories.add(tree_hash)
             self.log.debug(
+                f"Searched {int((nsearched * 100) / len(tree_vertices))}% "
+                f"of {len(tree_vertices)} packed trees..."
+            )
+            self.log.debug(
                 "Number of packed trees considered missing by implication: "
                 f"{len(missing_directories)}"
             )
@@ -1008,11 +1025,15 @@ class GitLoader(BaseGitLoader):
                     )
                 if commit_hash in missing_revisions:
                     continue
-                for dep_hash, dep_type in get_dependencies(c):
+                for dep_hash, dep_type in get_neighbors(c, GitObjectType.TREE):
                     if dep_hash in missing_contents or dep_hash in missing_directories:
                         # We can infer that the commit is also missing.
                         missing_revisions.add(commit_hash)
                         break
+            self.log.debug(
+                f"Searched {int((nsearched * 100) / len(commit_vertices))}% "
+                f"of {len(commit_vertices)} packed commits..."
+            )
             self.log.debug(
                 "Number of packed commits considered missing by implication: "
                 f"{len(missing_revisions)}"
@@ -1048,7 +1069,7 @@ class GitLoader(BaseGitLoader):
             )
             for t in tag_vertices:
                 tag_hash = t["name"]
-                for dep_hash, dep_type in get_dependencies(t):
+                for dep_hash, dep_type in get_neighbors(t):
                     if (
                         dep_hash in missing_revisions
                         or dep_hash in missing_directories