Commit Diff


commit - 387c45f4c7dcbeea29112789e8c87b5bd973ce37
commit + 4ff67682be16c48c098e0412aee07cabd74a1f27
blob - 83e9e758949ae582b359469d41573bd1b3f44205
blob + ab9b86ab3ca21b10c0676ff6d55f269df436634e
--- swh/loader/git/loader.py
+++ swh/loader/git/loader.py
@@ -723,12 +723,14 @@ class GitLoader(BaseGitLoader):
         tags = {}
         commits = {}
         commit_edges = []
+        missing_objects = []
         for ref_name, ref_object_hex in self.remote_refs.items():
             if utils.ignore_branch_name(ref_name):
                 continue
             try:
                 obj = self.pack[ref_object_hex]
             except KeyError:
+                missing_objects.append(ref_object_hex)
                 continue
 
             while obj.type_name == b"tag":
@@ -739,7 +741,7 @@ class GitLoader(BaseGitLoader):
                 try:
                     tagged_obj = self.pack[tagged_object_hex]
                 except KeyError:
-                    logger.debug(f"  pack is missing: {tagged_object_hex}")
+                    missing_objects.append(tagged_object_hex)
                     obj = None
                     break
                 else:
@@ -748,9 +750,11 @@ class GitLoader(BaseGitLoader):
                     obj = tagged_obj
                     ref_object_hex = tagged_object_hex
 
-            # TODO: Allow tags pointing at blobs or trees?
             if obj is None:
+                # Object is missing from pack file.
                 continue
+
+            # TODO: Allow tags pointing at blobs or trees?
             if obj.type_name != b"commit":
                 logger.debug(
                     f"  tag {ref_name} resolves to a {obj.type_name}, not a commit"
@@ -766,7 +770,6 @@ class GitLoader(BaseGitLoader):
                 try:
                     commit = self.pack[commit_hex]
                 except KeyError:
-                    logger.debug(f"  pack is missing: {commit_hex}")
                     continue
                 commit_hash = hashutil.bytehex_to_hash(commit_hex)
                 if commit_hash in commits.keys():
@@ -781,6 +784,15 @@ class GitLoader(BaseGitLoader):
                         if (commit_hash, parent_hash) not in commit_edges:
                             commit_edges.append((commit_hash, parent_hash))
 
+        archived_missing_objects = set(
+            self.storage.object_find_by_sha1_git(missing_objects).keys()
+        )
+        if not archived_missing_objects.issuperset(missing_objects):
+            raise NotFound(
+                "Referenced objects found in neither the fetched pack file nor "
+                f"in the archive: {set(missing_objects) - archived_missing_objects}"
+            )
+
         def add_vertices(new_vertices, object_type):
             attributes = dict()
             attributes["object_type"] = [object_type for x in new_vertices]
@@ -811,7 +823,6 @@ class GitLoader(BaseGitLoader):
                 try:
                     tree = self.pack[tree_hex]
                 except KeyError:
-                    logger.debug(f"  pack is missing: {tree_hex}")
                     continue
                 for name, mode, entry_hex in tree.iteritems():
                     if mode & submodule_mode == submodule_mode:
@@ -827,7 +838,7 @@ class GitLoader(BaseGitLoader):
                             tree = self.pack[entry_hex]
                             subtrees.append(entry_hash)
                         except KeyError:
-                            logger.debug(f"  pack is missing: {entry_hex}")
+                            pass
                 # add new vertices and edges in batches for performance reasons
                 if len(new_trees) + len(new_blobs) > 100000 or len(new_edges) > 100000:
                     if len(new_trees) > 0: