commit 39d38c4e1ee888c5d978dd6b689987dc9ff43740 from: Antoine Lambert date: Tue Jun 04 10:16:58 2024 UTC test_loader: Fix implementation of test_loader_with_ref_delta_in_pack The previous implementation was building an invalid pack file with REF_DELTA object types, as it was using each object to deltify as the base of its own delta. This was leading to errors and undefined behavior after building an index for such a pack file as the deltified objects could not be properly resolved by dulwich (observed by stsp while working on git loader improvements). The bases for deltified objects are now objects that were previously loaded into the archive. Tag objects produced in that test are now also guaranteed to be valid. commit - a8a5077aeedfaab85ff962970f06d4fba99da514 commit + 39d38c4e1ee888c5d978dd6b689987dc9ff43740 blob - 167936d2e08aaeae45111fcbd7052f5a08c37c81 blob + c891a0e6f3366386a361be4016bca2102d04fcc2 --- swh/loader/git/tests/test_loader.py +++ swh/loader/git/tests/test_loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2023 The Software Heritage developers +# Copyright (C) 2018-2024 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,12 +13,13 @@ import subprocess import sys from tempfile import SpooledTemporaryFile from threading import Thread +import time from unittest.mock import MagicMock, call import attr from dulwich.errors import GitProtocolError, NotGitRepository, ObjectFormatException from dulwich.pack import REF_DELTA -from dulwich.porcelain import push +from dulwich.porcelain import get_user_timezones, push import dulwich.repo from dulwich.tests.utils import build_pack import pytest @@ -293,8 +294,12 @@ class TestGitLoader(FullGitLoaderTests, CommonGitLoade def add_tag(tag_name, tag_message, commit): tag = dulwich.objects.Tag() tag.name = tag_name + tag.tagger = b"John Doe " tag.message = 
tag_message tag.object = (dulwich.objects.Commit, commit) + tag.tag_time = int(time.time()) + tag.tag_timezone = get_user_timezones()[0] + tag.check() self.repo.object_store.add_object(tag) self.repo[b"refs/tags/" + tag_name] = tag.id return tag @@ -350,18 +355,30 @@ class TestGitLoader(FullGitLoaderTests, CommonGitLoade # get all object ids that will be in storage after third load objects_third_load = set(iter(self.repo.object_store)) - # create a pack file containing full objects for newly added blob, tree, - # commit and tag in latest commit but also external references to objects - # that were discovered during the second loading of the repository + # create a pack file containing deltified objects for newly added blob, tree, + # commit and tag in latest commit whose bases are external objects that were + # discovered during the second loading of the repository objects = [] - new_objects_second_load = objects_second_load - objects_first_load - new_objects_third_load = objects_third_load - objects_second_load - for obj_id in new_objects_third_load: - obj = self.repo.object_store[obj_id] - objects.append((obj.type_num, obj.as_raw_string())) - for obj_id in new_objects_second_load: - obj = self.repo.object_store[obj_id] - objects.append((REF_DELTA, (obj_id, obj.as_raw_string()))) + new_objects_second_load = [ + self.repo.object_store[obj_id] + for obj_id in (objects_second_load - objects_first_load) + ] + new_objects_third_load = [ + self.repo.object_store[obj_id] + for obj_id in (objects_third_load - objects_second_load) + ] + for new_obj in new_objects_third_load: + base_obj = next( + obj + for obj in new_objects_second_load + if obj.type_num == new_obj.type_num + ) + objects.append( + ( + REF_DELTA, + (base_obj.id, new_obj.as_raw_string()), + ) + ) buffer = io.BytesIO() build_pack(buffer, objects, self.repo.object_store)