commit a8a5077aeedfaab85ff962970f06d4fba99da514 from: Antoine Lambert date: Tue Jun 04 08:53:35 2024 UTC loader: Ensure to fetch latest snapshot produced by a git visit type The SWH data model allows an origin to have multiple visit types, in particular a git origin can have visit types 'git' and 'git-checkout'. The git loader implementation must retrieve the latest snapshot produced by a 'git' visit type, as failing to do so can break incremental loading of a git origin having both visit types mentioned above. Indeed a 'git-checkout' visit type produces a snapshot with a single branch while a 'git' visit type produces a snapshot containing all branches of the loaded repository. Previously, if the latest snapshot retrieved was produced by a 'git-checkout' visit type, the loader would refetch all branches and associated git objects even though most of them have already been archived. Related to swh/meta#5092. commit - 93d43596df259fc820e027bda298946038243735 commit + a8a5077aeedfaab85ff962970f06d4fba99da514 blob - 4566e827a635100d0acf66a99a06ad411c15477b blob + 5bad54e9d7541a7bdf917ba290f64af394a2c4c5 --- requirements-swh.txt +++ requirements-swh.txt @@ -1,5 +1,5 @@ swh.core >= 2.22.0 -swh.loader.core >= 5.14.2 +swh.loader.core >= 5.18.1 swh.model >= 6.13.0 swh.scheduler >= 0.0.39 -swh.storage >= 0.22.0 +swh.storage >= 2.4.1 blob - 2d4473d9cc61ac91a92b3465f42b66feac46adc2 blob + 6a772bbfcfaae24910677d37e6ab851a7bf2f45c --- requirements-test.txt +++ requirements-test.txt @@ -1,9 +1,9 @@ pytest >= 8.1 pytest-mock requests_mock -swh.loader.core[testing] +swh.loader.core[testing] >= 5.18.1 swh.scheduler[testing] >= 0.5.0 -swh.storage[testing] +swh.storage[testing] >= 2.4.1 types-Deprecated types-click types-urllib3 blob - b11c3fdbef25e449a5c88688057f7d19b62639d5 blob + ec17c8b4aa4844707f053b13e05a7619261dd1fa --- swh/loader/git/loader.py +++ swh/loader/git/loader.py @@ -308,7 +308,11 @@ class GitLoader(BaseGitLoader): ) def get_full_snapshot(self, origin_url) -> Optional[Snapshot]: - 
return snapshot_get_latest(self.storage, origin_url) + return snapshot_get_latest( + self.storage, + origin_url, + visit_type=self.visit_type, + ) def load_metadata_objects( self, metadata_objects: List[RawExtrinsicMetadata] blob - c38c9efca9cce23733ee2eb12bac100936febf8e blob + 167936d2e08aaeae45111fcbd7052f5a08c37c81 --- swh/loader/git/tests/test_loader.py +++ swh/loader/git/tests/test_loader.py @@ -506,14 +506,14 @@ class TestGitLoader2(FullGitLoaderTests, CommonGitLoad self.repo_url, allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), # As it does not already have a snapshot, fall back to the parent origin call( f"base://{self.repo_url}", allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), ] @@ -579,14 +579,14 @@ class TestGitLoader2(FullGitLoaderTests, CommonGitLoad self.repo_url, allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), # As it does not already have a snapshot, fall back to the parent origin call( f"base://{self.repo_url}", allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), ] @@ -636,7 +636,7 @@ class TestGitLoader2(FullGitLoaderTests, CommonGitLoad # Tries the same origin, and finds a snapshot call( self.repo_url, - type=None, + type="git", allowed_statuses=None, require_snapshot=True, ), @@ -644,7 +644,7 @@ class TestGitLoader2(FullGitLoaderTests, CommonGitLoad # since the last visit call( f"base://{self.repo_url}", - type=None, + type="git", allowed_statuses=None, require_snapshot=True, ), @@ -762,14 +762,14 @@ class TestGitLoader2(FullGitLoaderTests, CommonGitLoad self.repo_url, allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), # As it does not already have a snapshot, fall back to the parent origin call( f"base://{self.repo_url}", allowed_statuses=None, require_snapshot=True, - type=None, + type="git", ), ]