commit c902a02aaa278d1ecbb79ad2ccb2dcaf6f2523da from: Stefan Sperling date: Fri Oct 18 13:09:55 2024 UTC switch from bitstring to using bitarray directly commit - 891dbef691a3e5a6102a1383ca2576ecd3e5e589 commit + c902a02aaa278d1ecbb79ad2ccb2dcaf6f2523da blob - 01d8160cd8b522113836973aa5fae67ad62cf054 blob + 84d9d77b0bdebaff70138446a5d01cb0a975c332 --- requirements.txt +++ requirements.txt @@ -1,4 +1,4 @@ -bitstring +bitarray dulwich >= 0.21.5 retrying click blob - d8975f789905962b0e3ce5253dfc702880eaeb28 blob + 2e222d3645cf7f03df9b55de032db11f60632f79 --- swh/loader/git/loader.py +++ swh/loader/git/loader.py @@ -29,7 +29,7 @@ from typing import ( Type, ) -from bitstring import BitArray +from bitarray import bitarray import dulwich.client from dulwich.object_store import ObjectStoreGraphWalker from dulwich.objects import Blob, Commit, ShaFile, Tag, Tree, hex_to_sha @@ -782,7 +782,7 @@ class GitLoader(BaseGitLoader): return bisect_find_sha(start, end, sha, self.pack.index._unpack_name) def mark_as_traversed(bitstr, id_hex): - bitstr.set(True, get_pos_in_index(id_hex)) + bitstr[get_pos_in_index(id_hex)] = 1 def have_traversed(bitstr, id_hex): """ @@ -793,12 +793,12 @@ class GitLoader(BaseGitLoader): n = get_pos_in_index(id_hex) if n is None: return None - return bitstr[n] + return bitstr[n] == 1 # Find all tags, commits and corresponding tree roots in the pack file tags = {} commits = {} - traversed_objects = BitArray(length=self.num_objects) + traversed_objects = bitarray(self.num_objects) commit_edges = [] new_trees = [] new_blobs = [] @@ -919,7 +919,7 @@ class GitLoader(BaseGitLoader): logger.debug( f"Processing commit {i + 1}: {hashutil.hash_to_bytehex(commit_hash)}" ) - traversed_objects = BitArray(length=self.num_objects) + traversed_objects = bitarray(self.num_objects) # Show some debug progress output for very large datasets p = int(i * 100 / num_commits_total) if p != last_p: