commit 4c864bc9ec52f825c70670c246cbeb74bb24c74e from: Jelmer Vernooij date: Fri Jul 21 00:38:22 2023 UTC Properly split out SerializedIndexEntry class commit - 32820bdabd280c2f60d32014bb7a6dd3eac27a04 commit + 4c864bc9ec52f825c70670c246cbeb74bb24c74e blob - b2f34545e8e96d302115d0791a7a4cf90692bcf1 blob + 8b6cfc2100bd83aa2fec9086c093832147c71344 --- dulwich/index.py +++ dulwich/index.py @@ -20,11 +20,11 @@ """Parser for the git index file format.""" -from dataclasses import dataclass import os import stat import struct import sys +from dataclasses import dataclass from enum import Enum from typing import ( Any, @@ -52,42 +52,6 @@ from .objects import ( ) from .pack import ObjectContainer, SHA1Reader, SHA1Writer -@dataclass -class IndexEntry: - ctime: int | float | Tuple[int, int] - mtime: int | float | Tuple[int, int] - dev: int - ino: int - mode: int - uid: int - gid: int - size: int - sha: bytes - flags: int - extended_flags: int - - -class ConflictedIndexEntry: - """Index entry that represents a conflict.""" - - ancestor: Optional[IndexEntry] - this: Optional[IndexEntry] - other: Optional[IndexEntry] - - def __init__(self): - self.ancestor = None - self.this = None - self.other = None - - def entries(self) -> Iterable[IndexEntry]: - if self.ancestor: - yield self.ancestor - if self.this: - yield self.this - if self.other: - yield self.other - - # 2-bit stage (during merge) FLAG_STAGEMASK = 0x3000 FLAG_STAGESHIFT = 12 @@ -107,6 +71,7 @@ EXTENDED_FLAG_INTEND_TO_ADD = 0x2000 DEFAULT_VERSION = 2 + class Stage(Enum): NORMAL = 0 MERGE_CONFLICT_ANCESTOR = 1 @@ -114,14 +79,87 @@ class Stage(Enum): MERGE_CONFLICT_OTHER = 3 -class UnmergedEntries(Exception): - """Unmerged entries exist in the index""" +@dataclass +class SerializedIndexEntry: + name: bytes + ctime: int | float | Tuple[int, int] + mtime: int | float | Tuple[int, int] + dev: int + ino: int + mode: int + uid: int + gid: int + size: int + sha: bytes + flags: int + extended_flags: int + def stage(self) -> Stage: + return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT) -def read_stage(entry: IndexEntry) -> Stage: - return Stage((entry.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT) +@dataclass +class IndexEntry: + ctime: int | float | Tuple[int, int] + mtime: int | float | Tuple[int, int] + dev: int + ino: int + mode: int + uid: int + gid: int + size: int + sha: bytes + @classmethod + def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry": + return cls( + ctime=serialized.ctime, + mtime=serialized.mtime, + dev=serialized.dev, + ino=serialized.ino, + mode=serialized.mode, + uid=serialized.uid, + gid=serialized.gid, + size=serialized.size, + sha=serialized.sha, + ) + + def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry: + return SerializedIndexEntry( + name=name, + ctime=self.ctime, + mtime=self.mtime, + dev=self.dev, + ino=self.ino, + mode=self.mode, + uid=self.uid, + gid=self.gid, + size=self.size, + sha=self.sha, + flags=stage.value << FLAG_STAGESHIFT, + extended_flags=0, + ) + + +class ConflictedIndexEntry: + """Index entry that represents a conflict.""" + + ancestor: Optional[IndexEntry] + this: Optional[IndexEntry] + other: Optional[IndexEntry] + + def __init__(self, ancestor: Optional[IndexEntry] = None, + this: Optional[IndexEntry] = None, + other: Optional[IndexEntry] = None) -> None: + self.ancestor = ancestor + self.this = this + self.other = other + + +class UnmergedEntries(Exception): + """Unmerged entries exist in the index.""" + + def pathsplit(path: bytes) -> Tuple[bytes, bytes]: """Split a /-delimited path into a directory part and a basename. @@ -172,13 +210,11 @@ def write_cache_time(f, t): f.write(struct.pack(">LL", *t)) -def read_cache_entry(f, version: int) -> Tuple[bytes, IndexEntry]: +def read_cache_entry(f, version: int) -> SerializedIndexEntry: """Read an entry from a cache file. Args: f: File-like object to read from - Returns: - tuple with: name, IndexEntry """ beginoffset = f.tell() ctime = read_cache_time(f) @@ -205,24 +241,23 @@ def read_cache_entry(f, version: int) -> Tuple[bytes, if version < 4: real_size = (f.tell() - beginoffset + 8) & ~7 f.read((beginoffset + real_size) - f.tell()) - return ( + return SerializedIndexEntry( name, - IndexEntry( - ctime, - mtime, - dev, - ino, - mode, - uid, - gid, - size, - sha_to_hex(sha), - flags & ~FLAG_NAMEMASK, - extended_flags, - )) + ctime, + mtime, + dev, + ino, + mode, + uid, + gid, + size, + sha_to_hex(sha), + flags & ~FLAG_NAMEMASK, + extended_flags, + ) -def write_cache_entry(f, name: bytes, entry: IndexEntry, version: int) -> None: +def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None: """Write an index entry to a file. Args: @@ -232,7 +267,7 @@ def write_cache_entry(f, name: bytes, entry: IndexEntr beginoffset = f.tell() write_cache_time(f, entry.ctime) write_cache_time(f, entry.mtime) - flags = len(name) | (entry.flags & ~FLAG_NAMEMASK) + flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK) if entry.extended_flags: flags |= FLAG_EXTENDED if flags & FLAG_EXTENDED and version is not None and version < 3: @@ -252,7 +287,7 @@ def write_cache_entry(f, name: bytes, entry: IndexEntr ) if flags & FLAG_EXTENDED: f.write(struct.pack(b">H", entry.extended_flags)) - f.write(name) + f.write(entry.name) if version < 4: real_size = (f.tell() - beginoffset + 8) & ~7 f.write(b"\0" * ((beginoffset + real_size) - f.tell())) @@ -265,7 +300,7 @@ class UnsupportedIndexFormat(Exception): self.index_format_version = version -def read_index(f: BinaryIO) -> Iterator[Tuple[bytes, IndexEntry]]: +def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]: """Read an index file, yielding the individual entries.""" header = f.read(4) if header != b"DIRC": @@ -277,7 +312,32 @@ def read_index(f: BinaryIO) -> Iterator[Tuple[bytes, I yield read_cache_entry(f, version) -def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]], version: Optional[int] = None): +def read_index_dict(f) -> Dict[bytes, IndexEntry | ConflictedIndexEntry]: + """Read an index file and return it as a dictionary. + Dict Key is tuple of path and stage number, as + path alone is not unique + Args: + f: File object to read fromls. + """ + ret: Dict[bytes, IndexEntry | ConflictedIndexEntry] = {} + for entry in read_index(f): + stage = entry.stage() + if stage == Stage.NORMAL: + ret[entry.name] = IndexEntry.from_serialized(entry) + else: + existing = ret.setdefault(entry.name, ConflictedIndexEntry()) + if isinstance(existing, IndexEntry): + raise AssertionError("Non-conflicted entry for %r exists" % entry.name) + if stage == Stage.MERGE_CONFLICT_ANCESTOR: + existing.ancestor = IndexEntry.from_serialized(entry) + elif stage == Stage.MERGE_CONFLICT_THIS: + existing.this = IndexEntry.from_serialized(entry) + elif stage == Stage.MERGE_CONFLICT_OTHER: + existing.other = IndexEntry.from_serialized(entry) + return ret + + +def write_index(f: BinaryIO, entries: List[SerializedIndexEntry], version: Optional[int] = None): """Write an index file. Args: @@ -289,8 +349,8 @@ def write_index(f: BinaryIO, entries: List[Tuple[bytes version = DEFAULT_VERSION f.write(b"DIRC") f.write(struct.pack(b">LL", version, len(entries))) - for name, entry in entries: - write_cache_entry(f, name, entry, version) + for entry in entries: + write_cache_entry(f, entry, version) def write_index_dict( @@ -306,13 +366,13 @@ def write_index_dict( value = entries[key] if isinstance(value, ConflictedIndexEntry): if value.ancestor is not None: - entries_list.append((key, value.ancestor)) + entries_list.append(value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR)) if value.this is not None: - entries_list.append((key, value.this)) + entries_list.append(value.this.serialize(key, Stage.MERGE_CONFLICT_THIS)) if value.other is not None: - entries_list.append((key, value.other)) + entries_list.append(value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER)) else: - entries_list.append((key, value)) + entries_list.append(value.serialize(key, Stage.NORMAL)) write_index(f, entries_list, version=version) @@ -381,21 +441,7 @@ class Index: f = GitFile(self._filename, "rb") try: f = SHA1Reader(f) - for name, entry in read_index(f): - stage = read_stage(entry) - if stage == Stage.NORMAL: - self[name] = entry - else: - import pdb; pdb.set_trace() - existing = self._byname.setdefault(name, ConflictedIndexEntry()) - if isinstance(existing, IndexEntry): - raise AssertionError("Non-conflicted entry for %r exists" % name) - if stage == Stage.MERGE_CONFLICT_ANCESTOR: - existing.ancestor = entry - elif stage == Stage.MERGE_CONFLICT_THIS: - existing.this = entry - elif stage == Stage.MERGE_CONFLICT_OTHER: - existing.other = entry + self.update(read_index_dict(f)) # FIXME: Additional data? f.read(os.path.getsize(self._filename) - f.tell() - 20) f.check_sha() @@ -423,11 +469,17 @@ class Index: def get_sha1(self, path: bytes) -> bytes: """Return the (git object) SHA1 for the object at a path.""" - return self[path].sha + value = self[path] + if isinstance(value, ConflictedIndexEntry): + raise UnmergedEntries() + return value.sha def get_mode(self, path: bytes) -> int: """Return the POSIX file mode for the object at a path.""" - return self[path].mode + value = self[path] + if isinstance(value, ConflictedIndexEntry): + raise UnmergedEntries() + return value.mode def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]: """Iterate over path, sha, mode tuples for use with commit_tree.""" @@ -443,30 +495,12 @@ class Index: return True return False - def set_merge_conflict(self, apath, stage, mode, sha, time): - entry = IndexEntry(time, - time, - 0, - 0, - mode, - 0, - 0, - 0, - sha, - stage << FLAG_STAGESHIFT, - 0) - if (apath, Stage.NORMAL) in self._byname: - del self._byname[(apath, Stage.NORMAL)] - self._byname[(apath, stage)] = entry - def clear(self): """Remove all contents from this index.""" self._byname = {} def __setitem__(self, name: bytes, value: IndexEntry | ConflictedIndexEntry) -> None: assert isinstance(name, bytes) - if not isinstance(value, (IndexEntry, ConflictedIndexEntry)): - value = IndexEntry(*value) self._byname[name] = value def __delitem__(self, name: bytes) -> None: @@ -478,13 +512,12 @@ class Index: def items(self) -> Iterator[Tuple[bytes, IndexEntry | ConflictedIndexEntry]]: return iter(self._byname.items()) - def update(self, entries: Dict[bytes, IndexEntry]): + def update(self, entries: Dict[bytes, IndexEntry | ConflictedIndexEntry]): for key, value in entries.items(): self[key] = value def paths(self): - for name in self._byname.keys(): - yield name + yield from self._byname.keys() def changes_from_tree( self, object_store, tree: ObjectID, want_unchanged: bool = False): @@ -628,15 +661,13 @@ def changes_from_tree( def index_entry_from_stat( - stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None, - extended_flags: Optional[int] = None + stat_val, hex_sha: bytes, mode: Optional[int] = None, ): """Create a new index entry from a stat value. Args: stat_val: POSIX stat_result instance hex_sha: Hex sha of the object - flags: Index flags """ if mode is None: mode = cleanup_mode(stat_val.st_mode) @@ -651,8 +682,6 @@ def index_entry_from_stat( stat_val.st_gid, stat_val.st_size, hex_sha, - flags, - extended_flags ) @@ -825,7 +854,7 @@ def build_index_from_tree( st = st.__class__(st_tuple) # default to a stage 0 index entry (normal) # when reading from the filesystem - index[entry.path] = index_entry_from_stat(st, entry.sha, 0) + index[entry.path] = index_entry_from_stat(st, entry.sha) index.write() @@ -1001,7 +1030,7 @@ def index_entry_from_directory(st, path: bytes) -> Opt head = read_submodule_head(path) if head is None: return None - return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK) + return index_entry_from_stat(st, head, mode=S_IFGITLINK) return None @@ -1029,7 +1058,7 @@ def index_entry_from_path( blob = blob_from_path_and_stat(path, st) if object_store is not None: object_store.add_object(blob) - return index_entry_from_stat(st, blob.id, 0) + return index_entry_from_stat(st, blob.id) return None @@ -1074,7 +1103,6 @@ def iter_fresh_objects( if include_deleted: yield path, None, None else: - entry = IndexEntry(*entry) yield path, entry.sha, cleanup_mode(entry.mode) @@ -1089,8 +1117,7 @@ def refresh_index(index: Index, root_path: bytes): """ for path, entry in iter_fresh_entries(index, root_path): if entry: - stage = read_stage(entry) - index[(path, stage)] = entry + index[path] = entry class locked_index: blob - 3749078037d60dcac51a080712526a7631d04908 blob + 5a68f58dc586f1fc082ee0a7a3adcb66c77718d4 --- dulwich/porcelain.py +++ dulwich/porcelain.py @@ -1970,7 +1970,7 @@ def checkout_branch(repo, target: Union[bytes, str], f blob = repo.object_store[entry.sha] ensure_dir_exists(os.path.dirname(full_path)) st = build_file_from_blob(blob, entry.mode, full_path) - repo_index[entry.path] = index_entry_from_stat(st, entry.sha, 0) + repo_index[entry.path] = index_entry_from_stat(st, entry.sha) repo_index.write() blob - 72f4052d0a512802240e6fdd27ae71214b6dd5fd blob + b1940d3220c911229f5e7dbabd931955859bdfaa --- dulwich/repo.py +++ dulwich/repo.py @@ -1422,7 +1422,7 @@ class Repo(BaseRepo): blob = blob_from_path_and_stat(full_path, st) blob = blob_normalizer.checkin_normalize(blob, fs_path) self.object_store.add_object(blob) - index[tree_path] = index_entry_from_stat(st, blob.id, 0) + index[tree_path] = index_entry_from_stat(st, blob.id) index.write() def unstage(self, fs_paths: List[str]): @@ -1478,8 +1478,6 @@ class Repo(BaseRepo): gid=st.st_gid if st else 0, size=len(self[tree_entry[1]].data), sha=tree_entry[1], - flags=0, - extended_flags=0 ) index[tree_path] = index_entry blob - 660fa416263b69b99f262355b22775d69912a61f blob + afd599bd53dce72edf45bd703a051615de1fe6e1 --- dulwich/tests/test_index.py +++ dulwich/tests/test_index.py @@ -34,7 +34,7 @@ from dulwich.tests import TestCase, skipIf from ..index import ( Index, IndexEntry, - Stage, + SerializedIndexEntry, _fs_to_tree_path, _tree_to_fs_path, build_index_from_tree, @@ -43,6 +43,7 @@ from ..index import ( get_unstaged_changes, index_entry_from_stat, read_index, + read_index_dict, validate_path_element_default, validate_path_element_ntfs, write_cache_time, @@ -102,8 +103,6 @@ class SimpleIndexTestCase(IndexTestCase): 1000, 0, b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", - 0, - 0, ), self.get_simple_index("index")[b"bla"], ) @@ -134,8 +133,8 @@ class SimpleIndexWriterTestCase(IndexTestCase): def test_simple_write(self): entries = [ ( - b"barbla", - IndexEntry( + SerializedIndexEntry( + b"barbla", (1230680220, 0), (1230680220, 0), 2050, @@ -157,6 +156,38 @@ class SimpleIndexWriterTestCase(IndexTestCase): self.assertEqual(entries, list(read_index(x))) +class ReadIndexDictTests(IndexTestCase): + + def setUp(self): + IndexTestCase.setUp(self) + self.tempdir = tempfile.mkdtemp() + + def tearDown(self): + IndexTestCase.tearDown(self) + shutil.rmtree(self.tempdir) + + def test_simple_write(self): + entries = { + b"barbla": IndexEntry( + (1230680220, 0), + (1230680220, 0), + 2050, + 3761020, + 33188, + 1000, + 1000, + 0, + b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", + ) + } + filename = os.path.join(self.tempdir, "test-simple-write-index") + with open(filename, "wb+") as x: + write_index_dict(x, entries) + + with open(filename, "rb") as x: + self.assertEqual(entries, read_index_dict(x)) + + class CommitTreeTests(TestCase): def setUp(self): super().setUp() @@ -244,7 +275,7 @@ class IndexEntryFromStatTests(TestCase): 1324180496, ) ) - entry = index_entry_from_stat(st, "22" * 20, 0) + entry = index_entry_from_stat(st, b"22" * 20) self.assertEqual( entry, IndexEntry( @@ -256,9 +287,7 @@ class IndexEntryFromStatTests(TestCase): 1000, 1000, 12288, - "2222222222222222222222222222222222222222", - 0, - None, + b"2222222222222222222222222222222222222222", ), ) @@ -277,7 +306,7 @@ class IndexEntryFromStatTests(TestCase): 1324180496, ) ) - entry = index_entry_from_stat(st, "22" * 20, 0, mode=stat.S_IFREG + 0o755) + entry = index_entry_from_stat(st, b"22" * 20, mode=stat.S_IFREG + 0o755) self.assertEqual( entry, IndexEntry( @@ -289,9 +318,7 @@ class IndexEntryFromStatTests(TestCase): 1000, 1000, 12288, - "2222222222222222222222222222222222222222", - 0, - None, + b"2222222222222222222222222222222222222222", ), )