commit 4c864bc9ec52f825c70670c246cbeb74bb24c74e
from: Jelmer Vernooĳ <jelmer@jelmer.uk>
date: Fri Jul 21 00:38:22 2023 UTC

Properly split out SerializedIndexEntry class

commit - 32820bdabd280c2f60d32014bb7a6dd3eac27a04
commit + 4c864bc9ec52f825c70670c246cbeb74bb24c74e
blob - b2f34545e8e96d302115d0791a7a4cf90692bcf1
blob + 8b6cfc2100bd83aa2fec9086c093832147c71344
--- dulwich/index.py
+++ dulwich/index.py
@@ -20,11 +20,11 @@
 
 """Parser for the git index file format."""
 
-from dataclasses import dataclass
 import os
 import stat
 import struct
 import sys
+from dataclasses import dataclass
 from enum import Enum
 from typing import (
     Any,
@@ -52,42 +52,6 @@ from .objects import (
 )
 from .pack import ObjectContainer, SHA1Reader, SHA1Writer
 
-@dataclass
-class IndexEntry:
-    ctime: int | float | Tuple[int, int]
-    mtime: int | float | Tuple[int, int]
-    dev: int
-    ino: int
-    mode: int
-    uid: int
-    gid: int
-    size: int
-    sha: bytes
-    flags: int
-    extended_flags: int
-
-
-class ConflictedIndexEntry:
-    """Index entry that represents a conflict."""
-
-    ancestor: Optional[IndexEntry]
-    this: Optional[IndexEntry]
-    other: Optional[IndexEntry]
-
-    def __init__(self):
-        self.ancestor = None
-        self.this = None
-        self.other = None
-
-    def entries(self) -> Iterable[IndexEntry]:
-        if self.ancestor:
-            yield self.ancestor
-        if self.this:
-            yield self.this
-        if self.other:
-            yield self.other
-
-
 # 2-bit stage (during merge)
 FLAG_STAGEMASK = 0x3000
 FLAG_STAGESHIFT = 12
@@ -107,6 +71,7 @@ EXTENDED_FLAG_INTEND_TO_ADD = 0x2000
 
 DEFAULT_VERSION = 2
 
+
 class Stage(Enum):
     NORMAL = 0
     MERGE_CONFLICT_ANCESTOR = 1
@@ -114,14 +79,87 @@ class Stage(Enum):
     MERGE_CONFLICT_OTHER = 3
 
 
-class UnmergedEntries(Exception):
-    """Unmerged entries exist in the index"""
+@dataclass
+class SerializedIndexEntry:
+    name: bytes
+    ctime: int | float | Tuple[int, int]
+    mtime: int | float | Tuple[int, int]
+    dev: int
+    ino: int
+    mode: int
+    uid: int
+    gid: int
+    size: int
+    sha: bytes
+    flags: int
+    extended_flags: int
 
+    def stage(self) -> Stage:
+        return Stage((self.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
 
-def read_stage(entry: IndexEntry) -> Stage:
-    return Stage((entry.flags & FLAG_STAGEMASK) >> FLAG_STAGESHIFT)
 
+@dataclass
+class IndexEntry:
+    ctime: int | float | Tuple[int, int]
+    mtime: int | float | Tuple[int, int]
+    dev: int
+    ino: int
+    mode: int
+    uid: int
+    gid: int
+    size: int
+    sha: bytes
 
+    @classmethod
+    def from_serialized(cls, serialized: SerializedIndexEntry) -> "IndexEntry":
+        return cls(
+            ctime=serialized.ctime,
+            mtime=serialized.mtime,
+            dev=serialized.dev,
+            ino=serialized.ino,
+            mode=serialized.mode,
+            uid=serialized.uid,
+            gid=serialized.gid,
+            size=serialized.size,
+            sha=serialized.sha,
+        )
+
+    def serialize(self, name: bytes, stage: Stage) -> SerializedIndexEntry:
+        return SerializedIndexEntry(
+            name=name,
+            ctime=self.ctime,
+            mtime=self.mtime,
+            dev=self.dev,
+            ino=self.ino,
+            mode=self.mode,
+            uid=self.uid,
+            gid=self.gid,
+            size=self.size,
+            sha=self.sha,
+            flags=stage.value << FLAG_STAGESHIFT,
+            extended_flags=0,
+        )
+
+
+class ConflictedIndexEntry:
+    """Index entry that represents a conflict."""
+
+    ancestor: Optional[IndexEntry]
+    this: Optional[IndexEntry]
+    other: Optional[IndexEntry]
+
+    def __init__(self, ancestor: Optional[IndexEntry] = None,
+                 this: Optional[IndexEntry] = None,
+                 other: Optional[IndexEntry] = None) -> None:
+        self.ancestor = ancestor
+        self.this = this
+        self.other = other
+
+
+class UnmergedEntries(Exception):
+    """Unmerged entries exist in the index."""
+
+
 def pathsplit(path: bytes) -> Tuple[bytes, bytes]:
     """Split a /-delimited path into a directory part and a basename.
 
@@ -172,13 +210,11 @@ def write_cache_time(f, t):
     f.write(struct.pack(">LL", *t))
 
 
-def read_cache_entry(f, version: int) -> Tuple[bytes, IndexEntry]:
+def read_cache_entry(f, version: int) -> SerializedIndexEntry:
     """Read an entry from a cache file.
 
     Args:
       f: File-like object to read from
-    Returns:
-      tuple with: name, IndexEntry
     """
     beginoffset = f.tell()
     ctime = read_cache_time(f)
@@ -205,24 +241,23 @@ def read_cache_entry(f, version: int) -> Tuple[bytes, 
     if version < 4:
         real_size = (f.tell() - beginoffset + 8) & ~7
         f.read((beginoffset + real_size) - f.tell())
-    return (
+    return SerializedIndexEntry(
         name,
-        IndexEntry(
-            ctime,
-            mtime,
-            dev,
-            ino,
-            mode,
-            uid,
-            gid,
-            size,
-            sha_to_hex(sha),
-            flags & ~FLAG_NAMEMASK,
-            extended_flags,
-        ))
+        ctime,
+        mtime,
+        dev,
+        ino,
+        mode,
+        uid,
+        gid,
+        size,
+        sha_to_hex(sha),
+        flags & ~FLAG_NAMEMASK,
+        extended_flags,
+    )
 
 
-def write_cache_entry(f, name: bytes, entry: IndexEntry, version: int) -> None:
+def write_cache_entry(f, entry: SerializedIndexEntry, version: int) -> None:
     """Write an index entry to a file.
 
     Args:
@@ -232,7 +267,7 @@ def write_cache_entry(f, name: bytes, entry: IndexEntr
     beginoffset = f.tell()
     write_cache_time(f, entry.ctime)
     write_cache_time(f, entry.mtime)
-    flags = len(name) | (entry.flags & ~FLAG_NAMEMASK)
+    flags = len(entry.name) | (entry.flags & ~FLAG_NAMEMASK)
     if entry.extended_flags:
         flags |= FLAG_EXTENDED
     if flags & FLAG_EXTENDED and version is not None and version < 3:
@@ -252,7 +287,7 @@ def write_cache_entry(f, name: bytes, entry: IndexEntr
     )
     if flags & FLAG_EXTENDED:
         f.write(struct.pack(b">H", entry.extended_flags))
-    f.write(name)
+    f.write(entry.name)
     if version < 4:
         real_size = (f.tell() - beginoffset + 8) & ~7
         f.write(b"\0" * ((beginoffset + real_size) - f.tell()))
@@ -265,7 +300,7 @@ class UnsupportedIndexFormat(Exception):
         self.index_format_version = version
 
 
-def read_index(f: BinaryIO) -> Iterator[Tuple[bytes, IndexEntry]]:
+def read_index(f: BinaryIO) -> Iterator[SerializedIndexEntry]:
     """Read an index file, yielding the individual entries."""
     header = f.read(4)
     if header != b"DIRC":
@@ -277,7 +312,32 @@ def read_index(f: BinaryIO) -> Iterator[Tuple[bytes, I
         yield read_cache_entry(f, version)
 
 
-def write_index(f: BinaryIO, entries: List[Tuple[bytes, IndexEntry]], version: Optional[int] = None):
+def read_index_dict(f) -> Dict[bytes, IndexEntry | ConflictedIndexEntry]:
+    """Read an index file and return it as a dictionary.
+
+    Keys are paths; conflict stages of a path share one ConflictedIndexEntry.
+    Args:
+      f: File object to read from.
+    """
+    ret: Dict[bytes, IndexEntry | ConflictedIndexEntry] = {}
+    for entry in read_index(f):
+        stage = entry.stage()
+        if stage == Stage.NORMAL:
+            ret[entry.name] = IndexEntry.from_serialized(entry)
+        else:
+            existing = ret.setdefault(entry.name, ConflictedIndexEntry())
+            if isinstance(existing, IndexEntry):
+                raise AssertionError("Non-conflicted entry for %r exists" % entry.name)
+            if stage == Stage.MERGE_CONFLICT_ANCESTOR:
+                existing.ancestor = IndexEntry.from_serialized(entry)
+            elif stage == Stage.MERGE_CONFLICT_THIS:
+                existing.this = IndexEntry.from_serialized(entry)
+            elif stage == Stage.MERGE_CONFLICT_OTHER:
+                existing.other = IndexEntry.from_serialized(entry)
+    return ret
+
+
+def write_index(f: BinaryIO, entries: List[SerializedIndexEntry], version: Optional[int] = None):
     """Write an index file.
 
     Args:
@@ -289,8 +349,8 @@ def write_index(f: BinaryIO, entries: List[Tuple[bytes
         version = DEFAULT_VERSION
     f.write(b"DIRC")
     f.write(struct.pack(b">LL", version, len(entries)))
-    for name, entry in entries:
-        write_cache_entry(f, name, entry, version)
+    for entry in entries:
+        write_cache_entry(f, entry, version)
 
 
 def write_index_dict(
@@ -306,13 +366,13 @@ def write_index_dict(
         value = entries[key]
         if isinstance(value, ConflictedIndexEntry):
             if value.ancestor is not None:
-                entries_list.append((key, value.ancestor))
+                entries_list.append(value.ancestor.serialize(key, Stage.MERGE_CONFLICT_ANCESTOR))
             if value.this is not None:
-                entries_list.append((key, value.this))
+                entries_list.append(value.this.serialize(key, Stage.MERGE_CONFLICT_THIS))
             if value.other is not None:
-                entries_list.append((key, value.other))
+                entries_list.append(value.other.serialize(key, Stage.MERGE_CONFLICT_OTHER))
         else:
-            entries_list.append((key, value))
+            entries_list.append(value.serialize(key, Stage.NORMAL))
     write_index(f, entries_list, version=version)
 
 
@@ -381,21 +441,7 @@ class Index:
         f = GitFile(self._filename, "rb")
         try:
             f = SHA1Reader(f)
-            for name, entry in read_index(f):
-                stage = read_stage(entry)
-                if stage == Stage.NORMAL:
-                    self[name] = entry
-                else:
-                    import pdb; pdb.set_trace()
-                    existing = self._byname.setdefault(name, ConflictedIndexEntry())
-                    if isinstance(existing, IndexEntry):
-                        raise AssertionError("Non-conflicted entry for %r exists" % name)
-                    if stage == Stage.MERGE_CONFLICT_ANCESTOR:
-                        existing.ancestor = entry
-                    elif stage == Stage.MERGE_CONFLICT_THIS:
-                        existing.this = entry
-                    elif stage == Stage.MERGE_CONFLICT_OTHER:
-                        existing.other = entry
+            self.update(read_index_dict(f))
             # FIXME: Additional data?
             f.read(os.path.getsize(self._filename) - f.tell() - 20)
             f.check_sha()
@@ -423,11 +469,17 @@ class Index:
 
     def get_sha1(self, path: bytes) -> bytes:
         """Return the (git object) SHA1 for the object at a path."""
-        return self[path].sha
+        value = self[path]
+        if isinstance(value, ConflictedIndexEntry):
+            raise UnmergedEntries()
+        return value.sha
 
     def get_mode(self, path: bytes) -> int:
         """Return the POSIX file mode for the object at a path."""
-        return self[path].mode
+        value = self[path]
+        if isinstance(value, ConflictedIndexEntry):
+            raise UnmergedEntries()
+        return value.mode
 
     def iterobjects(self) -> Iterable[Tuple[bytes, bytes, int]]:
         """Iterate over path, sha, mode tuples for use with commit_tree."""
@@ -443,30 +495,12 @@ class Index:
                 return True
         return False
 
-    def set_merge_conflict(self, apath, stage, mode, sha, time):
-        entry = IndexEntry(time,
-                           time,
-                           0,
-                           0,
-                           mode,
-                           0,
-                           0,
-                           0,
-                           sha,
-                           stage << FLAG_STAGESHIFT,
-                           0)
-        if (apath, Stage.NORMAL) in self._byname:
-            del self._byname[(apath, Stage.NORMAL)]
-        self._byname[(apath, stage)] = entry
-
     def clear(self):
         """Remove all contents from this index."""
         self._byname = {}
 
     def __setitem__(self, name: bytes, value: IndexEntry | ConflictedIndexEntry) -> None:
         assert isinstance(name, bytes)
-        if not isinstance(value, (IndexEntry, ConflictedIndexEntry)):
-            value = IndexEntry(*value)
         self._byname[name] = value
 
     def __delitem__(self, name: bytes) -> None:
@@ -478,13 +512,12 @@ class Index:
     def items(self) -> Iterator[Tuple[bytes, IndexEntry | ConflictedIndexEntry]]:
         return iter(self._byname.items())
 
-    def update(self, entries: Dict[bytes, IndexEntry]):
+    def update(self, entries: Dict[bytes, IndexEntry | ConflictedIndexEntry]):
         for key, value in entries.items():
             self[key] = value
 
     def paths(self):
-        for name in self._byname.keys():
-            yield name
+        yield from self._byname.keys()
 
     def changes_from_tree(
             self, object_store, tree: ObjectID, want_unchanged: bool = False):
@@ -628,15 +661,13 @@ def changes_from_tree(
 
 
 def index_entry_from_stat(
-    stat_val, hex_sha: bytes, flags: int, mode: Optional[int] = None,
-    extended_flags: Optional[int] = None
+    stat_val, hex_sha: bytes, mode: Optional[int] = None,
 ):
     """Create a new index entry from a stat value.
 
     Args:
       stat_val: POSIX stat_result instance
       hex_sha: Hex sha of the object
-      flags: Index flags
     """
     if mode is None:
         mode = cleanup_mode(stat_val.st_mode)
@@ -651,8 +682,6 @@ def index_entry_from_stat(
         stat_val.st_gid,
         stat_val.st_size,
         hex_sha,
-        flags,
-        extended_flags
     )
 
 
@@ -825,7 +854,7 @@ def build_index_from_tree(
             st = st.__class__(st_tuple)
             # default to a stage 0 index entry (normal)
             # when reading from the filesystem
-        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
+        index[entry.path] = index_entry_from_stat(st, entry.sha)
 
     index.write()
 
@@ -1001,7 +1030,7 @@ def index_entry_from_directory(st, path: bytes) -> Opt
         head = read_submodule_head(path)
         if head is None:
             return None
-        return index_entry_from_stat(st, head, 0, mode=S_IFGITLINK)
+        return index_entry_from_stat(st, head, mode=S_IFGITLINK)
     return None
 
 
@@ -1029,7 +1058,7 @@ def index_entry_from_path(
         blob = blob_from_path_and_stat(path, st)
         if object_store is not None:
             object_store.add_object(blob)
-        return index_entry_from_stat(st, blob.id, 0)
+        return index_entry_from_stat(st, blob.id)
 
     return None
 
@@ -1074,7 +1103,6 @@ def iter_fresh_objects(
             if include_deleted:
                 yield path, None, None
         else:
-            entry = IndexEntry(*entry)
             yield path, entry.sha, cleanup_mode(entry.mode)
 
 
@@ -1089,8 +1117,7 @@ def refresh_index(index: Index, root_path: bytes):
     """
     for path, entry in iter_fresh_entries(index, root_path):
         if entry:
-            stage = read_stage(entry)
-            index[(path, stage)] = entry
+            index[path] = entry
 
 
 class locked_index:
blob - 3749078037d60dcac51a080712526a7631d04908
blob + 5a68f58dc586f1fc082ee0a7a3adcb66c77718d4
--- dulwich/porcelain.py
+++ dulwich/porcelain.py
@@ -1970,7 +1970,7 @@ def checkout_branch(repo, target: Union[bytes, str], f
             blob = repo.object_store[entry.sha]
             ensure_dir_exists(os.path.dirname(full_path))
             st = build_file_from_blob(blob, entry.mode, full_path)
-            repo_index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
+            repo_index[entry.path] = index_entry_from_stat(st, entry.sha)
 
         repo_index.write()
 
blob - 72f4052d0a512802240e6fdd27ae71214b6dd5fd
blob + b1940d3220c911229f5e7dbabd931955859bdfaa
--- dulwich/repo.py
+++ dulwich/repo.py
@@ -1422,7 +1422,7 @@ class Repo(BaseRepo):
                     blob = blob_from_path_and_stat(full_path, st)
                     blob = blob_normalizer.checkin_normalize(blob, fs_path)
                     self.object_store.add_object(blob)
-                    index[tree_path] = index_entry_from_stat(st, blob.id, 0)
+                    index[tree_path] = index_entry_from_stat(st, blob.id)
         index.write()
 
     def unstage(self, fs_paths: List[str]):
@@ -1478,8 +1478,6 @@ class Repo(BaseRepo):
                 gid=st.st_gid if st else 0,
                 size=len(self[tree_entry[1]].data),
                 sha=tree_entry[1],
-                flags=0,
-                extended_flags=0
             )
 
             index[tree_path] = index_entry
blob - 660fa416263b69b99f262355b22775d69912a61f
blob + afd599bd53dce72edf45bd703a051615de1fe6e1
--- dulwich/tests/test_index.py
+++ dulwich/tests/test_index.py
@@ -34,7 +34,7 @@ from dulwich.tests import TestCase, skipIf
 from ..index import (
     Index,
     IndexEntry,
-    Stage,
+    SerializedIndexEntry,
     _fs_to_tree_path,
     _tree_to_fs_path,
     build_index_from_tree,
@@ -43,6 +43,7 @@ from ..index import (
     get_unstaged_changes,
     index_entry_from_stat,
     read_index,
+    read_index_dict,
     validate_path_element_default,
     validate_path_element_ntfs,
     write_cache_time,
@@ -102,8 +103,6 @@ class SimpleIndexTestCase(IndexTestCase):
                 1000,
                 0,
                 b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
-                0,
-                0,
             ),
             self.get_simple_index("index")[b"bla"],
         )
@@ -134,8 +133,8 @@ class SimpleIndexWriterTestCase(IndexTestCase):
     def test_simple_write(self):
         entries = [
             (
-                b"barbla",
-                IndexEntry(
+                SerializedIndexEntry(
+                    b"barbla",
                     (1230680220, 0),
                     (1230680220, 0),
                     2050,
@@ -157,6 +156,38 @@ class SimpleIndexWriterTestCase(IndexTestCase):
             self.assertEqual(entries, list(read_index(x)))
 
 
+class ReadIndexDictTests(IndexTestCase):
+
+    def setUp(self):
+        IndexTestCase.setUp(self)
+        self.tempdir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        IndexTestCase.tearDown(self)
+        shutil.rmtree(self.tempdir)
+
+    def test_simple_write(self):
+        entries = {
+            b"barbla": IndexEntry(
+                (1230680220, 0),
+                (1230680220, 0),
+                2050,
+                3761020,
+                33188,
+                1000,
+                1000,
+                0,
+                b"e69de29bb2d1d6434b8b29ae775ad8c2e48c5391",
+            )
+        }
+        filename = os.path.join(self.tempdir, "test-simple-write-index")
+        with open(filename, "wb+") as x:
+            write_index_dict(x, entries)
+
+        with open(filename, "rb") as x:
+            self.assertEqual(entries, read_index_dict(x))
+
+
 class CommitTreeTests(TestCase):
     def setUp(self):
         super().setUp()
@@ -244,7 +275,7 @@ class IndexEntryFromStatTests(TestCase):
                 1324180496,
             )
         )
-        entry = index_entry_from_stat(st, "22" * 20, 0)
+        entry = index_entry_from_stat(st, b"22" * 20)
         self.assertEqual(
             entry,
             IndexEntry(
@@ -256,9 +287,7 @@ class IndexEntryFromStatTests(TestCase):
                 1000,
                 1000,
                 12288,
-                "2222222222222222222222222222222222222222",
-                0,
-                None,
+                b"2222222222222222222222222222222222222222",
             ),
         )
 
@@ -277,7 +306,7 @@ class IndexEntryFromStatTests(TestCase):
                 1324180496,
             )
         )
-        entry = index_entry_from_stat(st, "22" * 20, 0, mode=stat.S_IFREG + 0o755)
+        entry = index_entry_from_stat(st, b"22" * 20, mode=stat.S_IFREG + 0o755)
         self.assertEqual(
             entry,
             IndexEntry(
@@ -289,9 +318,7 @@ class IndexEntryFromStatTests(TestCase):
                 1000,
                 1000,
                 12288,
-                "2222222222222222222222222222222222222222",
-                0,
-                None,
+                b"2222222222222222222222222222222222222222",
             ),
         )