Commit Diff


commit - 0078a59e44b767f958279733ef22bc7003d623e1
commit + 90a2b333aee44a800ea9beb89728e6c9b5ff336b
blob - c51ea2312778272fecff47edc8015963042d288b
blob + b63494263735a2f6758a23bbafb6950c98683faa
--- NEWS
+++ NEWS
@@ -1,3 +1,7 @@
+0.20.31	2022-01-21
+
+ * Add GitClient.clone(). (Jelmer Vernooij, #920)
+
 0.20.30	2022-01-08
 
 0.20.29	2022-01-08
blob - fc2a42b75edd1cae3f05952f180f61368145a86c
blob + 10989e70ddc37e59a0b546b680dcdbc375f5e454
--- PKG-INFO
+++ PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.20.30
+Version: 0.20.31
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
blob - b6d263798a69926bced09fe3528edf516a10fed5
blob + e313fa9d074b29fbbc1daf5fc705e0caf99fc051
--- dulwich/__init__.py
+++ dulwich/__init__.py
@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 20, 30)
+__version__ = (0, 20, 31)
blob - ad552ab0a52daeb0ab7ce6c956929a2d28106b60
blob + 12e5b7a10f51a02ae1f1a609989c4a1996c88a3f
--- dulwich/bundle.py
+++ dulwich/bundle.py
@@ -119,4 +119,4 @@ def write_bundle(f, bundle):
     for ref, obj_id in bundle.references.items():
         f.write(b"%s %s\n" % (obj_id, ref))
     f.write(b"\n")
-    write_pack_data(f, len(bundle.pack_data), iter(bundle.pack_data))
+    write_pack_data(f, records=bundle.pack_data)
blob - 03e94f5d8c74c7e64b01a25526f9ac734615c965
blob + 0dc009171566b286623cc4f8411cfd1331872c21
--- dulwich/client.py
+++ dulwich/client.py
@@ -110,6 +110,7 @@ from dulwich.pack import (
 from dulwich.refs import (
     read_info_refs,
     ANNOTATED_TAG_SUFFIX,
+    _import_remote_refs,
 )
 
 
@@ -494,7 +495,101 @@ class GitClient(object):
 
         """
         raise NotImplementedError(self.send_pack)
 
+    def clone(self, path, target_path, mkdir: bool = True, bare=False, origin="origin",
+              checkout=None, branch=None, depth=None):
+        """Clone a repository.
+
+        Args:
+          path: Location of the source repository, as passed to fetch()
+          target_path: Filesystem path of the repository to create
+          mkdir: Create target_path first; a directory created here is
+            removed again if the clone fails
+          bare: Create a bare repository instead of one with a work tree
+          origin: Name of the remote recorded in the new repository's
+            config; a str, although UTF-8 bytes are accepted as well
+          checkout: Whether to call reset_index() once HEAD is set;
+            defaults to True unless bare, and cannot be combined with bare
+          branch: Passed to _set_default_branch() to pick the new HEAD
+          depth: Depth at which to fetch
+        Returns: Created repository as `Repo`
+        Raises:
+          ValueError: if both bare and checkout are requested
+        """
+        from .refs import _set_origin_head, _set_default_branch, _set_head
+        from .repo import Repo
+
+        # Repo.clone() takes the remote name as bytes; accept that spelling
+        # here too instead of failing on .encode() further down.
+        if isinstance(origin, bytes):
+            origin = origin.decode('utf-8')
+        encoded_origin = origin.encode('utf-8')
+
+        if mkdir:
+            os.mkdir(target_path)
+
+        try:
+            target = None
+            if not bare:
+                target = Repo.init(target_path)
+                if checkout is None:
+                    checkout = True
+            else:
+                if checkout:
+                    raise ValueError("checkout and bare are incompatible")
+                target = Repo.init_bare(target_path)
+
+            # TODO(jelmer): abstract method for get_location?
+            if isinstance(self, (LocalGitClient, SubprocessGitClient)):
+                encoded_path = path.encode('utf-8')
+            else:
+                encoded_path = self.get_url(path).encode('utf-8')
+
+            target_config = target.get_config()
+            remote_section = (b"remote", encoded_origin)
+            target_config.set(remote_section, b"url", encoded_path)
+            target_config.set(
+                remote_section,
+                b"fetch",
+                b"+refs/heads/*:refs/remotes/" + encoded_origin + b"/*",
+            )
+            target_config.write_to_path()
+
+            ref_message = b"clone: from " + encoded_path
+            result = self.fetch(path, target, depth=depth)
+            _import_remote_refs(
+                target.refs, origin, result.refs, message=ref_message)
+
+            origin_head = result.symrefs.get(b"HEAD")
+            origin_sha = result.refs.get(b'HEAD')
+            if origin_sha and not origin_head:
+                # set detached HEAD
+                target.refs[b"HEAD"] = origin_sha
+
+            _set_origin_head(target.refs, encoded_origin, origin_head)
+            head_ref = _set_default_branch(
+                target.refs, encoded_origin, origin_head, branch, ref_message
+            )
+
+            # Update target head
+            if head_ref:
+                head = _set_head(target.refs, head_ref, ref_message)
+            else:
+                head = None
+
+            if checkout and head is not None:
+                target.reset_index()
+        except BaseException:
+            # Deliberately broad: whatever interrupted the clone, close the
+            # repository and remove the directory we created, then re-raise.
+            if target is not None:
+                target.close()
+            if mkdir:
+                import shutil
+                shutil.rmtree(target_path)
+            raise
+        return target
+
     def fetch(self, path, target, determine_wants=None, progress=None, depth=None):
         """Fetch into a target repository.
 
blob - 9b76a1d0c91629092479f242c1c2409c5a3276e0
blob + 74c1a1fa59090fec5a02a427fa823fe1b1fc983c
--- dulwich/pack.py
+++ dulwich/pack.py
@@ -1714,12 +1714,12 @@ def write_pack_objects(
     )
 
 
-def write_pack_data(f, num_records, records, progress=None, compression_level=-1):
+def write_pack_data(f, num_records=None, records=None, progress=None, compression_level=-1):
     """Write a new pack data file.
 
     Args:
       f: File to write to
-      num_records: Number of records
+      num_records: Number of records (defaults to len(records) if None)
       records: Iterator over type_num, object_id, delta_base, raw
       progress: Function to report progress to
       compression_level: the zlib compression level
@@ -1728,7 +1728,10 @@ def write_pack_data(f, num_records, records, progress=
     # Write the pack
     entries = {}
     f = SHA1Writer(f)
+    if num_records is None:
+        num_records = len(records)
     write_pack_header(f, num_records)
+    actual_num_records = 0
     for i, (type_num, object_id, delta_base, raw) in enumerate(records):
         if progress is not None:
             progress(("writing pack data: %d/%d\r" % (i, num_records)).encode("ascii"))
@@ -1743,7 +1746,12 @@ def write_pack_data(f, num_records, records, progress=
                 type_num = OFS_DELTA
                 raw = (offset - base_offset, raw)
         crc32 = write_pack_object(f, type_num, raw, compression_level=compression_level)
+        actual_num_records += 1
         entries[object_id] = (offset, crc32)
+    if actual_num_records != num_records:
+        raise AssertionError(
+            'actual records written differs: %d != %d' % (
+                actual_num_records, num_records))
     return entries, f.write_sha()
 
 
blob - 8d4ab0eb4f9731223253ae61659977f5a5308aac
blob + 95309066ea3b3918b21a7bdac2b7a26d1ff0f0dc
--- dulwich/porcelain.py
+++ dulwich/porcelain.py
@@ -74,7 +74,6 @@ import stat
 import sys
 import time
 from typing import (
-    Dict,
     Optional,
     Tuple,
     Union,
@@ -137,11 +136,9 @@ from dulwich.protocol import (
     ZERO_SHA,
 )
 from dulwich.refs import (
-    ANNOTATED_TAG_SUFFIX,
     LOCAL_BRANCH_PREFIX,
     LOCAL_TAG_PREFIX,
-    strip_peeled_refs,
-    RefsContainer,
+    _import_remote_refs,
 )
 from dulwich.repo import BaseRepo, Repo
 from dulwich.server import (
@@ -401,7 +398,7 @@ def clone(
     checkout=None,
     errstream=default_bytes_err_stream,
     outstream=None,
-    origin=b"origin",
+    origin="origin",
     depth=None,
     branch=None,
     **kwargs
@@ -441,15 +438,18 @@ def clone(
 
     mkdir = not os.path.exists(target)
 
-    with open_repo_closing(source) as r:
-        return r.clone(
-            target,
-            mkdir=mkdir,
-            bare=bare,
-            origin=origin,
-            checkout=checkout,
-            branch=branch,
-        )
+    (client, path) = get_transport_and_path(source)
+
+    return client.clone(
+        path,
+        target,
+        mkdir=mkdir,
+        bare=bare,
+        origin=origin,
+        checkout=checkout,
+        branch=branch,
+        depth=depth,
+    )
 
 
 def add(repo=".", paths=None):
@@ -1485,34 +1485,6 @@ def get_branch_remote(repo):
         except KeyError:
             remote_name = b"origin"
     return remote_name
-
-
-def _import_remote_refs(
-    refs_container: RefsContainer,
-    remote_name: str,
-    refs: Dict[str, str],
-    message: Optional[bytes] = None,
-    prune: bool = False,
-    prune_tags: bool = False,
-):
-    stripped_refs = strip_peeled_refs(refs)
-    branches = {
-        n[len(LOCAL_BRANCH_PREFIX) :]: v
-        for (n, v) in stripped_refs.items()
-        if n.startswith(LOCAL_BRANCH_PREFIX)
-    }
-    refs_container.import_refs(
-        b"refs/remotes/" + remote_name.encode(),
-        branches,
-        message=message,
-        prune=prune,
-    )
-    tags = {
-        n[len(b"refs/tags/") :]: v
-        for (n, v) in stripped_refs.items()
-        if n.startswith(b"refs/tags/") and not n.endswith(ANNOTATED_TAG_SUFFIX)
-    }
-    refs_container.import_refs(b"refs/tags", tags, message=message, prune=prune_tags)
 
 
 def fetch(
blob - 9d022f44cc481abfb0c44e6923387c0d398ebe0e
blob + e82f08d5b3d2cb245370501143754fb2a0e6afca
--- dulwich/refs.py
+++ dulwich/refs.py
@@ -23,6 +23,7 @@
 
 """
 import os
+from typing import Dict, Optional
 
 from dulwich.errors import (
     PackedRefsException,
@@ -40,6 +41,7 @@ from dulwich.file import (
 )
 
 
+HEADREF = b"HEAD"
 SYMREF = b"ref: "
 LOCAL_BRANCH_PREFIX = b"refs/heads/"
 LOCAL_TAG_PREFIX = b"refs/tags/"
@@ -247,7 +249,7 @@ class RefsContainer(object):
         Raises:
           KeyError: if a refname is not HEAD or is otherwise not valid.
         """
-        if name in (b"HEAD", b"refs/stash"):
+        if name in (HEADREF, b"refs/stash"):
             return
         if not name.startswith(b"refs/") or not check_ref_format(name[5:]):
             raise RefFormatError(name)
@@ -724,8 +726,8 @@ class DiskRefsContainer(RefsContainer):
 
     def allkeys(self):
         allkeys = set()
-        if os.path.exists(self.refpath(b"HEAD")):
-            allkeys.add(b"HEAD")
+        if os.path.exists(self.refpath(HEADREF)):
+            allkeys.add(HEADREF)
         path = self.refpath(b"")
         refspath = self.refpath(b"refs")
         for root, unused_dirs, files in os.walk(refspath):
@@ -745,7 +747,7 @@ class DiskRefsContainer(RefsContainer):
             name = name.replace(b"/", os.fsencode(os.path.sep))
         # TODO: as the 'HEAD' reference is working tree specific, it
         # should actually not be a part of RefsContainer
-        if name == b"HEAD":
+        if name == HEADREF:
             return os.path.join(self.worktree_path, name)
         else:
             return os.path.join(self.path, name)
@@ -1181,7 +1183,7 @@ def write_info_refs(refs, store):
     for name, sha in sorted(refs.items()):
         # get_refs() includes HEAD as a special case, but we don't want to
         # advertise it
-        if name == b"HEAD":
+        if name == HEADREF:
             continue
         try:
             o = store[sha]
@@ -1210,7 +1212,7 @@ def _set_origin_head(refs, origin, origin_head):
     # set refs/remotes/origin/HEAD
     origin_base = b"refs/remotes/" + origin + b"/"
     if origin_head and origin_head.startswith(LOCAL_BRANCH_PREFIX):
-        origin_ref = origin_base + b"HEAD"
+        origin_ref = origin_base + HEADREF
         target_ref = origin_base + origin_head[len(LOCAL_BRANCH_PREFIX) :]
         if target_ref in refs:
             refs.set_symbolic_ref(origin_ref, target_ref)
@@ -1254,18 +1256,63 @@ def _set_head(refs, head_ref, ref_message):
         if isinstance(head, Tag):
             _cls, obj = head.object
             head = obj.get_object(obj).id
-        del refs[b"HEAD"]
+        del refs[HEADREF]
         refs.set_if_equals(
-            b"HEAD", None, head, message=ref_message
+            HEADREF, None, head, message=ref_message
         )
     else:
         # set HEAD to specific branch
         try:
             head = refs[head_ref]
-            refs.set_symbolic_ref(b"HEAD", head_ref)
-            refs.set_if_equals(
-                b"HEAD", None, head, message=ref_message
-            )
+            refs.set_symbolic_ref(HEADREF, head_ref)
+            refs.set_if_equals(HEADREF, None, head, message=ref_message)
         except KeyError:
             head = None
     return head
+
+
+def _import_remote_refs(
+    refs_container: RefsContainer,
+    remote_name: str,
+    refs: Dict[bytes, bytes],
+    message: Optional[bytes] = None,
+    prune: bool = False,
+    prune_tags: bool = False,
+) -> None:
+    """Import the branches and tags advertised by a remote.
+
+    After passing refs through strip_peeled_refs(), branches (refs/heads/*)
+    are imported below refs/remotes/<remote_name> and tags (refs/tags/*)
+    below refs/tags.
+
+    Args:
+      refs_container: Container to import the refs into
+      remote_name: Name of the remote, used to build the refs/remotes base
+      refs: Mapping of remote ref names to their values; names must be
+        bytes since they are matched against bytes prefixes
+      message: Optional message passed on to import_refs()
+      prune: Passed on to import_refs() for the remote branches
+      prune_tags: Passed on to import_refs() for the tags
+    """
+    stripped_refs = strip_peeled_refs(refs)
+    branches = {
+        n[len(LOCAL_BRANCH_PREFIX) :]: v
+        for (n, v) in stripped_refs.items()
+        if n.startswith(LOCAL_BRANCH_PREFIX)
+    }
+    refs_container.import_refs(
+        b"refs/remotes/" + remote_name.encode(),
+        branches,
+        message=message,
+        prune=prune,
+    )
+    tags = {
+        n[len(LOCAL_TAG_PREFIX) :]: v
+        for (n, v) in stripped_refs.items()
+        if n.startswith(LOCAL_TAG_PREFIX) and not n.endswith(ANNOTATED_TAG_SUFFIX)
+    }
+    # The import base carries no trailing slash (compare the branch import
+    # above), so strip the one that LOCAL_TAG_PREFIX ends with.
+    refs_container.import_refs(
+        LOCAL_TAG_PREFIX.rstrip(b"/"), tags, message=message, prune=prune_tags
+    )
blob - e0362ef44e7fc68e42b391ea3ee532c0619d0c7b
blob + 93bfe021f0127a20a677900d924f342440be9602
--- dulwich/repo.py
+++ dulwich/repo.py
@@ -1389,6 +1389,7 @@ class Repo(BaseRepo):
         origin=b"origin",
         checkout=None,
         branch=None,
+        depth=None,
     ):
         """Clone this repository.
 
@@ -1401,6 +1402,7 @@ class Repo(BaseRepo):
             cloned from this repository
           branch: Optional branch or tag to be used as HEAD in the new repository
             instead of this repository's HEAD.
+          depth: Depth at which to fetch
         Returns: Created repository as `Repo`
         """
 
@@ -1432,7 +1434,7 @@ class Repo(BaseRepo):
             target_config.write_to_path()
 
             ref_message = b"clone: from " + encoded_path
-            self.fetch(target)
+            self.fetch(target, depth=depth)
             target.refs.import_refs(
                 b"refs/remotes/" + origin,
                 self.refs.as_dict(b"refs/heads"),
blob - db5ed86a9b515476060843c85841e0be0751cc40
blob + 1717b942ad30f42fe2a9338a4e5a37c8979ffe42
--- dulwich/tests/test_client.py
+++ dulwich/tests/test_client.py
@@ -840,6 +840,21 @@ class LocalGitClientTests(TestCase):
         s = open_repo("a.git")
         self.addCleanup(tear_down_repo, s)
         self.assertEqual(s.get_refs(), c.fetch(s.path, t).refs)
+
+    def test_clone(self):
+        c = LocalGitClient()
+        s = open_repo("a.git")
+        self.addCleanup(tear_down_repo, s)
+        target = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, target)
+        result_repo = c.clone(s.path, target, mkdir=False)
+        # Cleanups run last-in first-out, so the clone is closed before its
+        # directory is removed.
+        self.addCleanup(result_repo.close)
+        expected = dict(s.get_refs())
+        expected[b'refs/remotes/origin/HEAD'] = expected[b'HEAD']
+        expected[b'refs/remotes/origin/master'] = expected[b'refs/heads/master']
+        self.assertEqual(expected, result_repo.get_refs())
 
     def test_fetch_empty(self):
         c = LocalGitClient()
blob - fc2a42b75edd1cae3f05952f180f61368145a86c
blob + 10989e70ddc37e59a0b546b680dcdbc375f5e454
--- dulwich.egg-info/PKG-INFO
+++ dulwich.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.20.30
+Version: 0.20.31
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
blob - e0ee7431ac4e6b4f1f0ec9fe5766e68c227588b9
blob + 5d4ab14a88f6589b45ef7675a38bb8c7dc836708
--- setup.py
+++ setup.py
@@ -23,7 +23,7 @@ if sys.version_info < (3, 6):
         'For 2.7 support, please install a version prior to 0.20')
 
 
-dulwich_version_string = '0.20.30'
+dulwich_version_string = '0.20.31'
 
 
 class DulwichDistribution(Distribution):