Browse Source

Import upstream version 0.21.2

Jelmer Vernooij 2 years ago
parent
commit
98b71ae313

+ 5 - 0
NEWS

@@ -1,3 +1,8 @@
+0.21.2	2023-01-18
+
+ * Fix early file close bug in ``dulwich.pack.extend_pack``.
+   (Jelmer Vernooij)
+
 0.21.1	2023-01-17
 
  * Factor out ``dulwich.pack.extend_pack``.

+ 1 - 1
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.21.1
+Version: 0.21.2
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij

+ 3 - 1
docs/tutorial/remote.txt

@@ -9,7 +9,8 @@ Most of the tests in this file require a Dulwich server, so let's start one:
     >>> cid = repo.do_commit(b"message", committer=b"Jelmer <jelmer@samba.org>")
     >>> backend = DictBackend({b'/': repo})
     >>> dul_server = TCPGitServer(backend, b'localhost', 0)
-    >>> threading.Thread(target=dul_server.serve).start()
+    >>> server_thread = threading.Thread(target=dul_server.serve)
+    >>> server_thread.start()
     >>> server_address, server_port=dul_server.socket.getsockname()
 
 Remote repositories
@@ -82,6 +83,7 @@ importing the received pack file into the local repository::
    >>> from dulwich.repo import Repo
    >>> local = Repo.init("local", mkdir=True)
    >>> remote_refs = client.fetch(b"/", local)
+   >>> local.close()
 
 Let's shut down the server now that all tests have been run::
 

+ 1 - 1
dulwich.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.21.1
+Version: 0.21.2
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij

+ 1 - 1
dulwich/__init__.py

@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 21, 1)
+__version__ = (0, 21, 2)

+ 2 - 2
dulwich/client.py

@@ -126,7 +126,7 @@ from dulwich.pack import (
 )
 from dulwich.refs import (
     read_info_refs,
-    ANNOTATED_TAG_SUFFIX,
+    PEELED_TAG_SUFFIX,
     _import_remote_refs,
 )
 from dulwich.repo import Repo
@@ -992,7 +992,7 @@ def check_wants(wants, refs):
 
     """
     missing = set(wants) - {
-        v for (k, v) in refs.items() if not k.endswith(ANNOTATED_TAG_SUFFIX)
+        v for (k, v) in refs.items() if not k.endswith(PEELED_TAG_SUFFIX)
     }
     if missing:
         raise InvalidWants(missing)

+ 45 - 65
dulwich/object_store.py

@@ -22,6 +22,7 @@
 
 """Git object store interfaces and implementation."""
 
+from contextlib import suppress
 from io import BytesIO
 import os
 import stat
@@ -77,7 +78,7 @@ from dulwich.pack import (
     PACK_SPOOL_FILE_MAX_SIZE,
 )
 from dulwich.protocol import DEPTH_INFINITE
-from dulwich.refs import ANNOTATED_TAG_SUFFIX, Ref
+from dulwich.refs import PEELED_TAG_SUFFIX, Ref
 
 INFODIR = "info"
 PACKDIR = "pack"
@@ -115,7 +116,7 @@ class BaseObjectStore:
             sha
             for (ref, sha) in refs.items()
             if (sha not in self or _want_deepen(sha))
-            and not ref.endswith(ANNOTATED_TAG_SUFFIX)
+            and not ref.endswith(PEELED_TAG_SUFFIX)
             and not sha == ZERO_SHA
         ]
 
@@ -277,7 +278,7 @@ class BaseObjectStore:
         warnings.warn(
             "Please use dulwich.object_store.peel_sha()",
             DeprecationWarning, stacklevel=2)
-        return peel_sha(self, sha)
+        return peel_sha(self, sha)[1]
 
     def _get_depth(
         self, head, get_parents=lambda commit: commit.parents, max_depth=None,
@@ -803,7 +804,7 @@ class DiskObjectStore(PackBasedObjectStore):
         suffix = suffix.decode("ascii")
         return os.path.join(self.pack_dir, "pack-" + suffix)
 
-    def _complete_thin_pack(self, f, path, copier, indexer, progress=None):
+    def _complete_pack(self, f, path, num_objects, indexer, progress=None):
         """Move a specific file containing a pack into the pack directory.
 
         Note: The file should be on the same file system as the
@@ -812,41 +813,48 @@ class DiskObjectStore(PackBasedObjectStore):
         Args:
           f: Open file object for the pack.
           path: Path to the pack file.
-          copier: A PackStreamCopier to use for writing pack data.
           indexer: A PackIndexer for indexing the pack.
         """
         entries = []
         for i, entry in enumerate(indexer):
             if progress is not None:
-                progress(("generating index: %d/%d\r" % (i, len(copier))).encode('ascii'))
+                progress(("generating index: %d/%d\r" % (i, num_objects)).encode('ascii'))
             entries.append(entry)
 
         pack_sha, extra_entries = extend_pack(
             f, indexer.ext_refs(), get_raw=self.get_raw, compression_level=self.pack_compression_level,
             progress=progress)
+        f.flush()
+        try:
+            fileno = f.fileno()
+        except AttributeError:
+            pass
+        else:
+            os.fsync(fileno)
+        f.close()
 
         entries.extend(extra_entries)
 
         # Move the pack in.
         entries.sort()
         pack_base_name = self._get_pack_basepath(entries)
-        target_pack = pack_base_name + ".pack"
+
+        for pack in self.packs:
+            if pack._basename == pack_base_name:
+                return pack
+
+        target_pack_path = pack_base_name + ".pack"
+        target_index_path = pack_base_name + ".idx"
         if sys.platform == "win32":
             # Windows might have the target pack file lingering. Attempt
             # removal, silently passing if the target does not exist.
-            try:
-                os.remove(target_pack)
-            except FileNotFoundError:
-                pass
-        os.rename(path, target_pack)
+            with suppress(FileNotFoundError):
+                os.remove(target_pack_path)
+        os.rename(path, target_pack_path)
 
         # Write the index.
-        index_file = GitFile(pack_base_name + ".idx", "wb", mask=PACK_MODE)
-        try:
+        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
             write_pack_index(index_file, entries, pack_sha)
-            index_file.close()
-        finally:
-            index_file.abort()
 
         # Add the pack to the store and return it.
         final_pack = Pack(pack_base_name)
@@ -877,39 +885,7 @@ class DiskObjectStore(PackBasedObjectStore):
             indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
             copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
             copier.verify(progress=progress)
-            return self._complete_thin_pack(f, path, copier, indexer, progress=progress)
-
-    def move_in_pack(self, path):
-        """Move a specific file containing a pack into the pack directory.
-
-        Note: The file should be on the same file system as the
-            packs directory.
-
-        Args:
-          path: Path to the pack file.
-        """
-        with PackData(path) as p:
-            entries = p.sorted_entries()
-            basename = self._get_pack_basepath(entries)
-            index_name = basename + ".idx"
-            if not os.path.exists(index_name):
-                with GitFile(index_name, "wb", mask=PACK_MODE) as f:
-                    write_pack_index(f, entries, p.get_stored_checksum())
-        for pack in self.packs:
-            if pack._basename == basename:
-                return pack
-        target_pack = basename + ".pack"
-        if sys.platform == "win32":
-            # Windows might have the target pack file lingering. Attempt
-            # removal, silently passing if the target does not exist.
-            try:
-                os.remove(target_pack)
-            except FileNotFoundError:
-                pass
-        os.rename(path, target_pack)
-        final_pack = Pack(basename)
-        self._add_cached_pack(basename, final_pack)
-        return final_pack
+            return self._complete_pack(f, path, len(copier), indexer, progress=progress)
 
     def add_pack(self):
         """Add a new pack to this object store.
@@ -921,16 +897,17 @@ class DiskObjectStore(PackBasedObjectStore):
         import tempfile
 
         fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
-        f = os.fdopen(fd, "wb")
+        f = os.fdopen(fd, "w+b")
         os.chmod(path, PACK_MODE)
 
         def commit():
-            f.flush()
-            os.fsync(fd)
-            f.close()
-            if os.path.getsize(path) > 0:
-                return self.move_in_pack(path)
+            if f.tell() > 0:
+                f.seek(0)
+                with PackData(path, f) as pd:
+                    indexer = PackIndexer.for_pack_data(pd, resolve_ext_ref=self.get_raw)
+                    return self._complete_pack(f, path, len(pd), indexer)
             else:
+                f.close()
                 os.remove(path)
                 return None
 
@@ -1048,14 +1025,17 @@ class MemoryObjectStore(BaseObjectStore):
 
         def commit():
             size = f.tell()
-            f.seek(0)
-            p = PackData.from_file(f, size)
-            for obj in PackInflater.for_pack_data(p, self.get_raw):
-                self.add_object(obj)
-            p.close()
+            if size > 0:
+                f.seek(0)
+                p = PackData.from_file(f, size)
+                for obj in PackInflater.for_pack_data(p, self.get_raw):
+                    self.add_object(obj)
+                p.close()
+            else:
+                f.close()
 
         def abort():
-            pass
+            f.close()
 
         return f, commit, abort
 
@@ -1642,7 +1622,7 @@ def iter_tree_contents(
             yield entry
 
 
-def peel_sha(store: ObjectContainer, sha: bytes) -> ShaFile:
+def peel_sha(store: ObjectContainer, sha: bytes) -> Tuple[ShaFile, ShaFile]:
     """Peel all tags from a SHA.
 
     Args:
@@ -1651,10 +1631,10 @@ def peel_sha(store: ObjectContainer, sha: bytes) -> ShaFile:
         intermediate tags; if the original ref does not point to a tag,
         this will equal the original SHA1.
     """
-    obj = store[sha]
+    unpeeled = obj = store[sha]
     obj_class = object_class(obj.type_name)
     while obj_class is Tag:
         assert isinstance(obj, Tag)
         obj_class, sha = obj.object
         obj = store[sha]
-    return obj
+    return unpeeled, obj

+ 0 - 2
dulwich/pack.py

@@ -2604,8 +2604,6 @@ def extend_pack(f: BinaryIO, object_ids: Set[ObjectID], get_raw, *, compression_
         extra_entries.append((object_id, offset, crc32))
     pack_sha = new_sha.digest()
     f.write(pack_sha)
-    f.close()
-
     return pack_sha, extra_entries
 
 

+ 32 - 7
dulwich/refs.py

@@ -25,6 +25,7 @@
 from contextlib import suppress
 import os
 from typing import Dict, Optional
+import warnings
 
 from dulwich.errors import (
     PackedRefsException,
@@ -50,7 +51,10 @@ SYMREF = b"ref: "
 LOCAL_BRANCH_PREFIX = b"refs/heads/"
 LOCAL_TAG_PREFIX = b"refs/tags/"
 BAD_REF_CHARS = set(b"\177 ~^:?*[")
-ANNOTATED_TAG_SUFFIX = b"^{}"
+PEELED_TAG_SUFFIX = b"^{}"
+
+# For backwards compatibility
+ANNOTATED_TAG_SUFFIX = PEELED_TAG_SUFFIX
 
 
 class SymrefLoop(Exception):
@@ -593,7 +597,7 @@ class InfoRefsContainer(RefsContainer):
         self._peeled = {}
         for line in f.readlines():
             sha, name = line.rstrip(b"\n").split(b"\t")
-            if name.endswith(ANNOTATED_TAG_SUFFIX):
+            if name.endswith(PEELED_TAG_SUFFIX):
                 name = name[:-3]
                 if not check_ref_format(name):
                     raise ValueError("invalid ref name %r" % name)
@@ -1153,7 +1157,7 @@ def read_info_refs(f):
 
 def write_info_refs(refs, store: ObjectContainer):
     """Generate info refs."""
-    # Avoid recursive import :(
+    # TODO: Avoid recursive import :(
     from dulwich.object_store import peel_sha
     for name, sha in sorted(refs.items()):
         # get_refs() includes HEAD as a special case, but we don't want to
@@ -1164,10 +1168,10 @@ def write_info_refs(refs, store: ObjectContainer):
             o = store[sha]
         except KeyError:
             continue
-        peeled = peel_sha(store, sha)
+        unpeeled, peeled = peel_sha(store, sha)
         yield o.id + b"\t" + name + b"\n"
         if o.id != peeled.id:
-            yield peeled.id + b"\t" + name + ANNOTATED_TAG_SUFFIX + b"\n"
+            yield peeled.id + b"\t" + name + PEELED_TAG_SUFFIX + b"\n"
 
 
 def is_local_branch(x):
@@ -1179,7 +1183,7 @@ def strip_peeled_refs(refs):
     return {
         ref: sha
         for (ref, sha) in refs.items()
-        if not ref.endswith(ANNOTATED_TAG_SUFFIX)
+        if not ref.endswith(PEELED_TAG_SUFFIX)
     }
 
 
@@ -1275,6 +1279,27 @@ def _import_remote_refs(
     tags = {
         n[len(LOCAL_TAG_PREFIX) :]: v
         for (n, v) in stripped_refs.items()
-        if n.startswith(LOCAL_TAG_PREFIX) and not n.endswith(ANNOTATED_TAG_SUFFIX)
+        if n.startswith(LOCAL_TAG_PREFIX) and not n.endswith(PEELED_TAG_SUFFIX)
     }
     refs_container.import_refs(LOCAL_TAG_PREFIX, tags, message=message, prune=prune_tags)
+
+
+def serialize_refs(store, refs):
+    # TODO: Avoid recursive import :(
+    from dulwich.object_store import peel_sha
+    ret = {}
+    for ref, sha in refs.items():
+        try:
+            unpeeled, peeled = peel_sha(store, sha)
+        except KeyError:
+            warnings.warn(
+                "ref %s points at non-present sha %s"
+                % (ref.decode("utf-8", "replace"), sha.decode("ascii")),
+                UserWarning,
+            )
+            continue
+        else:
+            if isinstance(unpeeled, Tag):
+                ret[ref + PEELED_TAG_SUFFIX] = peeled.id
+            ret[ref] = unpeeled.id
+    return ret

+ 5 - 19
dulwich/repo.py

@@ -46,6 +46,8 @@ from typing import (
     Iterable,
     Set
 )
+import warnings
+
 
 if TYPE_CHECKING:
     # There are no circular imports here, but we try to defer imports as long
@@ -105,6 +107,7 @@ from dulwich.refs import (  # noqa: F401
     ANNOTATED_TAG_SUFFIX,
     LOCAL_BRANCH_PREFIX,
     LOCAL_TAG_PREFIX,
+    serialize_refs,
     check_ref_format,
     RefsContainer,
     DictRefsContainer,
@@ -120,9 +123,6 @@ from dulwich.refs import (  # noqa: F401
 )
 
 
-import warnings
-
-
 CONTROLDIR = ".git"
 OBJECTDIR = "objects"
 REFSDIR = "refs"
@@ -520,21 +520,7 @@ class BaseRepo:
         if depth not in (None, 0):
             raise NotImplementedError("depth not supported yet")
 
-        refs = {}
-        for ref, sha in self.get_refs().items():
-            try:
-                obj = self.object_store[sha]
-            except KeyError:
-                warnings.warn(
-                    "ref %s points at non-present sha %s"
-                    % (ref.decode("utf-8", "replace"), sha.decode("ascii")),
-                    UserWarning,
-                )
-                continue
-            else:
-                if isinstance(obj, Tag):
-                    refs[ref + ANNOTATED_TAG_SUFFIX] = obj.object[1]
-                refs[ref] = sha
+        refs = serialize_refs(self.object_store, self.get_refs())
 
         wants = determine_wants(refs)
         if not isinstance(wants, list):
@@ -772,7 +758,7 @@ class BaseRepo:
         cached = self.refs.get_peeled(ref)
         if cached is not None:
             return cached
-        return peel_sha(self.object_store, self.refs[ref]).id
+        return peel_sha(self.object_store, self.refs[ref])[1].id
 
     def get_walker(self, include: Optional[List[bytes]] = None,
                    *args, **kwargs):

+ 5 - 5
dulwich/server.py

@@ -128,7 +128,7 @@ from dulwich.protocol import (
 )
 from dulwich.refs import (
     RefsContainer,
-    ANNOTATED_TAG_SUFFIX,
+    PEELED_TAG_SUFFIX,
     write_info_refs,
 )
 from dulwich.repo import (
@@ -499,9 +499,9 @@ def _find_shallow(store: ObjectContainer, heads, depth):
 
     todo = []  # stack of (sha, depth)
     for head_sha in heads:
-        obj = peel_sha(store, head_sha)
-        if isinstance(obj, Commit):
-            todo.append((obj.id, 1))
+        _unpeeled, peeled = peel_sha(store, head_sha)
+        if isinstance(peeled, Commit):
+            todo.append((peeled.id, 1))
 
     not_shallow = set()
     shallow = set()
@@ -635,7 +635,7 @@ class _ProtocolGraphWalker:
                 self.proto.write_pkt_line(line)
                 if peeled_sha != sha:
                     self.proto.write_pkt_line(
-                        format_ref_line(ref + ANNOTATED_TAG_SUFFIX, peeled_sha))
+                        format_ref_line(ref + PEELED_TAG_SUFFIX, peeled_sha))
 
             # i'm done..
             self.proto.write_pkt_line(None)

+ 1 - 0
dulwich/tests/test_client.py

@@ -865,6 +865,7 @@ class LocalGitClientTests(TestCase):
         target = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, target)
         t = Repo.init_bare(target)
+        self.addCleanup(t.close)
         s = open_repo("a.git")
         self.addCleanup(tear_down_repo, s)
         self.assertEqual(s.get_refs(), c.fetch(s.path, t).refs)

+ 3 - 2
dulwich/tests/test_object_store.py

@@ -264,7 +264,7 @@ class ObjectStoreTests:
         tag2 = self.make_tag(b"2", testobject)
         tag3 = self.make_tag(b"3", testobject)
         for obj in [testobject, tag1, tag2, tag3]:
-            self.assertEqual(testobject, peel_sha(self.store, obj.id))
+            self.assertEqual((obj, testobject), peel_sha(self.store, obj.id))
 
     def test_get_raw(self):
         self.store.add_object(testobject)
@@ -305,7 +305,7 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
     def test_add_pack_emtpy(self):
         o = MemoryObjectStore()
         f, commit, abort = o.add_pack()
-        self.assertRaises(AssertionError, commit)
+        commit()
 
     def test_add_thin_pack(self):
         o = MemoryObjectStore()
@@ -525,6 +525,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
 
     def test_add_pack(self):
         o = DiskObjectStore(self.store_dir)
+        self.addCleanup(o.close)
         f, commit, abort = o.add_pack()
         try:
             b = make_object(Blob, data=b"more yummy data")

+ 2 - 0
dulwich/tests/test_repository.py

@@ -413,9 +413,11 @@ class RepositoryRootTests(TestCase):
         dest_dir = os.path.join(temp_dir, "a.git")
         shutil.copytree(os.path.join(repo_dir, "a.git"), dest_dir, symlinks=True)
         r = Repo(dest_dir)
+        self.addCleanup(r.close)
         del r.refs[b"refs/heads/master"]
         del r.refs[b"HEAD"]
         t = r.clone(os.path.join(temp_dir, "b.git"), mkdir=True)
+        self.addCleanup(t.close)
         self.assertEqual(
             {
                 b"refs/tags/mytag": b"28237f4dc30d0d462658d6b937b08a0f0b6ef55a",