Procházet zdrojové kódy

Merge commit 'de801168537280a7665e3d57873483914733a24d' into tempmaster

Jelmer Vernooij před 6 roky
rodič
revize
3345d77bfa

+ 11 - 0
NEWS

@@ -2,11 +2,22 @@
 
  IMPROVEMENTS
 
+ * Use fullname from gecos field, if available.
+   (Jelmer Vernooij)
+
+ * Support ``GIT_AUTHOR_NAME`` / ``GIT_AUTHOR_EMAIL``.
+   (Jelmer Vernooij)
+
  * Add support for short ids in parse_commit. (Jelmer Vernooij)
 
  * Add support for ``prune`` and ``prune_tags`` arguments
    to ``porcelain.fetch``. (Jelmer Vernooij, #681)
 
+ BUG FIXES
+
+ * Fix handling of race conditions when new packs appear.
+   (Jelmer Vernooij)
+
 0.19.10	2018-01-15
 
  IMPROVEMENTS

+ 1 - 1
dulwich/contrib/test_release_robot.py

@@ -62,7 +62,7 @@ class GetRecentTagsTest(unittest.TestCase):
     committer = b"Mark Mikofski <mark.mikofski@sunpowercorp.com>"
     test_tags = [b'v0.1a', b'v0.1']
     tag_test_data = {
-        test_tags[0]: [1484788003, b'0' * 40, None],
+        test_tags[0]: [1484788003, b'3' * 40, None],
         test_tags[1]: [1484788314, b'1' * 40, (1484788401, b'2' * 40)]
     }
 

+ 54 - 44
dulwich/object_store.py

@@ -24,12 +24,10 @@
 
 from io import BytesIO
 import errno
-from itertools import chain
 import os
 import stat
 import sys
 import tempfile
-import time
 
 from dulwich.diff_tree import (
     tree_changes,
@@ -55,6 +53,7 @@ from dulwich.pack import (
     Pack,
     PackData,
     PackInflater,
+    PackFileDisappeared,
     iter_sha1,
     pack_objects_to_data,
     write_pack_header,
@@ -310,8 +309,11 @@ class PackBasedObjectStore(BaseObjectStore):
         This does not check alternates.
         """
         for pack in self.packs:
-            if sha in pack:
-                return True
+            try:
+                if sha in pack:
+                    return True
+            except PackFileDisappeared:
+                pass
         return False
 
     def __contains__(self, sha):
@@ -326,11 +328,7 @@ class PackBasedObjectStore(BaseObjectStore):
                 return True
         return False
 
-    def _pack_cache_stale(self):
-        """Check whether the pack cache is stale."""
-        raise NotImplementedError(self._pack_cache_stale)
-
-    def _add_known_pack(self, base_name, pack):
+    def _add_cached_pack(self, base_name, pack):
         """Add a newly appeared pack to the cache by path.
 
         """
@@ -340,23 +338,27 @@ class PackBasedObjectStore(BaseObjectStore):
             if prev_pack:
                 prev_pack.close()
 
-    def _flush_pack_cache(self):
+    def _clear_cached_packs(self):
         pack_cache = self._pack_cache
         self._pack_cache = {}
         while pack_cache:
             (name, pack) = pack_cache.popitem()
             pack.close()
 
+    def _iter_cached_packs(self):
+        return self._pack_cache.values()
+
+    def _update_pack_cache(self):
+        raise NotImplementedError(self._update_pack_cache)
+
     def close(self):
-        self._flush_pack_cache()
+        self._clear_cached_packs()
 
     @property
     def packs(self):
         """List with pack objects."""
-        if self._pack_cache is None or self._pack_cache_stale():
-            self._update_pack_cache()
-
-        return self._pack_cache.values()
+        return (
+            list(self._iter_cached_packs()) + list(self._update_pack_cache()))
 
     def _iter_alternate_objects(self):
         """Iterate over the SHAs of all the objects in alternate stores."""
@@ -403,7 +405,7 @@ class PackBasedObjectStore(BaseObjectStore):
         old_packs = {p.name(): p for p in self.packs}
         for name, pack in old_packs.items():
             objects.update((obj, None) for obj in pack.iterobjects())
-        self._flush_pack_cache()
+        self._clear_cached_packs()
 
         # The name of the consolidated pack might match the name of a
         # pre-existing pack. Take care not to remove the newly created
@@ -421,9 +423,17 @@ class PackBasedObjectStore(BaseObjectStore):
 
     def __iter__(self):
         """Iterate over the SHAs that are present in this store."""
-        iterables = (list(self.packs) + [self._iter_loose_objects()] +
-                     [self._iter_alternate_objects()])
-        return chain(*iterables)
+        self._update_pack_cache()
+        for pack in self._iter_cached_packs():
+            try:
+                for sha in pack:
+                    yield sha
+            except PackFileDisappeared:
+                pass
+        for sha in self._iter_loose_objects():
+            yield sha
+        for sha in self._iter_alternate_objects():
+            yield sha
 
     def contains_loose(self, sha):
         """Check if a particular object is present by SHA1 and is loose.
@@ -438,6 +448,8 @@ class PackBasedObjectStore(BaseObjectStore):
         :param name: sha for the object.
         :return: tuple with numeric type and object contents.
         """
+        if name == ZERO_SHA:
+            raise KeyError(name)
         if len(name) == 40:
             sha = hex_to_sha(name)
             hexsha = name
@@ -446,16 +458,23 @@ class PackBasedObjectStore(BaseObjectStore):
             hexsha = None
         else:
             raise AssertionError("Invalid object name %r" % name)
-        for pack in self.packs:
+        for pack in self._iter_cached_packs():
             try:
                 return pack.get_raw(sha)
-            except KeyError:
+            except (KeyError, PackFileDisappeared):
                 pass
         if hexsha is None:
             hexsha = sha_to_hex(name)
         ret = self._get_loose_object(hexsha)
         if ret is not None:
             return ret.type_num, ret.as_raw_string()
+        # Maybe something else has added a pack with the object
+        # in the mean time?
+        for pack in self._update_pack_cache():
+            try:
+                return pack.get_raw(sha)
+            except KeyError:
+                pass
         for alternate in self.alternates:
             try:
                 return alternate.get_raw(hexsha)
@@ -486,8 +505,6 @@ class DiskObjectStore(PackBasedObjectStore):
         super(DiskObjectStore, self).__init__()
         self.path = path
         self.pack_dir = os.path.join(self.path, PACKDIR)
-        self._pack_cache_time = 0
-        self._pack_cache = {}
         self._alternates = None
 
     def __repr__(self):
@@ -545,16 +562,14 @@ class DiskObjectStore(PackBasedObjectStore):
         self.alternates.append(DiskObjectStore(path))
 
     def _update_pack_cache(self):
+        """Read and iterate over new pack files and cache them."""
         try:
             pack_dir_contents = os.listdir(self.pack_dir)
         except OSError as e:
             if e.errno == errno.ENOENT:
-                self._pack_cache_time = 0
                 self.close()
-                return
+                return []
             raise
-        self._pack_cache_time = max(
-                os.stat(self.pack_dir).st_mtime, time.time())
         pack_files = set()
         for name in pack_dir_contents:
             if name.startswith("pack-") and name.endswith(".pack"):
@@ -566,20 +581,16 @@ class DiskObjectStore(PackBasedObjectStore):
                     pack_files.add(pack_name)
 
         # Open newly appeared pack files
+        new_packs = []
         for f in pack_files:
             if f not in self._pack_cache:
-                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+                pack = Pack(os.path.join(self.pack_dir, f))
+                new_packs.append(pack)
+                self._pack_cache[f] = pack
         # Remove disappeared pack files
         for f in set(self._pack_cache) - pack_files:
             self._pack_cache.pop(f).close()
-
-    def _pack_cache_stale(self):
-        try:
-            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
-        except OSError as e:
-            if e.errno == errno.ENOENT:
-                return True
-            raise
+        return new_packs
 
     def _get_shafile_path(self, sha):
         # Check from object dir
@@ -676,7 +687,7 @@ class DiskObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = Pack(pack_base_name)
         final_pack.check_length_and_checksum()
-        self._add_known_pack(pack_base_name, final_pack)
+        self._add_cached_pack(pack_base_name, final_pack)
         return final_pack
 
     def add_thin_pack(self, read_all, read_some):
@@ -714,12 +725,9 @@ class DiskObjectStore(PackBasedObjectStore):
             basename = self._get_pack_basepath(entries)
             with GitFile(basename+".idx", "wb") as f:
                 write_pack_index_v2(f, entries, p.get_stored_checksum())
-        if self._pack_cache is None or self._pack_cache_stale():
-            self._update_pack_cache()
-        try:
-            return self._pack_cache[basename]
-        except KeyError:
-            pass
+        for pack in self.packs:
+            if pack._basename == basename:
+                return pack
         target_pack = basename + '.pack'
         if sys.platform == 'win32':
             # Windows might have the target pack file lingering. Attempt
@@ -731,7 +739,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     raise
         os.rename(path, target_pack)
         final_pack = Pack(basename)
-        self._add_known_pack(basename, final_pack)
+        self._add_cached_pack(basename, final_pack)
         return final_pack
 
     def add_pack(self):
@@ -967,6 +975,8 @@ class ObjectStoreIterator(ObjectIterator):
 
         :param needle: SHA1 of the object to check for
         """
+        if needle == ZERO_SHA:
+            return False
         return needle in self.store
 
     def __getitem__(self, key):

+ 13 - 1
dulwich/pack.py

@@ -111,6 +111,12 @@ def take_msb_bytes(read, crc32=None):
     return ret, crc32
 
 
+class PackFileDisappeared(Exception):
+
+    def __init__(self, obj):
+        self.obj = obj
+
+
 class UnpackedObject(object):
     """Class encapsulating an object unpacked from a pack file.
 
@@ -391,7 +397,13 @@ class PackIndex(object):
         """
         if len(sha) == 40:
             sha = hex_to_sha(sha)
-        return self._object_index(sha)
+        try:
+            return self._object_index(sha)
+        except ValueError:
+            closed = getattr(self._contents, 'closed', None)
+            if closed in (None, True):
+                raise PackFileDisappeared(self)
+            raise
 
     def object_sha1(self, index):
         """Return the SHA1 corresponding to the index in the pack file.

+ 59 - 30
dulwich/repo.py

@@ -120,6 +120,60 @@ class InvalidUserIdentity(Exception):
         self.identity = identity
 
 
+def _get_default_identity():
+    import getpass
+    import socket
+    username = getpass.getuser()
+    try:
+        import pwd
+    except ImportError:
+        fullname = None
+    else:
+        try:
+            gecos = pwd.getpwnam(username).pw_gecos
+        except KeyError:
+            fullname = None
+        else:
+            fullname = gecos.split(',')[0]
+    if not fullname:
+        fullname = username
+    email = os.environ.get('EMAIL')
+    if email is None:
+        email = "{}@{}".format(username, socket.gethostname())
+    return (fullname, email)
+
+
+def get_user_identity(config, kind=None):
+    """Determine the identity to use for new commits.
+    """
+    if kind:
+        user = os.environ.get("GIT_" + kind + "_NAME")
+        if user is not None:
+            user = user.encode('utf-8')
+        email = os.environ.get("GIT_" + kind + "_EMAIL")
+        if email is not None:
+            email = email.encode('utf-8')
+    else:
+        user = None
+        email = None
+    if user is None:
+        try:
+            user = config.get(("user", ), "name")
+        except KeyError:
+            user = None
+    if email is None:
+        try:
+            email = config.get(("user", ), "email")
+        except KeyError:
+            email = None
+    default_user, default_email = _get_default_identity()
+    if user is None:
+        user = default_user.encode('utf-8')
+    if email is None:
+        email = default_email.encode('utf-8')
+    return (user + b" <" + email + b">")
+
+
 def check_user_identity(identity):
     """Verify that a user identity is formatted correctly.
 
@@ -614,34 +668,11 @@ class BaseRepo(object):
         else:
             raise ValueError(name)
 
-    def _get_user_identity(self, config):
+    def _get_user_identity(self, config, kind=None):
         """Determine the identity to use for new commits.
         """
-        user = os.environ.get("GIT_COMMITTER_NAME")
-        email = os.environ.get("GIT_COMMITTER_EMAIL")
-        if user:
-            user = user.encode(sys.getdefaultencoding())
-        if email:
-            email = email.encode(sys.getdefaultencoding())
-        if user is None:
-            try:
-                user = config.get(("user", ), "name")
-            except KeyError:
-                user = None
-        if email is None:
-            try:
-                email = config.get(("user", ), "email")
-            except KeyError:
-                email = None
-        if user is None:
-            import getpass
-            user = getpass.getuser().encode(sys.getdefaultencoding())
-        if email is None:
-            import getpass
-            import socket
-            email = ("{}@{}".format(getpass.getuser(), socket.gethostname())
-                     .encode(sys.getdefaultencoding()))
-        return (user + b" <" + email + b">")
+        # TODO(jelmer): Deprecate this function in favor of get_user_identity
+        return get_user_identity(config)
 
     def _add_graftpoints(self, updated_graftpoints):
         """Add or modify graftpoints
@@ -715,7 +746,7 @@ class BaseRepo(object):
         if merge_heads is None:
             merge_heads = self._read_heads('MERGE_HEADS')
         if committer is None:
-            committer = self._get_user_identity(config)
+            committer = get_user_identity(config, kind='COMMITTER')
         check_user_identity(committer)
         c.committer = committer
         if commit_timestamp is None:
@@ -727,9 +758,7 @@ class BaseRepo(object):
             commit_timezone = 0
         c.commit_timezone = commit_timezone
         if author is None:
-            # FIXME: Support GIT_AUTHOR_NAME/GIT_AUTHOR_EMAIL environment
-            # variables
-            author = committer
+            author = get_user_identity(config, kind='AUTHOR')
         c.author = author
         check_user_identity(author)
         if author_timestamp is None:

+ 27 - 0
dulwich/tests/test_repository.py

@@ -863,6 +863,33 @@ class BuildRepoRootTests(TestCase):
             b"Jelmer <jelmer@apache.org>",
             r[commit_sha].committer)
 
+    def overrideEnv(self, name, value):
+        def restore():
+            if oldval is not None:
+                os.environ[name] = oldval
+            else:
+                del os.environ[name]
+        oldval = os.environ.get(name)
+        os.environ[name] = value
+        self.addCleanup(restore)
+
+    def test_commit_config_identity_from_env(self):
+        # GIT_COMMITTER_* environment variables take precedence over config
+        self.overrideEnv('GIT_COMMITTER_NAME', 'joe')
+        self.overrideEnv('GIT_COMMITTER_EMAIL', 'joe@example.com')
+        r = self._repo
+        c = r.get_config()
+        c.set((b"user", ), b"name", b"Jelmer")
+        c.set((b"user", ), b"email", b"jelmer@apache.org")
+        c.write_to_path()
+        commit_sha = r.do_commit(b'message')
+        self.assertEqual(
+            b"Jelmer <jelmer@apache.org>",
+            r[commit_sha].author)
+        self.assertEqual(
+            b"joe <joe@example.com>",
+            r[commit_sha].committer)
+
     def test_commit_fail_ref(self):
         r = self._repo