
New upstream version 0.19.11

Jelmer Vernooij · 6 years ago · parent commit b5638e8b6e

+ 1 - 0
.travis.yml

@@ -2,6 +2,7 @@ language: python
 sudo: false
 cache: pip
 
+
 python:
   - 2.7
   - 3.4

+ 1 - 0
AUTHORS

@@ -146,5 +146,6 @@ Sylvia van Os <sylvia@hackerchick.me>
 Boris Feld <lothiraldan@gmail.com>
 KS Chan <mrkschan@gmail.com>
 egor <egor@sourced.tech>
+Antoine Lambert <anlambert@softwareheritage.org>
 
 If you contributed but are missing from this list, please send me an e-mail.

+ 20 - 0
NEWS

@@ -1,3 +1,23 @@
+0.19.11	2019-02-07
+
+ IMPROVEMENTS
+
+ * Use fullname from gecos field, if available.
+   (Jelmer Vernooij)
+
+ * Support ``GIT_AUTHOR_NAME`` / ``GIT_AUTHOR_EMAIL``.
+   (Jelmer Vernooij)
+
+ * Add support for short ids in parse_commit. (Jelmer Vernooij)
+
+ * Add support for ``prune`` and ``prune_tags`` arguments
+   to ``porcelain.fetch``. (Jelmer Vernooij, #681)
+
+ BUG FIXES
+
+ * Fix handling of race conditions when new packs appear.
+   (Jelmer Vernooij)
+
 0.19.10	2018-01-15
 
  IMPROVEMENTS

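A minimal sketch of the new ``GIT_AUTHOR_NAME`` / ``GIT_AUTHOR_EMAIL`` support listed above (the repository path and identity are illustrative, not from this commit):

    import os
    from dulwich.repo import Repo

    os.environ['GIT_AUTHOR_NAME'] = 'Jane Doe'           # hypothetical identity
    os.environ['GIT_AUTHOR_EMAIL'] = 'jane@example.com'

    repo = Repo.init('/tmp/example-repo', mkdir=True)    # hypothetical path
    commit_id = repo.do_commit(b'initial commit')
    print(repo[commit_id].author)  # b'Jane Doe <jane@example.com>'
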
+ 2 - 1
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.19.10
+Version: 0.19.11
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
@@ -124,3 +124,4 @@ Classifier: Operating System :: Microsoft :: Windows
 Classifier: Topic :: Software Development :: Version Control
 Provides-Extra: fastimport
 Provides-Extra: https
+Provides-Extra: pgp

+ 2 - 1
dulwich.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: dulwich
-Version: 0.19.10
+Version: 0.19.11
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: Jelmer Vernooij
@@ -124,3 +124,4 @@ Classifier: Operating System :: Microsoft :: Windows
 Classifier: Topic :: Software Development :: Version Control
 Provides-Extra: fastimport
 Provides-Extra: https
+Provides-Extra: pgp

+ 3 - 0
dulwich.egg-info/requires.txt

@@ -6,3 +6,6 @@ fastimport
 
 [https]
 urllib3[secure]>=1.23
+
+[pgp]
+gpg

+ 1 - 1
dulwich/__init__.py

@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 19, 10)
+__version__ = (0, 19, 11)

+ 2 - 1
dulwich/client.py

@@ -1517,7 +1517,8 @@ class HttpGitClient(GitClient):
         read = BytesIO(resp.data).read
 
         resp.content_type = resp.getheader("Content-Type")
-        resp.redirect_location = resp.get_redirect_location()
+        resp_url = resp.geturl()
+        resp.redirect_location = resp_url if resp_url != url else ''
 
         return resp, read
 

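The hunk above derives ``redirect_location`` by comparing the response URL with the request URL, since urllib3 follows redirects itself. A rough usage sketch based on the new test further down (requires network access):

    from dulwich.client import HttpGitClient

    url = 'https://gitlab.com/inkscape/inkscape/'
    client = HttpGitClient(url)
    # the server 301-redirects to the canonical .git/ location, which
    # _discover_references now detects and returns as the processed url
    refs, capabilities, processed_url = client._discover_references(
        b'git-upload-pack', url)
    print(processed_url)  # 'https://gitlab.com/inkscape/inkscape.git/'
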
+ 1 - 1
dulwich/config.py

@@ -129,7 +129,7 @@ class Config(object):
     def get_boolean(self, section, name, default=None):
         """Retrieve a configuration setting as boolean.
 
-        :param section: Tuple with section name and optional subsection namee
+        :param section: Tuple with section name and optional subsection name
         :param name: Name of the setting, including section and possible
             subsection.
         :return: Contents of the setting

+ 11 - 21
dulwich/contrib/swift.py

@@ -650,16 +650,16 @@ class SwiftObjectStore(PackBasedObjectStore):
         self.pack_dir = posixpath.join(OBJECTDIR, PACKDIR)
         self._alternates = None
 
-    @property
-    def packs(self):
-        """List with pack objects."""
-        if not self._pack_cache:
-            self._update_pack_cache()
-        return self._pack_cache.values()
-
     def _update_pack_cache(self):
-        for pack in self._load_packs():
-            self._pack_cache[pack._basename] = pack
+        objects = self.scon.get_container_objects()
+        pack_files = [o['name'].replace(".pack", "")
+                      for o in objects if o['name'].endswith(".pack")]
+        ret = []
+        for basename in pack_files:
+            pack = SwiftPack(basename, scon=self.scon)
+            self._pack_cache[basename] = pack
+            ret.append(pack)
+        return ret
 
     def _iter_loose_objects(self):
         """Loose objects are not supported by this repository
@@ -680,16 +680,6 @@ class SwiftObjectStore(PackBasedObjectStore):
         kwargs['concurrency'] = self.scon.concurrency
         return PackInfoMissingObjectFinder(self, *args, **kwargs)
 
-    def _load_packs(self):
-        """Load all packs from Swift
-
-        :return: a list of `SwiftPack` instances
-        """
-        objects = self.scon.get_container_objects()
-        pack_files = [o['name'].replace(".pack", "")
-                      for o in objects if o['name'].endswith(".pack")]
-        return [SwiftPack(pack, scon=self.scon) for pack in pack_files]
-
     def pack_info_get(self, sha):
         for pack in self.packs:
             if sha in pack:
@@ -745,7 +735,7 @@ class SwiftObjectStore(PackBasedObjectStore):
                 index.close()
                 final_pack = SwiftPack(basename, scon=self.scon)
                 final_pack.check_length_and_checksum()
-                self._add_known_pack(basename, final_pack)
+                self._add_cached_pack(basename, final_pack)
                 return final_pack
             else:
                 return None
@@ -838,7 +828,7 @@ class SwiftObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = SwiftPack(pack_base_name, scon=self.scon)
         final_pack.check_length_and_checksum()
-        self._add_known_pack(pack_base_name, final_pack)
+        self._add_cached_pack(pack_base_name, final_pack)
         return final_pack
 
 

+ 1 - 1
dulwich/contrib/test_release_robot.py

@@ -62,7 +62,7 @@ class GetRecentTagsTest(unittest.TestCase):
     committer = b"Mark Mikofski <mark.mikofski@sunpowercorp.com>"
     test_tags = [b'v0.1a', b'v0.1']
     tag_test_data = {
-        test_tags[0]: [1484788003, b'0' * 40, None],
+        test_tags[0]: [1484788003, b'3' * 40, None],
         test_tags[1]: [1484788314, b'1' * 40, (1484788401, b'2' * 40)]
     }
 

+ 1 - 1
dulwich/contrib/test_swift.py

@@ -286,7 +286,7 @@ class TestSwiftObjectStore(TestCase):
                  'fakerepo/objects/pack/pack-'+'2'*40+'.info': ''}
         fsc = FakeSwiftConnector('fakerepo', conf=self.conf, store=store)
         sos = swift.SwiftObjectStore(fsc)
-        packs = sos._load_packs()
+        packs = sos.packs
         self.assertEqual(len(packs), 2)
         for pack in packs:
             self.assertTrue(isinstance(pack, swift.SwiftPack))

+ 7 - 2
dulwich/index.py

@@ -604,7 +604,7 @@ def read_submodule_head(path):
         return None
 
 
-def get_unstaged_changes(index, root_path):
+def get_unstaged_changes(index, root_path, filter_blob_callback=None):
     """Walk through an index and check for differences against working tree.
 
     :param index: index to check
@@ -618,7 +618,12 @@ def get_unstaged_changes(index, root_path):
     for tree_path, entry in index.iteritems():
         full_path = _tree_to_fs_path(root_path, tree_path)
         try:
-            blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
+            blob = blob_from_path_and_stat(
+                full_path, os.lstat(full_path)
+            )
+
+            if filter_blob_callback is not None:
+                blob = filter_blob_callback(blob, tree_path)
         except OSError as e:
             if e.errno != errno.ENOENT:
                 raise

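A sketch of the new ``filter_blob_callback`` hook, here wired to the blob normalizer that ``porcelain.status`` now uses (the repository path is illustrative):

    from dulwich.index import get_unstaged_changes
    from dulwich.repo import Repo

    repo = Repo('/path/to/repo')  # hypothetical path
    normalizer = repo.get_blob_normalizer()

    # each working-tree blob is normalized (e.g. CRLF -> LF) before being
    # compared against the index, so line-ending-only edits are not
    # reported as unstaged changes
    changes = list(get_unstaged_changes(
        repo.open_index(), repo.path, normalizer.checkin_normalize))
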
+ 93 - 0
dulwich/line_ending.py

@@ -126,6 +126,9 @@ Sources:
 - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/
 """
 
+from dulwich.objects import Blob
+from dulwich.patch import is_binary
+
 CRLF = b"\r\n"
 LF = b"\n"
 
@@ -150,6 +153,24 @@ def convert_lf_to_crlf(text_hunk):
     return intermediary.replace(LF, CRLF)
 
 
+def get_checkout_filter(core_eol, core_autocrlf, git_attributes):
+    """ Returns the correct checkout filter based on the passed arguments
+    """
+    # TODO this function should process the git_attributes for the path and if
+    # the text attribute is not defined, fallback on the
+    # get_checkout_filter_autocrlf function with the autocrlf value
+    return get_checkout_filter_autocrlf(core_autocrlf)
+
+
+def get_checkin_filter(core_eol, core_autocrlf, git_attributes):
+    """ Returns the correct checkin filter based on the passed arguments
+    """
+    # TODO this function should process the git_attributes for the path and if
+    # the text attribute is not defined, fallback on the
+    # get_checkin_filter_autocrlf function with the autocrlf value
+    return get_checkin_filter_autocrlf(core_autocrlf)
+
+
 def get_checkout_filter_autocrlf(core_autocrlf):
     """ Returns the correct checkout filter base on autocrlf value
 
@@ -179,3 +200,75 @@ def get_checkin_filter_autocrlf(core_autocrlf):
 
     # Checking filter should never be `convert_lf_to_crlf`
     return None
+
+
+class BlobNormalizer(object):
+    """ An object that stores the computed decision of which filter to
+    apply, based on configuration, gitattributes, path and operation
+    (checkin or checkout)
+    """
+
+    def __init__(self, config_stack, gitattributes):
+        self.config_stack = config_stack
+        self.gitattributes = gitattributes
+
+        # Compute which filters we need based on the parameters
+        try:
+            core_eol = config_stack.get("core", "eol")
+        except KeyError:
+            core_eol = "native"
+
+        try:
+            core_autocrlf = config_stack.get("core", "autocrlf").lower()
+        except KeyError:
+            core_autocrlf = False
+
+        self.fallback_read_filter = get_checkout_filter(
+            core_eol, core_autocrlf, self.gitattributes
+        )
+        self.fallback_write_filter = get_checkin_filter(
+            core_eol, core_autocrlf, self.gitattributes
+        )
+
+    def checkin_normalize(self, blob, tree_path):
+        """ Normalize a blob during a checkin operation
+        """
+        if self.fallback_write_filter is not None:
+            return normalize_blob(
+                blob, self.fallback_write_filter, binary_detection=True
+            )
+
+        return blob
+
+    def checkout_normalize(self, blob, tree_path):
+        """ Normalize a blob during a checkout operation
+        """
+        if self.fallback_read_filter is not None:
+            return normalize_blob(
+                blob, self.fallback_read_filter, binary_detection=True
+            )
+
+        return blob
+
+
+def normalize_blob(blob, conversion, binary_detection):
+    """ Take a blob as input and return either the original blob, if
+    binary_detection is True and the blob content looks binary, or a new
+    blob with the converted data
+    """
+    # Read the original blob
+    data = blob.data
+
+    # If we need to detect if a file is binary and the file is detected as
+    # binary, do not apply the conversion function and return the original
+    # chunked text
+    if binary_detection is True:
+        if is_binary(data):
+            return blob
+
+    # Now apply the conversion
+    converted_data = conversion(data)
+
+    new_blob = Blob()
+    new_blob.data = converted_data
+
+    return new_blob

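A minimal example of the new ``normalize_blob``, mirroring the tests added further down:

    from dulwich.line_ending import convert_crlf_to_lf, normalize_blob
    from dulwich.objects import Blob

    blob = Blob()
    blob.set_raw_string(b"line1\r\nline2")

    # binary_detection=True returns the blob unchanged if its content
    # looks binary (e.g. contains NUL bytes)
    normalized = normalize_blob(blob, convert_crlf_to_lf,
                                binary_detection=True)
    print(normalized.data)  # b'line1\nline2'
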
+ 57 - 44
dulwich/object_store.py

@@ -24,12 +24,10 @@
 
 from io import BytesIO
 import errno
-from itertools import chain
 import os
 import stat
 import sys
 import tempfile
-import time
 
 from dulwich.diff_tree import (
     tree_changes,
@@ -55,6 +53,7 @@ from dulwich.pack import (
     Pack,
     PackData,
     PackInflater,
+    PackFileDisappeared,
     iter_sha1,
     pack_objects_to_data,
     write_pack_header,
@@ -310,8 +309,11 @@ class PackBasedObjectStore(BaseObjectStore):
         This does not check alternates.
         """
         for pack in self.packs:
-            if sha in pack:
-                return True
+            try:
+                if sha in pack:
+                    return True
+            except PackFileDisappeared:
+                pass
         return False
 
     def __contains__(self, sha):
@@ -326,11 +328,7 @@ class PackBasedObjectStore(BaseObjectStore):
                 return True
         return False
 
-    def _pack_cache_stale(self):
-        """Check whether the pack cache is stale."""
-        raise NotImplementedError(self._pack_cache_stale)
-
-    def _add_known_pack(self, base_name, pack):
+    def _add_cached_pack(self, base_name, pack):
         """Add a newly appeared pack to the cache by path.
 
         """
@@ -340,23 +338,27 @@ class PackBasedObjectStore(BaseObjectStore):
             if prev_pack:
                 prev_pack.close()
 
-    def _flush_pack_cache(self):
+    def _clear_cached_packs(self):
         pack_cache = self._pack_cache
         self._pack_cache = {}
         while pack_cache:
             (name, pack) = pack_cache.popitem()
             pack.close()
 
+    def _iter_cached_packs(self):
+        return self._pack_cache.values()
+
+    def _update_pack_cache(self):
+        raise NotImplementedError(self._update_pack_cache)
+
     def close(self):
-        self._flush_pack_cache()
+        self._clear_cached_packs()
 
     @property
     def packs(self):
         """List with pack objects."""
-        if self._pack_cache is None or self._pack_cache_stale():
-            self._update_pack_cache()
-
-        return self._pack_cache.values()
+        return (
+            list(self._iter_cached_packs()) + list(self._update_pack_cache()))
 
     def _iter_alternate_objects(self):
         """Iterate over the SHAs of all the objects in alternate stores."""
@@ -403,7 +405,6 @@ class PackBasedObjectStore(BaseObjectStore):
         old_packs = {p.name(): p for p in self.packs}
         for name, pack in old_packs.items():
             objects.update((obj, None) for obj in pack.iterobjects())
-        self._flush_pack_cache()
 
         # The name of the consolidated pack might match the name of a
         # pre-existing pack. Take care not to remove the newly created
@@ -421,9 +422,17 @@ class PackBasedObjectStore(BaseObjectStore):
 
     def __iter__(self):
         """Iterate over the SHAs that are present in this store."""
-        iterables = (list(self.packs) + [self._iter_loose_objects()] +
-                     [self._iter_alternate_objects()])
-        return chain(*iterables)
+        self._update_pack_cache()
+        for pack in self._iter_cached_packs():
+            try:
+                for sha in pack:
+                    yield sha
+            except PackFileDisappeared:
+                pass
+        for sha in self._iter_loose_objects():
+            yield sha
+        for sha in self._iter_alternate_objects():
+            yield sha
 
     def contains_loose(self, sha):
         """Check if a particular object is present by SHA1 and is loose.
@@ -438,6 +447,8 @@ class PackBasedObjectStore(BaseObjectStore):
         :param name: sha for the object.
         :return: tuple with numeric type and object contents.
         """
+        if name == ZERO_SHA:
+            raise KeyError(name)
         if len(name) == 40:
             sha = hex_to_sha(name)
             hexsha = name
@@ -446,16 +457,23 @@ class PackBasedObjectStore(BaseObjectStore):
             hexsha = None
         else:
             raise AssertionError("Invalid object name %r" % name)
-        for pack in self.packs:
+        for pack in self._iter_cached_packs():
             try:
                 return pack.get_raw(sha)
-            except KeyError:
+            except (KeyError, PackFileDisappeared):
                 pass
         if hexsha is None:
             hexsha = sha_to_hex(name)
         ret = self._get_loose_object(hexsha)
         if ret is not None:
             return ret.type_num, ret.as_raw_string()
+        # Maybe something else has added a pack with the object
+        # in the meantime?
+        for pack in self._update_pack_cache():
+            try:
+                return pack.get_raw(sha)
+            except KeyError:
+                pass
         for alternate in self.alternates:
             try:
                 return alternate.get_raw(hexsha)
@@ -486,8 +504,6 @@ class DiskObjectStore(PackBasedObjectStore):
         super(DiskObjectStore, self).__init__()
         self.path = path
         self.pack_dir = os.path.join(self.path, PACKDIR)
-        self._pack_cache_time = 0
-        self._pack_cache = {}
         self._alternates = None
 
     def __repr__(self):
@@ -545,16 +561,14 @@ class DiskObjectStore(PackBasedObjectStore):
         self.alternates.append(DiskObjectStore(path))
 
     def _update_pack_cache(self):
+        """Read and iterate over new pack files and cache them."""
         try:
             pack_dir_contents = os.listdir(self.pack_dir)
         except OSError as e:
             if e.errno == errno.ENOENT:
-                self._pack_cache_time = 0
                 self.close()
-                return
+                return []
             raise
-        self._pack_cache_time = max(
-                os.stat(self.pack_dir).st_mtime, time.time())
         pack_files = set()
         for name in pack_dir_contents:
             if name.startswith("pack-") and name.endswith(".pack"):
@@ -566,20 +580,16 @@ class DiskObjectStore(PackBasedObjectStore):
                     pack_files.add(pack_name)
 
         # Open newly appeared pack files
+        new_packs = []
         for f in pack_files:
             if f not in self._pack_cache:
-                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+                pack = Pack(os.path.join(self.pack_dir, f))
+                new_packs.append(pack)
+                self._pack_cache[f] = pack
         # Remove disappeared pack files
         for f in set(self._pack_cache) - pack_files:
             self._pack_cache.pop(f).close()
-
-    def _pack_cache_stale(self):
-        try:
-            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
-        except OSError as e:
-            if e.errno == errno.ENOENT:
-                return True
-            raise
+        return new_packs
 
     def _get_shafile_path(self, sha):
         # Check from object dir
@@ -607,6 +617,10 @@ class DiskObjectStore(PackBasedObjectStore):
     def _remove_pack(self, pack):
         os.remove(pack.data.path)
         os.remove(pack.index.path)
+        try:
+            del self._pack_cache[os.path.basename(pack._basename)]
+        except KeyError:
+            pass
 
     def _get_pack_basepath(self, entries):
         suffix = iter_sha1(entry[0] for entry in entries)
@@ -676,7 +690,7 @@ class DiskObjectStore(PackBasedObjectStore):
         # Add the pack to the store and return it.
         final_pack = Pack(pack_base_name)
         final_pack.check_length_and_checksum()
-        self._add_known_pack(pack_base_name, final_pack)
+        self._add_cached_pack(pack_base_name, final_pack)
         return final_pack
 
     def add_thin_pack(self, read_all, read_some):
@@ -714,12 +728,9 @@ class DiskObjectStore(PackBasedObjectStore):
             basename = self._get_pack_basepath(entries)
             with GitFile(basename+".idx", "wb") as f:
                 write_pack_index_v2(f, entries, p.get_stored_checksum())
-        if self._pack_cache is None or self._pack_cache_stale():
-            self._update_pack_cache()
-        try:
-            return self._pack_cache[basename]
-        except KeyError:
-            pass
+        for pack in self.packs:
+            if pack._basename == basename:
+                return pack
         target_pack = basename + '.pack'
         if sys.platform == 'win32':
             # Windows might have the target pack file lingering. Attempt
@@ -731,7 +742,7 @@ class DiskObjectStore(PackBasedObjectStore):
                     raise
         os.rename(path, target_pack)
         final_pack = Pack(basename)
-        self._add_known_pack(basename, final_pack)
+        self._add_cached_pack(basename, final_pack)
         return final_pack
 
     def add_pack(self):
@@ -967,6 +978,8 @@ class ObjectStoreIterator(ObjectIterator):
 
         :param needle: SHA1 of the object to check for
         """
+        if needle == ZERO_SHA:
+            return False
         return needle in self.store
 
     def __getitem__(self, key):

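The net effect of the refactoring above: ``packs`` now returns the cached packs plus any that appeared since the last refresh, and lookups tolerate packs vanishing mid-scan. A rough sketch (the path is illustrative):

    from dulwich.object_store import DiskObjectStore

    store = DiskObjectStore('/path/to/repo/.git/objects')  # hypothetical path

    # iteration refreshes the pack cache first and swallows
    # PackFileDisappeared for packs deleted by a concurrent repack;
    # get_raw() retries via _update_pack_cache() before giving up
    for sha in store:
        type_num, raw = store.get_raw(sha)
        break
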
+ 33 - 0
dulwich/objectspec.py

@@ -162,6 +162,29 @@ def parse_commit_range(repo, committishs):
     return iter([parse_commit(repo, committishs)])
 
 
+class AmbiguousShortId(Exception):
+    """The short id is ambiguous."""
+
+    def __init__(self, prefix, options):
+        self.prefix = prefix
+        self.options = options
+
+
+def scan_for_short_id(object_store, prefix):
+    """Scan an object store for a short id."""
+    # TODO(jelmer): This could short-circuit looking for objects
+    # starting with a certain prefix.
+    ret = []
+    for object_id in object_store:
+        if object_id.startswith(prefix):
+            ret.append(object_store[object_id])
+    if not ret:
+        raise KeyError(prefix)
+    if len(ret) == 1:
+        return ret[0]
+    raise AmbiguousShortId(prefix, ret)
+
+
 def parse_commit(repo, committish):
     """Parse a string referring to a single commit.
 
@@ -180,6 +203,16 @@ def parse_commit(repo, committish):
         return repo[parse_ref(repo, committish)]
     except KeyError:
         pass
+    if len(committish) >= 4 and len(committish) < 40:
+        try:
+            int(committish, 16)
+        except ValueError:
+            pass
+        else:
+            try:
+                return scan_for_short_id(repo.object_store, committish)
+            except KeyError:
+                pass
     raise KeyError(committish)
 
 

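A short-id lookup sketch based on the new code (the prefix is illustrative; as the TODO notes, ``scan_for_short_id`` currently walks the whole store):

    from dulwich.objectspec import AmbiguousShortId, parse_commit
    from dulwich.repo import Repo

    repo = Repo('/path/to/repo')  # hypothetical path
    try:
        commit = parse_commit(repo, b'c9a8b2d')  # hypothetical short id
    except AmbiguousShortId as e:
        print('candidates:', [o.id for o in e.options])
    except KeyError:
        print('no commit matches that prefix')
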
+ 13 - 1
dulwich/pack.py

@@ -111,6 +111,12 @@ def take_msb_bytes(read, crc32=None):
     return ret, crc32
 
 
+class PackFileDisappeared(Exception):
+
+    def __init__(self, obj):
+        self.obj = obj
+
+
 class UnpackedObject(object):
     """Class encapsulating an object unpacked from a pack file.
 
@@ -391,7 +397,13 @@ class PackIndex(object):
         """
         if len(sha) == 40:
             sha = hex_to_sha(sha)
-        return self._object_index(sha)
+        try:
+            return self._object_index(sha)
+        except ValueError:
+            closed = getattr(self._contents, 'closed', None)
+            if closed in (None, True):
+                raise PackFileDisappeared(self)
+            raise
 
     def object_sha1(self, index):
         """Return the SHA1 corresponding to the index in the pack file.

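``object_index`` now distinguishes a vanished (closed or unmapped) pack file from a plain lookup miss. A hedged sketch of how a caller reacts (the basename is illustrative):

    from dulwich.pack import Pack, PackFileDisappeared

    pack = Pack('/path/to/pack-deadbeef')  # hypothetical basename, no suffix
    try:
        offset = pack.index.object_index(b'3f' * 20)  # hypothetical hex sha
    except PackFileDisappeared:
        # the backing file went away, e.g. a concurrent repack removed it;
        # PackBasedObjectStore treats this as "object not in this pack"
        pass
    except KeyError:
        pass  # a genuine miss
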
+ 15 - 6
dulwich/porcelain.py

@@ -225,7 +225,7 @@ def archive(repo, committish=None, outstream=default_bytes_out_stream,
     if committish is None:
         committish = "HEAD"
     with open_repo_closing(repo) as repo_obj:
-        c = repo_obj[committish]
+        c = parse_commit(repo_obj, committish)
         for chunk in tar_stream(
                 repo_obj.object_store, repo_obj.object_store[c.tree],
                 c.commit_time):
@@ -882,7 +882,11 @@ def status(repo=".", ignored=False):
         tracked_changes = get_tree_changes(r)
         # 2. Get status of unstaged
         index = r.open_index()
-        unstaged_changes = list(get_unstaged_changes(index, r.path))
+        normalizer = r.get_blob_normalizer()
+        filter_callback = normalizer.checkin_normalize
+        unstaged_changes = list(
+            get_unstaged_changes(index, r.path, filter_callback)
+        )
         ignore_manager = IgnoreFilterManager.from_repo(r)
         untracked_paths = get_untracked_paths(r.path, r.path, index)
         if ignored:
@@ -1094,7 +1098,7 @@ def branch_list(repo):
 
 def fetch(repo, remote_location, remote_name=b'origin', outstream=sys.stdout,
           errstream=default_bytes_err_stream, message=None, depth=None,
-          **kwargs):
+          prune=False, prune_tags=False, **kwargs):
     """Fetch objects from a remote server.
 
     :param repo: Path to the repository
@@ -1104,6 +1108,8 @@ def fetch(repo, remote_location, remote_name=b'origin', outstream=sys.stdout,
     :param errstream: Error stream (defaults to stderr)
     :param message: Reflog message (defaults to b"fetch: from <remote_name>")
     :param depth: Depth to fetch at
+    :param prune: Prune remote-tracking refs that were removed on the remote
+    :param prune_tags: Prune local tags that were removed on the remote
     :return: Dictionary with refs on the remote
     """
     if message is None:
@@ -1118,12 +1124,15 @@ def fetch(repo, remote_location, remote_name=b'origin', outstream=sys.stdout,
             n[len(b'refs/heads/'):]: v for (n, v) in stripped_refs.items()
             if n.startswith(b'refs/heads/')}
         r.refs.import_refs(
-            b'refs/remotes/' + remote_name, branches, message=message)
+            b'refs/remotes/' + remote_name, branches, message=message,
+            prune=prune)
         tags = {
             n[len(b'refs/tags/'):]: v for (n, v) in stripped_refs.items()
             if n.startswith(b'refs/tags/') and
             not n.endswith(ANNOTATED_TAG_SUFFIX)}
-        r.refs.import_refs(b'refs/tags', tags, message=message)
+        r.refs.import_refs(
+            b'refs/tags', tags, message=message,
+            prune=prune_tags)
     return fetch_result.refs
 
 
@@ -1409,7 +1418,7 @@ def get_object_by_path(repo, path, committish=None):
         committish = "HEAD"
     # Get the repository
     with open_repo_closing(repo) as r:
-        commit = parse_commit(repo, committish)
+        commit = parse_commit(r, committish)
         base_tree = commit.tree
         if not isinstance(path, bytes):
             path = path.encode(commit.encoding or DEFAULT_ENCODING)

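Example of the new ``prune`` / ``prune_tags`` arguments (the path and URL are illustrative):

    from dulwich import porcelain

    # remote-tracking refs and local tags that no longer exist on the
    # remote are removed after the fetch
    porcelain.fetch('/path/to/repo', 'https://github.com/jelmer/dulwich',
                    prune=True, prune_tags=True)
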
+ 12 - 1
dulwich/refs.py

@@ -138,10 +138,21 @@ class RefsContainer(object):
         return None
 
     def import_refs(self, base, other, committer=None, timestamp=None,
-                    timezone=None, message=None):
+                    timezone=None, message=None, prune=False):
+        if prune:
+            to_delete = set(self.subkeys(base))
+        else:
+            to_delete = set()
         for name, value in other.items():
             self.set_if_equals(b'/'.join((base, name)), None, value,
                                message=message)
+            if to_delete:
+                try:
+                    to_delete.remove(name)
+                except KeyError:
+                    pass
+        for ref in to_delete:
+            self.remove_if_equals(b'/'.join((base, ref)), None)
 
     def allkeys(self):
         """All refs present in this container."""

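With ``prune=True``, refs under ``base`` that are absent from the imported mapping are deleted afterwards, as the new test below exercises (the repository path is illustrative):

    from dulwich.repo import Repo

    repo = Repo('/path/to/repo')  # hypothetical path
    # any pre-existing refs/remotes/origin/* not named below are removed
    repo.refs.import_refs(
        b'refs/remotes/origin',
        {b'master': b'42d06bd4b77fed026b154d16493e5deab78f02ec'},
        prune=True)
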
+ 72 - 26
dulwich/repo.py

@@ -72,6 +72,8 @@ from dulwich.hooks import (
     CommitMsgShellHook,
     )
 
+from dulwich.line_ending import BlobNormalizer
+
 from dulwich.refs import (  # noqa: F401
     ANNOTATED_TAG_SUFFIX,
     check_ref_format,
@@ -118,6 +120,60 @@ class InvalidUserIdentity(Exception):
         self.identity = identity
 
 
+def _get_default_identity():
+    import getpass
+    import socket
+    username = getpass.getuser()
+    try:
+        import pwd
+    except ImportError:
+        fullname = None
+    else:
+        try:
+            gecos = pwd.getpwnam(username).pw_gecos
+        except KeyError:
+            fullname = None
+        else:
+            fullname = gecos.split(',')[0]
+    if not fullname:
+        fullname = username
+    email = os.environ.get('EMAIL')
+    if email is None:
+        email = "{}@{}".format(username, socket.gethostname())
+    return (fullname, email)
+
+
+def get_user_identity(config, kind=None):
+    """Determine the identity to use for new commits.
+    """
+    if kind:
+        user = os.environ.get("GIT_" + kind + "_NAME")
+        if user is not None:
+            user = user.encode('utf-8')
+        email = os.environ.get("GIT_" + kind + "_EMAIL")
+        if email is not None:
+            email = email.encode('utf-8')
+    else:
+        user = None
+        email = None
+    if user is None:
+        try:
+            user = config.get(("user", ), "name")
+        except KeyError:
+            user = None
+    if email is None:
+        try:
+            email = config.get(("user", ), "email")
+        except KeyError:
+            email = None
+    default_user, default_email = _get_default_identity()
+    if user is None:
+        user = default_user.encode('utf-8')
+    if email is None:
+        email = default_email.encode('utf-8')
+    return (user + b" <" + email + b">")
+
+
 def check_user_identity(identity):
     """Verify that a user identity is formatted correctly.
 
@@ -612,30 +668,11 @@ class BaseRepo(object):
         else:
             raise ValueError(name)
 
-    def _get_user_identity(self, config):
+    def _get_user_identity(self, config, kind=None):
         """Determine the identity to use for new commits.
         """
-        user = os.environ.get("GIT_COMMITTER_NAME")
-        email = os.environ.get("GIT_COMMITTER_EMAIL")
-        if user is None:
-            try:
-                user = config.get(("user", ), "name")
-            except KeyError:
-                user = None
-        if email is None:
-            try:
-                email = config.get(("user", ), "email")
-            except KeyError:
-                email = None
-        if user is None:
-            import getpass
-            user = getpass.getuser().encode(sys.getdefaultencoding())
-        if email is None:
-            import getpass
-            import socket
-            email = ("{}@{}".format(getpass.getuser(), socket.gethostname())
-                     .encode(sys.getdefaultencoding()))
-        return (user + b" <" + email + b">")
+        # TODO(jelmer): Deprecate this function in favor of get_user_identity
+        return get_user_identity(config)
 
     def _add_graftpoints(self, updated_graftpoints):
         """Add or modify graftpoints
@@ -709,7 +746,7 @@ class BaseRepo(object):
         if merge_heads is None:
             merge_heads = self._read_heads('MERGE_HEADS')
         if committer is None:
-            committer = self._get_user_identity(config)
+            committer = get_user_identity(config, kind='COMMITTER')
         check_user_identity(committer)
         c.committer = committer
         if commit_timestamp is None:
@@ -721,9 +758,7 @@ class BaseRepo(object):
             commit_timezone = 0
         c.commit_timezone = commit_timezone
         if author is None:
-            # FIXME: Support GIT_AUTHOR_NAME/GIT_AUTHOR_EMAIL environment
-            # variables
-            author = committer
+            author = get_user_identity(config, kind='AUTHOR')
         c.author = author
         check_user_identity(author)
         if author_timestamp is None:
@@ -1020,6 +1055,7 @@ class Repo(BaseRepo):
             _fs_to_tree_path,
             )
         index = self.open_index()
+        blob_normalizer = self.get_blob_normalizer()
         for fs_path in fs_paths:
             if not isinstance(fs_path, bytes):
                 fs_path = fs_path.encode(sys.getfilesystemencoding())
@@ -1040,6 +1076,7 @@ class Repo(BaseRepo):
             else:
                 if not stat.S_ISDIR(st.st_mode):
                     blob = blob_from_path_and_stat(full_path, st)
+                    blob = blob_normalizer.checkin_normalize(blob, fs_path)
                     self.object_store.add_object(blob)
                     index[tree_path] = index_entry_from_stat(st, blob.id, 0)
                 else:
@@ -1261,6 +1298,15 @@ class Repo(BaseRepo):
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
 
+    def get_blob_normalizer(self):
+        """ Return a BlobNormalizer object
+        """
+        # TODO Parse the git attributes files
+        git_attributes = {}
+        return BlobNormalizer(
+            self.get_config_stack(), git_attributes
+        )
+
 
 class MemoryRepo(BaseRepo):
     """Repo that stores refs, objects, and named files in memory.

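``get_user_identity`` centralizes the lookup that ``do_commit`` now uses for both author and committer. A sketch of the resolution order (the path is illustrative):

    from dulwich.repo import Repo, get_user_identity

    repo = Repo('/path/to/repo')  # hypothetical path
    config = repo.get_config_stack()

    # order: GIT_COMMITTER_NAME / GIT_COMMITTER_EMAIL, then user.name /
    # user.email from the config, then the gecos full name from the
    # passwd database with "<user>@<hostname>" as a last resort
    print(get_user_identity(config, kind='COMMITTER'))
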
+ 80 - 0
dulwich/tests/test_client.py

@@ -984,6 +984,86 @@ class HttpGitClientTests(TestCase):
         expected_basic_auth = 'Basic %s' % str(b64_credentials)
         self.assertEqual(basic_auth, expected_basic_auth)
 
+    def test_url_redirect_location(self):
+
+        from urllib3.response import HTTPResponse
+
+        test_data = {
+            'https://gitlab.com/inkscape/inkscape/': {
+                'redirect_url': 'https://gitlab.com/inkscape/inkscape.git/',
+                'refs_data': (b'001e# service=git-upload-pack\n00000032'
+                              b'fb2bebf4919a011f0fd7cec085443d0031228e76 '
+                              b'HEAD\n0000')
+            },
+            'https://github.com/jelmer/dulwich/': {
+                'redirect_url': 'https://github.com/jelmer/dulwich/',
+                'refs_data': (b'001e# service=git-upload-pack\n00000032'
+                              b'3ff25e09724aa4d86ea5bca7d5dd0399a3c8bfcf '
+                              b'HEAD\n0000')
+            }
+        }
+
+        tail = 'info/refs?service=git-upload-pack'
+
+        # we need to mock urllib3.PoolManager as this test would otherwise
+        # fail without an active internet connection
+        class PoolManagerMock():
+
+            def __init__(self):
+                self.headers = {}
+
+            def request(self, method, url, fields=None, headers=None,
+                        redirect=True):
+                base_url = url[:-len(tail)]
+                redirect_base_url = test_data[base_url]['redirect_url']
+                redirect_url = redirect_base_url + tail
+                headers = {
+                    'Content-Type':
+                    'application/x-git-upload-pack-advertisement'
+                }
+                body = test_data[base_url]['refs_data']
+                # urllib3 handles automatic redirection by default
+                status = 200
+                request_url = redirect_url
+                # simulate urllib3 behavior when redirect parameter is False
+                if redirect is False:
+                    request_url = url
+                    if redirect_base_url != base_url:
+                        body = ''
+                        headers['location'] = redirect_url
+                        status = 301
+                return HTTPResponse(body=body,
+                                    headers=headers,
+                                    request_method=method,
+                                    request_url=request_url,
+                                    status=status)
+
+        pool_manager = PoolManagerMock()
+
+        for base_url in test_data.keys():
+            # instantiate HttpGitClient with mocked pool manager
+            c = HttpGitClient(base_url, pool_manager=pool_manager,
+                              config=None)
+            # call method that detects url redirection
+            _, _, processed_url = c._discover_references(b'git-upload-pack',
+                                                         base_url)
+
+            # send the same request as the method above without redirection
+            resp = c.pool_manager.request('GET', base_url + tail,
+                                          redirect=False)
+
+            # check expected behavior of urllib3
+            redirect_location = resp.get_redirect_location()
+            if resp.status == 200:
+                self.assertFalse(redirect_location)
+
+            if redirect_location:
+                # check that url redirection has been correctly detected
+                self.assertEqual(processed_url, redirect_location[:-len(tail)])
+            else:
+                # check also the no redirection case
+                self.assertEqual(processed_url, base_url)
+
 
 class TCPGitClientTests(TestCase):
 

+ 112 - 0
dulwich/tests/test_line_ending.py

@@ -23,11 +23,13 @@
 """Tests for the line ending conversion."""
 
 from dulwich.line_ending import (
+    normalize_blob,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
     get_checkin_filter_autocrlf,
     get_checkout_filter_autocrlf,
 )
+from dulwich.objects import Blob
 from dulwich.tests import TestCase
 
 
@@ -92,3 +94,113 @@ class GetLineEndingAutocrlfFilters(TestCase):
         checkout_filter = get_checkout_filter_autocrlf(b"input")
 
         self.assertEqual(checkout_filter, None)
+
+
+class NormalizeBlobTestCase(TestCase):
+    def test_normalize_to_lf_no_op(self):
+        base_content = b"line1\nline2"
+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_crlf_to_lf, binary_detection=False
+        )
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+    def test_normalize_to_lf(self):
+        base_content = b"line1\r\nline2"
+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_crlf_to_lf, binary_detection=False
+        )
+
+        normalized_content = b"line1\nline2"
+        normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
+
+    def test_normalize_to_lf_binary(self):
+        base_content = b"line1\r\nline2\0"
+        base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_crlf_to_lf, binary_detection=True
+        )
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+    def test_normalize_to_crlf_no_op(self):
+        base_content = b"line1\r\nline2"
+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_lf_to_crlf, binary_detection=False
+        )
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+    def test_normalize_to_crlf(self):
+        base_content = b"line1\nline2"
+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_lf_to_crlf, binary_detection=False
+        )
+
+        normalized_content = b"line1\r\nline2"
+        normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
+
+    def test_normalize_to_crlf_binary(self):
+        base_content = b"line1\r\nline2\0"
+        base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_lf_to_crlf, binary_detection=True
+        )
+
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)

+ 20 - 0
dulwich/tests/test_objectspec.py

@@ -28,6 +28,7 @@ from dulwich.objects import (
     )
 from dulwich.objectspec import (
     parse_object,
+    parse_commit,
     parse_commit_range,
     parse_ref,
     parse_refs,
@@ -72,7 +73,26 @@ class ParseCommitRangeTests(TestCase):
         self.assertEqual([c1], list(parse_commit_range(r, c1.id)))
 
 
+class ParseCommitTests(TestCase):
+    """Test parse_commit."""
+
+    def test_nonexistent(self):
+        r = MemoryRepo()
+        self.assertRaises(KeyError, parse_commit, r, "thisdoesnotexist")
+
+    def test_commit_by_sha(self):
+        r = MemoryRepo()
+        [c1] = build_commit_graph(r.object_store, [[1]])
+        self.assertEqual(c1, parse_commit(r, c1.id))
+
+    def test_commit_by_short_sha(self):
+        r = MemoryRepo()
+        [c1] = build_commit_graph(r.object_store, [[1]])
+        self.assertEqual(c1, parse_commit(r, c1.id[:10]))
+
+
 class ParseRefTests(TestCase):
+
     def test_nonexistent(self):
         r = {}
         self.assertRaises(KeyError, parse_ref, r, b"thisdoesnotexist")

+ 71 - 0
dulwich/tests/test_porcelain.py

@@ -341,6 +341,26 @@ class AddTests(PorcelainTestCase):
             paths=["../foo"])
         self.assertEqual([], list(self.repo.open_index()))
 
+    def test_add_file_clrf_conversion(self):
+        # Set the right configuration to the repo
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", "input")
+        c.write_to_path()
+
+        # Add a file with CRLF line-ending
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'wb') as f:
+            f.write(b"line1\r\nline2")
+        porcelain.add(self.repo.path, paths=[fullpath])
+
+        # The line-endings should have been converted to LF
+        index = self.repo.open_index()
+        self.assertIn(b"foo", index)
+
+        entry = index[b"foo"]
+        blob = self.repo[entry.sha]
+        self.assertEqual(blob.data, b"line1\nline2")
+
 
 class RemoveTests(PorcelainTestCase):
 
@@ -908,6 +928,57 @@ class StatusTests(PorcelainTestCase):
         self.assertListEqual(results.unstaged, [b'blye'])
         self.assertListEqual(results.untracked, ['blyat'])
 
+    def test_status_crlf_mismatch(self):
+        # First make a commit as if the file has been added on a Linux system
+        # or with core.autocrlf=True
+        file_path = os.path.join(self.repo.path, 'crlf')
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\nline2')
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(repo=self.repo.path, message=b'test status',
+                         author=b'author <email>',
+                         committer=b'committer <email>')
+
+        # Then update the file as if it was created by CGit on a Windows
+        # system with core.autocrlf=true
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\r\nline2')
+
+        results = porcelain.status(self.repo)
+        self.assertDictEqual(
+            {'add': [], 'delete': [], 'modify': []},
+            results.staged)
+        self.assertListEqual(results.unstaged, [b'crlf'])
+        self.assertListEqual(results.untracked, [])
+
+    def test_status_crlf_convert(self):
+        # First make a commit as if the file has been added on a Linux system
+        # or with core.autocrlf=True
+        file_path = os.path.join(self.repo.path, 'crlf')
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\nline2')
+        porcelain.add(repo=self.repo.path, paths=[file_path])
+        porcelain.commit(repo=self.repo.path, message=b'test status',
+                         author=b'author <email>',
+                         committer=b'committer <email>')
+
+        # Then update the file as if it was created by CGit on a Windows
+        # system with core.autocrlf=true
+        with open(file_path, 'wb') as f:
+            f.write(b'line1\r\nline2')
+
+        # TODO: It should be set automatically by looking at the configuration
+        c = self.repo.get_config()
+        c.set("core", "autocrlf", True)
+        c.write_to_path()
+
+        results = porcelain.status(self.repo)
+        self.assertDictEqual(
+            {'add': [], 'delete': [], 'modify': []},
+            results.staged)
+        self.assertListEqual(results.unstaged, [])
+        self.assertListEqual(results.untracked, [])
+
     def test_get_tree_changes_add(self):
         """Unit test for get_tree_changes add."""
 

+ 26 - 0
dulwich/tests/test_refs.py

@@ -288,6 +288,32 @@ class RefsContainerTests(object):
             b'refs/tags/refs-0.2', ZERO_SHA))
         self.assertFalse(b'refs/tags/refs-0.2' in self._refs)
 
+    def test_import_refs_name(self):
+        self._refs[b'refs/remotes/origin/other'] = (
+            b'48d01bd4b77fed026b154d16493e5deab78f02ec')
+        self._refs.import_refs(
+            b'refs/remotes/origin',
+            {b'master': b'42d06bd4b77fed026b154d16493e5deab78f02ec'})
+        self.assertEqual(
+            b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+            self._refs[b'refs/remotes/origin/master'])
+        self.assertEqual(
+            b'48d01bd4b77fed026b154d16493e5deab78f02ec',
+            self._refs[b'refs/remotes/origin/other'])
+
+    def test_import_refs_name_prune(self):
+        self._refs[b'refs/remotes/origin/other'] = (
+            b'48d01bd4b77fed026b154d16493e5deab78f02ec')
+        self._refs.import_refs(
+            b'refs/remotes/origin',
+            {b'master': b'42d06bd4b77fed026b154d16493e5deab78f02ec'},
+            prune=True)
+        self.assertEqual(
+            b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+            self._refs[b'refs/remotes/origin/master'])
+        self.assertNotIn(
+            b'refs/remotes/origin/other', self._refs)
+
 
 class DictRefsContainerTests(RefsContainerTests, TestCase):
 

+ 27 - 0
dulwich/tests/test_repository.py

@@ -863,6 +863,33 @@ class BuildRepoRootTests(TestCase):
             b"Jelmer <jelmer@apache.org>",
             r[commit_sha].committer)
 
+    def overrideEnv(self, name, value):
+        def restore():
+            if oldval is not None:
+                os.environ[name] = oldval
+            else:
+                del os.environ[name]
+        oldval = os.environ.get(name)
+        os.environ[name] = value
+        self.addCleanup(restore)
+
+    def test_commit_config_identity_from_env(self):
+        # the committer identity is taken from the environment, while the
+        # author falls back to the config
+        self.overrideEnv('GIT_COMMITTER_NAME', 'joe')
+        self.overrideEnv('GIT_COMMITTER_EMAIL', 'joe@example.com')
+        r = self._repo
+        c = r.get_config()
+        c.set((b"user", ), b"name", b"Jelmer")
+        c.set((b"user", ), b"email", b"jelmer@apache.org")
+        c.write_to_path()
+        commit_sha = r.do_commit(b'message')
+        self.assertEqual(
+            b"Jelmer <jelmer@apache.org>",
+            r[commit_sha].author)
+        self.assertEqual(
+            b"joe <joe@example.com>",
+            r[commit_sha].committer)
+
     def test_commit_fail_ref(self):
         r = self._repo
 

+ 2 - 1
setup.py

@@ -15,7 +15,7 @@ import io
 import os
 import sys
 
-dulwich_version_string = '0.19.10'
+dulwich_version_string = '0.19.11'
 
 include_dirs = []
 # Windows MSVC support
@@ -78,6 +78,7 @@ if has_setuptools:
     setup_kwargs['extras_require'] = {
         'fastimport': ['fastimport'],
         'https': ['urllib3[secure]>=1.23'],
+        'pgp': ['gpg'],
         }
     setup_kwargs['install_requires'] = ['urllib3>=1.23', 'certifi']
     setup_kwargs['include_package_data'] = True