浏览代码

New upstream version 0.19.0

Jelmer Vernooij 7 年之前
父节点
当前提交
1d5f81137b
共有 50 个文件被更改,包括 1771 次插入755 次删除
  1. 0 1
      .travis.yml
  2. 6 0
      AUTHORS
  3. 11 7
      CONTRIBUTING.md
  4. 0 18
      MANIFEST.in
  5. 1 1
      Makefile
  6. 59 1
      NEWS
  7. 1 1
      PKG-INFO
  8. 9 3
      README.md
  9. 5 1
      TODO
  10. 0 32
      dulwich.egg-info/PKG-INFO
  11. 0 200
      dulwich.egg-info/SOURCES.txt
  12. 0 1
      dulwich.egg-info/dependency_links.txt
  13. 0 1
      dulwich.egg-info/top_level.txt
  14. 1 1
      dulwich/__init__.py
  15. 15 1
      dulwich/archive.py
  16. 223 104
      dulwich/client.py
  17. 113 19
      dulwich/config.py
  18. 23 9
      dulwich/contrib/paramiko_vendor.py
  19. 2 2
      dulwich/contrib/swift.py
  20. 7 7
      dulwich/contrib/test_swift_smoke.py
  21. 14 6
      dulwich/diff_tree.py
  22. 7 0
      dulwich/errors.py
  23. 9 10
      dulwich/fastexport.py
  24. 9 9
      dulwich/ignore.py
  25. 56 0
      dulwich/index.py
  26. 128 42
      dulwich/object_store.py
  27. 75 36
      dulwich/objects.py
  28. 65 5
      dulwich/pack.py
  29. 10 9
      dulwich/patch.py
  30. 88 47
      dulwich/porcelain.py
  31. 1 1
      dulwich/reflog.py
  32. 94 25
      dulwich/refs.py
  33. 107 26
      dulwich/repo.py
  34. 7 6
      dulwich/tests/compat/test_client.py
  35. 35 4
      dulwich/tests/test_archive.py
  36. 212 59
      dulwich/tests/test_client.py
  37. 20 2
      dulwich/tests/test_config.py
  38. 10 0
      dulwich/tests/test_diff_tree.py
  39. 18 0
      dulwich/tests/test_fastexport.py
  40. 4 2
      dulwich/tests/test_index.py
  41. 39 2
      dulwich/tests/test_object_store.py
  42. 59 0
      dulwich/tests/test_objects.py
  43. 22 0
      dulwich/tests/test_pack.py
  44. 3 3
      dulwich/tests/test_patch.py
  45. 138 33
      dulwich/tests/test_porcelain.py
  46. 25 0
      dulwich/tests/test_refs.py
  47. 20 2
      dulwich/tests/test_repository.py
  48. 1 4
      dulwich/web.py
  49. 0 5
      setup.cfg
  50. 19 7
      setup.py

+ 0 - 1
.travis.yml

@@ -10,7 +10,6 @@ python:
   - 3.5-dev
   - 3.6
   - 3.6-dev
-  - 3.7-dev
   - pypy3.3-5.2-alpha1
 
 env:

+ 6 - 0
AUTHORS

@@ -130,5 +130,11 @@ dzhuang <dzhuang.scut@gmail.com>
 Antoine Pietri <antoine.pietri1@gmail.com>
 Taras Postument <trane9991@gmail.com>
 Earl Chew <earl_chew@yahoo.com>
+Daniel Andersson <dandersson@users.noreply.github.com>
+Fabian Grünbichler <fabian.gruenbichler@student.tuwien.ac.at>
+Kenneth Lareau <kenneth.lareau@couchbase.com>
+Antoine R. Dumont (@ardumont) <antoine.romain.dumont@gmail.com>
+Alistair Broomhead <alistair.broomhead@gmail.com>
+Marcel Schnirring <mschnirring@marcel-schnirring.de>
 
 If you contributed but are missing from this list, please send me an e-mail.

+ 11 - 7
CONTRIBUTING.md

@@ -33,19 +33,23 @@ Like Linux, Git treats filenames as arbitrary bytestrings. There is no prescribe
 encoding for these strings, and although it is fairly common to use UTF-8, any
 raw byte strings are supported.
 
-For this reason, Dulwich internally treats git-based filenames as bytestrings. It is up
-to the Dulwich API user to encode and decode them if necessary.
+For this reason, Dulwich internally treats git-based filenames as bytestrings.
+It is up to the Dulwich API user to encode and decode them if necessary. In the
+future, the porcelain may accept unicode strings and convert them to bytestrings
+as necessary on the fly (using sys.getfilesystemencoding()).
 
 * git-repository related filenames: bytes
 * object sha1 digests (20 bytes long): bytes
-* object sha1 hexdigests (40 bytes long): str (bytestrings on python2, strings on python3)
+* object sha1 hexdigests (40 bytes long): str (bytestrings on python2, strings
+  on python3)
 
 Merge requests
 --------------
-Please either send pull requests to the maintainer (jelmer@jelmer.uk) or create new pull
-requests on GitHub.
+Please either send pull requests to the maintainer (jelmer@jelmer.uk) or create
+new pull requests on GitHub.
 
 Licensing
 ---------
-All contributions should be made under the same license that Dulwich itself comes under:
-both Apache License, version 2.0 or later and GNU General Public License, version 2.0 or later.
+All contributions should be made under the same license that Dulwich itself
+comes under: both Apache License, version 2.0 or later and GNU General Public
+License, version 2.0 or later.

+ 0 - 18
MANIFEST.in

@@ -1,18 +0,0 @@
-include NEWS
-include AUTHORS
-include README.md
-include README.swift.md
-include Makefile
-include COPYING
-include CONTRIBUTING.md
-include TODO
-include setup.cfg
-include dulwich/stdint.h
-recursive-include docs conf.py *.txt Makefile make.bat
-recursive-include examples *.py
-graft dulwich/tests/data
-include tox.ini
-include dulwich.cfg
-include appveyor.yml
-include .testr.conf
-include .travis.yml

+ 1 - 1
Makefile

@@ -1,4 +1,4 @@
-PYTHON = python
+PYTHON = python -Werror
 PYFLAKES = pyflakes
 PEP8 = pep8
 FLAKE8 ?= flake8

+ 59 - 1
NEWS

@@ -1,3 +1,61 @@
+0.19.0	2018-03-10
+
+ BUG FIXES
+
+  * Make `dulwich.archive` set the gzip header file modification time so that
+    archives created from the same Git tree are always identical.
+    (#577, Jonas Haag)
+
+  * Allow comment characters (#, ;) within configuration file strings
+    (Daniel Andersson, #579)
+
+  * Raise exception when passing in invalid author/committer values
+    to Repo.do_commit(). (Jelmer Vernooij, #602)
+
+ IMPROVEMENTS
+
+  * Add a fastimport ``extra``. (Jelmer Vernooij)
+
+  * Start writing reflog entries. (Jelmer Vernooij)
+
+  * Add ability to use password and keyfile ssh options with SSHVendor. (Filipp Kucheryavy)
+
+  * Add ``change_type_same`` flag to ``tree_changes``.
+    (Jelmer Vernooij)
+
+ API CHANGES
+
+  * ``GitClient.send_pack`` now accepts a ``generate_pack_data``
+    rather than a ``generate_pack_contents`` function for
+    performance reasons. (Jelmer Vernooij)
+
+  * Dulwich now uses urllib3 internally for HTTP requests.
+    The `opener` argument to `dulwich.client.HttpGitClient` that took a
+    `urllib2` opener instance has been replaced by a `pool_manager` argument
+    that takes a `urllib3` pool manager instance.
+    (Daniel Andersson)
+
+0.18.6	2017-11-11
+
+ BUG FIXES
+
+  * Fix handling of empty repositories in ``porcelain.clone``.
+   (#570, Jelmer Vernooij)
+
+  * Raise an error when attempting to add paths that are not under the
+    repository. (Jelmer Vernooij)
+
+  * Fix error message for missing trailing ]. (Daniel Andersson)
+
+  * Raise EmptyFileException when corruption (in the form of an empty
+    file) is detected. (Antoine R. Dumont, #582)
+
+  IMPROVEMENTS
+
+  * Enforce date field parsing consistency. This also add checks on
+    those date fields for potential overflow.
+    (Antoine R. Dumont, #567)
+
 0.18.5	2017-10-29
 
  BUG FIXES
@@ -9,7 +67,7 @@
 
   * Prevent setting SSH arguments from SSH URLs when using SSH through a
     subprocess. Note that Dulwich doesn't support cloning submodules.
-    (CVE 2017-1000117) (Jelmer Vernooij)
+    (CVE-2017-16228) (Jelmer Vernooij)
 
  IMPROVEMENTS
 

+ 1 - 1
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: dulwich
-Version: 0.18.5
+Version: 0.19.0
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
 Author: UNKNOWN

+ 9 - 3
README.md

@@ -1,5 +1,5 @@
-[![Build Status](https://travis-ci.org/jelmer/dulwich.png?branch=master)](https://travis-ci.org/jelmer/dulwich)
-[![Windows Build status](https://ci.appveyor.com/api/projects/status/mob7g4vnrfvvoweb?svg=true)](https://ci.appveyor.com/project/jelmer/dulwich/branch/master)
+[![Build Status](https://travis-ci.org/dulwich/dulwich.png?branch=master)](https://travis-ci.org/dulwich/dulwich)
+[![Windows Build status](https://ci.appveyor.com/api/projects/status/mob7g4vnrfvvoweb?svg=true)](https://ci.appveyor.com/project/dulwich/dulwich/branch/master)
 
 This is the Dulwich project.
 
@@ -28,6 +28,12 @@ or if you are installing from pip::
 
     $ pip install dulwich --global-option="--pure"
 
+Note that you can also specify --global-option in a
+[requirements.txt](https://pip.pypa.io/en/stable/reference/pip_install/#requirement-specifiers)
+file, e.g. like this::
+
+    dulwich --global-option=--pure
+
 Getting started
 ---------------
 
@@ -80,7 +86,7 @@ Contributing
 For a full list of contributors, see the git logs or [AUTHORS](AUTHORS).
 
 If you'd like to contribute to Dulwich, see the [CONTRIBUTING](CONTRIBUTING.md)
-file and [list of open issues](https://github.com/jelmer/dulwich/issues).
+file and [list of open issues](https://github.com/dulwich/dulwich/issues).
 
 Supported versions of Python
 ----------------------------

+ 5 - 1
TODO

@@ -1,2 +1,6 @@
 - 'git annotate' equivalent
-- repacking
+- reflog handling
+
+Performance:
+ - more efficient pack iteration
+ - more efficient delta generation

+ 0 - 32
dulwich.egg-info/PKG-INFO

@@ -1,32 +0,0 @@
-Metadata-Version: 1.1
-Name: dulwich
-Version: 0.18.5
-Summary: Python Git Library
-Home-page: https://www.dulwich.io/
-Author: UNKNOWN
-Author-email: jelmer@jelmer.uk
-License: Apachev2 or later or GPLv2
-Description: 
-              Python implementation of the Git file formats and protocols,
-              without the need to have git installed.
-        
-              All functionality is available in pure Python. Optional
-              C extensions can be built for improved performance.
-        
-              The project is named after the part of London that Mr. and Mrs. Git live
-              in in the particular Monty Python sketch.
-              
-Keywords: git
-Platform: UNKNOWN
-Classifier: Development Status :: 4 - Beta
-Classifier: License :: OSI Approved :: Apache Software License
-Classifier: Programming Language :: Python :: 2.7
-Classifier: Programming Language :: Python :: 3.3
-Classifier: Programming Language :: Python :: 3.4
-Classifier: Programming Language :: Python :: 3.5
-Classifier: Programming Language :: Python :: 3.6
-Classifier: Programming Language :: Python :: Implementation :: CPython
-Classifier: Programming Language :: Python :: Implementation :: PyPy
-Classifier: Operating System :: POSIX
-Classifier: Operating System :: Microsoft :: Windows
-Classifier: Topic :: Software Development :: Version Control

+ 0 - 200
dulwich.egg-info/SOURCES.txt

@@ -1,200 +0,0 @@
-.testr.conf
-.travis.yml
-AUTHORS
-CONTRIBUTING.md
-COPYING
-MANIFEST.in
-Makefile
-NEWS
-README.md
-README.swift.md
-TODO
-appveyor.yml
-dulwich.cfg
-setup.cfg
-setup.py
-tox.ini
-bin/dul-receive-pack
-bin/dul-upload-pack
-bin/dulwich
-docs/Makefile
-docs/conf.py
-docs/index.txt
-docs/make.bat
-docs/performance.txt
-docs/protocol.txt
-docs/tutorial/Makefile
-docs/tutorial/conclusion.txt
-docs/tutorial/encoding.txt
-docs/tutorial/file-format.txt
-docs/tutorial/index.txt
-docs/tutorial/introduction.txt
-docs/tutorial/object-store.txt
-docs/tutorial/porcelain.txt
-docs/tutorial/remote.txt
-docs/tutorial/repo.txt
-docs/tutorial/tag.txt
-dulwich/__init__.py
-dulwich/_diff_tree.c
-dulwich/_objects.c
-dulwich/_pack.c
-dulwich/archive.py
-dulwich/client.py
-dulwich/config.py
-dulwich/diff_tree.py
-dulwich/errors.py
-dulwich/fastexport.py
-dulwich/file.py
-dulwich/greenthreads.py
-dulwich/hooks.py
-dulwich/ignore.py
-dulwich/index.py
-dulwich/log_utils.py
-dulwich/lru_cache.py
-dulwich/object_store.py
-dulwich/objects.py
-dulwich/objectspec.py
-dulwich/pack.py
-dulwich/patch.py
-dulwich/porcelain.py
-dulwich/protocol.py
-dulwich/reflog.py
-dulwich/refs.py
-dulwich/repo.py
-dulwich/server.py
-dulwich/stdint.h
-dulwich/walk.py
-dulwich/web.py
-dulwich.egg-info/PKG-INFO
-dulwich.egg-info/SOURCES.txt
-dulwich.egg-info/dependency_links.txt
-dulwich.egg-info/top_level.txt
-dulwich/contrib/__init__.py
-dulwich/contrib/paramiko_vendor.py
-dulwich/contrib/release_robot.py
-dulwich/contrib/swift.py
-dulwich/contrib/test_release_robot.py
-dulwich/contrib/test_swift.py
-dulwich/contrib/test_swift_smoke.py
-dulwich/tests/__init__.py
-dulwich/tests/test_archive.py
-dulwich/tests/test_blackbox.py
-dulwich/tests/test_client.py
-dulwich/tests/test_config.py
-dulwich/tests/test_diff_tree.py
-dulwich/tests/test_fastexport.py
-dulwich/tests/test_file.py
-dulwich/tests/test_grafts.py
-dulwich/tests/test_greenthreads.py
-dulwich/tests/test_hooks.py
-dulwich/tests/test_ignore.py
-dulwich/tests/test_index.py
-dulwich/tests/test_lru_cache.py
-dulwich/tests/test_missing_obj_finder.py
-dulwich/tests/test_object_store.py
-dulwich/tests/test_objects.py
-dulwich/tests/test_objectspec.py
-dulwich/tests/test_pack.py
-dulwich/tests/test_patch.py
-dulwich/tests/test_porcelain.py
-dulwich/tests/test_protocol.py
-dulwich/tests/test_reflog.py
-dulwich/tests/test_refs.py
-dulwich/tests/test_repository.py
-dulwich/tests/test_server.py
-dulwich/tests/test_utils.py
-dulwich/tests/test_walk.py
-dulwich/tests/test_web.py
-dulwich/tests/utils.py
-dulwich/tests/compat/__init__.py
-dulwich/tests/compat/server_utils.py
-dulwich/tests/compat/test_client.py
-dulwich/tests/compat/test_pack.py
-dulwich/tests/compat/test_repository.py
-dulwich/tests/compat/test_server.py
-dulwich/tests/compat/test_utils.py
-dulwich/tests/compat/test_web.py
-dulwich/tests/compat/utils.py
-dulwich/tests/data/blobs/11/11111111111111111111111111111111111111
-dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8
-dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349
-dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
-dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
-dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
-dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
-dulwich/tests/data/indexes/index
-dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx
-dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack
-dulwich/tests/data/repos/.gitattributes
-dulwich/tests/data/repos/issue88_expect_ack_nak_client.export
-dulwich/tests/data/repos/issue88_expect_ack_nak_other.export
-dulwich/tests/data/repos/issue88_expect_ack_nak_server.export
-dulwich/tests/data/repos/server_new.export
-dulwich/tests/data/repos/server_old.export
-dulwich/tests/data/repos/a.git/HEAD
-dulwich/tests/data/repos/a.git/packed-refs
-dulwich/tests/data/repos/a.git/objects/28/237f4dc30d0d462658d6b937b08a0f0b6ef55a
-dulwich/tests/data/repos/a.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91
-dulwich/tests/data/repos/a.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec
-dulwich/tests/data/repos/a.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9
-dulwich/tests/data/repos/a.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364
-dulwich/tests/data/repos/a.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005
-dulwich/tests/data/repos/a.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097
-dulwich/tests/data/repos/a.git/objects/b0/931cadc54336e78a1d980420e3268903b57a50
-dulwich/tests/data/repos/a.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f
-dulwich/tests/data/repos/a.git/refs/heads/master
-dulwich/tests/data/repos/a.git/refs/tags/mytag
-dulwich/tests/data/repos/empty.git/HEAD
-dulwich/tests/data/repos/empty.git/config
-dulwich/tests/data/repos/empty.git/objects/info/.gitignore
-dulwich/tests/data/repos/empty.git/objects/pack/.gitignore
-dulwich/tests/data/repos/empty.git/refs/heads/.gitignore
-dulwich/tests/data/repos/empty.git/refs/tags/.gitignore
-dulwich/tests/data/repos/ooo_merge.git/HEAD
-dulwich/tests/data/repos/ooo_merge.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b
-dulwich/tests/data/repos/ooo_merge.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520
-dulwich/tests/data/repos/ooo_merge.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8
-dulwich/tests/data/repos/ooo_merge.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
-dulwich/tests/data/repos/ooo_merge.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1
-dulwich/tests/data/repos/ooo_merge.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870
-dulwich/tests/data/repos/ooo_merge.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349
-dulwich/tests/data/repos/ooo_merge.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa
-dulwich/tests/data/repos/ooo_merge.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614
-dulwich/tests/data/repos/ooo_merge.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c
-dulwich/tests/data/repos/ooo_merge.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5
-dulwich/tests/data/repos/ooo_merge.git/refs/heads/master
-dulwich/tests/data/repos/refs.git/HEAD
-dulwich/tests/data/repos/refs.git/packed-refs
-dulwich/tests/data/repos/refs.git/objects/3b/9e5457140e738c2dcd39bf6d7acf88379b90d1
-dulwich/tests/data/repos/refs.git/objects/3e/c9c43c84ff242e3ef4a9fc5bc111fd780a76a8
-dulwich/tests/data/repos/refs.git/objects/42/d06bd4b77fed026b154d16493e5deab78f02ec
-dulwich/tests/data/repos/refs.git/objects/a1/8114c31713746a33a2e70d9914d1ef3e781425
-dulwich/tests/data/repos/refs.git/objects/cd/a609072918d7b70057b6bef9f4c2537843fcfe
-dulwich/tests/data/repos/refs.git/objects/df/6800012397fb85c56e7418dd4eb9405dee075c
-dulwich/tests/data/repos/refs.git/refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa
-dulwich/tests/data/repos/refs.git/refs/heads/loop
-dulwich/tests/data/repos/refs.git/refs/heads/master
-dulwich/tests/data/repos/refs.git/refs/tags/refs-0.2
-dulwich/tests/data/repos/simple_merge.git/HEAD
-dulwich/tests/data/repos/simple_merge.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
-dulwich/tests/data/repos/simple_merge.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853
-dulwich/tests/data/repos/simple_merge.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b
-dulwich/tests/data/repos/simple_merge.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6
-dulwich/tests/data/repos/simple_merge.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
-dulwich/tests/data/repos/simple_merge.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
-dulwich/tests/data/repos/simple_merge.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8
-dulwich/tests/data/repos/simple_merge.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
-dulwich/tests/data/repos/simple_merge.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870
-dulwich/tests/data/repos/simple_merge.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349
-dulwich/tests/data/repos/simple_merge.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd
-dulwich/tests/data/repos/simple_merge.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7
-dulwich/tests/data/repos/simple_merge.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86
-dulwich/tests/data/repos/simple_merge.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
-dulwich/tests/data/repos/simple_merge.git/refs/heads/master
-dulwich/tests/data/repos/submodule/dotgit
-dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023
-dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
-examples/clone.py
-examples/config.py
-examples/diff.py
-examples/latest_change.py

+ 0 - 1
dulwich.egg-info/dependency_links.txt

@@ -1 +0,0 @@
-

+ 0 - 1
dulwich.egg-info/top_level.txt

@@ -1 +0,0 @@
-dulwich

+ 1 - 1
dulwich/__init__.py

@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 18, 5)
+__version__ = (0, 19, 0)

+ 15 - 1
dulwich/archive.py

@@ -26,6 +26,8 @@
 import posixpath
 import stat
 import tarfile
+import struct
+from os import SEEK_END
 from io import BytesIO
 from contextlib import closing
 
@@ -76,12 +78,24 @@ def tar_stream(store, tree, mtime, format=''):
     :param store: Object store to retrieve objects from
     :param tree: Tree object for the tree root
     :param mtime: UNIX timestamp that is assigned as the modification time for
-        all files
+        all files, and the gzip header modification time if format='gz'
     :param format: Optional compression format for tarball
     :return: Bytestrings
     """
     buf = BytesIO()
     with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
+        if format == 'gz':
+            # Manually correct the gzip header file modification time so that
+            # archives created from the same Git tree are always identical.
+            # The gzip header file modification time is not currenctly
+            # accessible from the tarfile API, see:
+            # https://bugs.python.org/issue31526
+            buf.seek(0)
+            assert buf.read(2) == b'\x1f\x8b', 'Invalid gzip header'
+            buf.seek(4)
+            buf.write(struct.pack('<L', mtime))
+            buf.seek(0, SEEK_END)
+
         for entry_abspath, entry in _walk_tree(store, tree):
             try:
                 blob = store[entry.sha]

+ 223 - 104
dulwich/client.py

@@ -60,6 +60,10 @@ except ImportError:
     import urllib.request as urllib2
     import urllib.parse as urlparse
 
+import certifi
+import urllib3
+import urllib3.util
+
 import dulwich
 from dulwich.errors import (
     GitProtocolError,
@@ -100,6 +104,7 @@ from dulwich.protocol import (
     parse_capability,
     )
 from dulwich.pack import (
+    write_pack_data,
     write_pack_objects,
     )
 from dulwich.refs import (
@@ -107,6 +112,13 @@ from dulwich.refs import (
     )
 
 
+if sys.version_info < (2, 7, 9):
+    # Before Python 2.7.9 the `ssl` module lacks SNI support and lags behind in
+    # security updates. Use pyOpenSSL instead.
+    import urllib3.contrib.pyopenssl
+    urllib3.contrib.pyopenssl.inject_into_urllib3()
+
+
 def _fileno_can_read(fileno):
     """Check if a file descriptor is readable."""
     return len(select.select([fileno], [], [], 0)[0]) > 0
@@ -203,7 +215,7 @@ def read_pkt_refs(proto):
         refs[ref] = sha
 
     if len(refs) == 0:
-        return None, set([])
+        return {}, set([])
     if refs == {CAPABILITIES_REF: ZERO_SHA}:
         refs = {}
     return refs, set(server_capabilities)
@@ -308,19 +320,17 @@ class GitClient(object):
         """
         raise NotImplementedError(cls.from_parsedurl)
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
-                  progress=None, write_pack=write_pack_objects):
+    def send_pack(self, path, update_refs, generate_pack_data,
+                  progress=None):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of objects and list of pack data to include
         :param progress: Optional progress function
-        :param write_pack: Function called with (file, iterable of objects) to
-            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -361,7 +371,7 @@ class GitClient(object):
             result = self.fetch_pack(
                 path, determine_wants, target.get_graph_walker(), f.write,
                 progress)
-        except:
+        except BaseException:
             abort()
             raise
         else:
@@ -635,19 +645,17 @@ class TraditionalGitClient(GitClient):
         """
         raise NotImplementedError()
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
-                  progress=None, write_pack=write_pack_objects):
+    def send_pack(self, path, update_refs, generate_pack_data,
+                  progress=None):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple with
+            number of objects and pack data to upload.
         :param progress: Optional callback called with progress updates
-        :param write_pack: Function called with (file, iterable of objects) to
-            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -666,7 +674,7 @@ class TraditionalGitClient(GitClient):
 
             try:
                 new_refs = orig_new_refs = update_refs(dict(old_refs))
-            except:
+            except BaseException:
                 proto.write_pkt_line(None)
                 raise
 
@@ -698,14 +706,16 @@ class TraditionalGitClient(GitClient):
             if (not want and
                     set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
-            objects = generate_pack_contents(have, want)
+            pack_data_count, pack_data = generate_pack_data(
+                have, want,
+                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
 
-            dowrite = len(objects) > 0
+            dowrite = bool(pack_data_count)
             dowrite = dowrite or any(old_refs.get(ref) != sha
                                      for (ref, sha) in new_refs.items()
                                      if sha != ZERO_SHA)
             if dowrite:
-                write_pack(proto.write_file(), objects)
+                write_pack_data(proto.write_file(), pack_data_count, pack_data)
 
             self._handle_receive_pack_tail(
                 proto, negotiated_capabilities, progress)
@@ -737,7 +747,7 @@ class TraditionalGitClient(GitClient):
 
             try:
                 wants = determine_wants(refs)
-            except:
+            except BaseException:
                 proto.write_pkt_line(None)
                 raise
             if wants is not None:
@@ -948,19 +958,17 @@ class LocalGitClient(GitClient):
             path = path.decode(sys.getfilesystemencoding())
         return closing(Repo(path))
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
-                  progress=None, write_pack=write_pack_objects):
+    def send_pack(self, path, update_refs, generate_pack_data,
+                  progress=None):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of items and pack data to upload.
         :param progress: Optional progress function
-        :param write_pack: Function called with (file, iterable of objects) to
-            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -988,7 +996,8 @@ class LocalGitClient(GitClient):
                     set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
 
-            target.object_store.add_objects(generate_pack_contents(have, want))
+            target.object_store.add_pack_data(
+                *generate_pack_data(have, want, ofs_delta=True))
 
             for refname, new_sha1 in new_refs.items():
                 old_sha1 = old_refs.get(refname, ZERO_SHA)
@@ -1012,11 +1021,13 @@ class LocalGitClient(GitClient):
             to fetch. Receives dictionary of name->sha, should return
             list of shas to fetch. Defaults to all shas.
         :param progress: Optional progress function
-        :return: Dictionary with all remote refs (not just those fetched)
+        :return: FetchPackResult object
         """
         with self._open_repo(path) as r:
-            return r.fetch(target, determine_wants=determine_wants,
+            refs = r.fetch(target, determine_wants=determine_wants,
                            progress=progress)
+            return FetchPackResult(refs, r.refs.get_symrefs(),
+                                   agent_string())
 
     def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                    progress=None):
@@ -1041,7 +1052,8 @@ class LocalGitClient(GitClient):
             # Note that the client still expects a 0-object pack in most cases.
             if objects_iter is None:
                 return FetchPackResult(None, symrefs, agent)
-            write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
+            protocol = ProtocolFile(None, pack_data)
+            write_pack_objects(protocol, objects_iter)
             return FetchPackResult(r.get_refs(), symrefs, agent)
 
     def get_refs(self, path):
@@ -1058,15 +1070,18 @@ default_local_git_client_cls = LocalGitClient
 class SSHVendor(object):
     """A client side SSH implementation."""
 
-    def connect_ssh(self, host, command, username=None, port=None):
+    def connect_ssh(self, host, command, username=None, port=None,
+                    password=None, key_filename=None):
         # This function was deprecated in 0.9.1
         import warnings
         warnings.warn(
             "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
             DeprecationWarning)
-        return self.run_command(host, command, username=username, port=port)
+        return self.run_command(host, command, username=username, port=port,
+                                password=password, key_filename=key_filename)
 
-    def run_command(self, host, command, username=None, port=None):
+    def run_command(self, host, command, username=None, port=None,
+                    password=None, key_filename=None):
         """Connect to an SSH server.
 
         Run a command remotely and return a file-like object for interaction
@@ -1076,6 +1091,8 @@ class SSHVendor(object):
         :param command: Command to run (as argv array)
         :param username: Optional ame of user to log in as
         :param port: Optional SSH port to use
+        :param password: Optional ssh password for login or private key
+        :param key_filename: Optional path to private keyfile
         """
         raise NotImplementedError(self.run_command)
 
@@ -1090,16 +1107,71 @@ class StrangeHostname(Exception):
 class SubprocessSSHVendor(SSHVendor):
     """SSH vendor that shells out to the local 'ssh' command."""
 
-    def run_command(self, host, command, username=None, port=None):
-        # FIXME: This has no way to deal with passwords..
+    def run_command(self, host, command, username=None, port=None,
+                    password=None, key_filename=None):
+
+        if password:
+            raise NotImplementedError(
+                "You can't set password or passphrase for ssh key "
+                "with SubprocessSSHVendor, use ParamikoSSHVendor instead"
+            )
+
         args = ['ssh', '-x']
-        if port is not None:
+
+        if port:
             args.extend(['-p', str(port)])
-        if username is not None:
+
+        if key_filename:
+            args.extend(['-i', str(key_filename)])
+
+        if username:
+            host = '%s@%s' % (username, host)
+        if host.startswith('-'):
+            raise StrangeHostname(hostname=host)
+        args.append(host)
+
+        proc = subprocess.Popen(args + [command], bufsize=0,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+        return SubprocessWrapper(proc)
+
+
+class PuttySSHVendor(SSHVendor):
+    """SSH vendor that shells out to the local 'putty' command."""
+
+    def run_command(self, host, command, username=None, port=None,
+                    password=None, key_filename=None):
+
+        if password and key_filename:
+            raise NotImplementedError(
+                "You can't set passphrase for ssh key "
+                "with PuttySSHVendor, use ParamikoSSHVendor instead"
+            )
+
+        if sys.platform == 'win32':
+            args = ['putty.exe', '-ssh']
+        else:
+            args = ['putty', '-ssh']
+
+        if password:
+            import warnings
+            warnings.warn(
+                "Invoking Putty with a password exposes the password in the "
+                "process list.")
+            args.extend(['-pw', str(password)])
+
+        if port:
+            args.extend(['-P', str(port)])
+
+        if key_filename:
+            args.extend(['-i', str(key_filename)])
+
+        if username:
             host = '%s@%s' % (username, host)
         if host.startswith('-'):
             raise StrangeHostname(hostname=host)
         args.append(host)
+
         proc = subprocess.Popen(args + [command], bufsize=0,
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE)
@@ -1122,10 +1194,12 @@ get_ssh_vendor = SubprocessSSHVendor
 class SSHGitClient(TraditionalGitClient):
 
     def __init__(self, host, port=None, username=None, vendor=None,
-                 config=None, **kwargs):
+                 config=None, password=None, key_filename=None, **kwargs):
         self.host = host
         self.port = port
         self.username = username
+        self.password = password
+        self.key_filename = key_filename
         super(SSHGitClient, self).__init__(**kwargs)
         self.alternative_paths = {}
         if vendor is not None:
@@ -1163,7 +1237,8 @@ class SSHGitClient(TraditionalGitClient):
         argv = (self._get_cmd_path(cmd).decode(self._remote_path_encoding) +
                 " '" + path + "'")
         con = self.ssh_vendor.run_command(
-            self.host, argv, port=self.port, username=self.username)
+            self.host, argv, port=self.port, username=self.username,
+            password=self.password, key_filename=self.key_filename)
         return (Protocol(con.read, con.write, con.close,
                          report_activity=self._report_activity),
                 con.can_read)
@@ -1175,47 +1250,72 @@ def default_user_agent_string():
     return "git/dulwich/%s" % ".".join([str(x) for x in dulwich.__version__])
 
 
-def default_urllib2_opener(config):
+def default_urllib3_manager(config, verify_ssl=True):
+    """Return `urllib3` connection pool manager.
+
+    Honour detected proxy configurations.
+
+    :param config: `dulwich.config.ConfigDict` instance with Git configuration.
+    :param verify_ssl: Whether SSL verification is enabled.
+    :return: `urllib3.ProxyManager` instance for proxy configurations,
+        `urllib3.PoolManager` otherwise.
+    """
+    proxy_server = user_agent = None
+
     if config is not None:
         try:
             proxy_server = config.get(b"http", b"proxy")
         except KeyError:
-            proxy_server = None
-    else:
-        proxy_server = None
-    handlers = []
-    if proxy_server is not None:
-        handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
-    opener = urllib2.build_opener(*handlers)
-    if config is not None:
+            pass
         try:
             user_agent = config.get(b"http", b"useragent")
         except KeyError:
-            user_agent = None
-    else:
-        user_agent = None
+            pass
+
+    ssl_kwargs = {}
+    if verify_ssl:
+        ssl_kwargs.update(cert_reqs="CERT_REQUIRED", ca_certs=certifi.where())
+
     if user_agent is None:
         user_agent = default_user_agent_string()
-    opener.addheaders = [('User-agent', user_agent)]
-    return opener
+
+    headers = {"User-agent": user_agent}
+
+    if proxy_server is not None:
+        # `urllib3` requires a `str` object in both Python 2 and 3, while
+        # `ConfigDict` coerces entries to `bytes` on Python 3. Compensate.
+        if not isinstance(proxy_server, str):
+            proxy_server = proxy_server.decode()
+        manager = urllib3.ProxyManager(proxy_server, headers=headers,
+                                       **ssl_kwargs)
+    else:
+        manager = urllib3.PoolManager(headers=headers, **ssl_kwargs)
+
+    return manager
 
 
 class HttpGitClient(GitClient):
 
-    def __init__(self, base_url, dumb=None, opener=None, config=None,
+    def __init__(self, base_url, dumb=None, pool_manager=None, config=None,
                  username=None, password=None, **kwargs):
         self._base_url = base_url.rstrip("/") + "/"
         self._username = username
         self._password = password
         self.dumb = dumb
-        if opener is None:
-            self.opener = default_urllib2_opener(config)
+        self.headers = {}
+
+        if pool_manager is None:
+            self.pool_manager = default_urllib3_manager(config)
         else:
-            self.opener = opener
+            self.pool_manager = pool_manager
+
         if username is not None:
-            pass_man = urllib2.HTTPPasswordMgrWithDefaultRealm()
-            pass_man.add_password(None, base_url, username, password)
-            self.opener.add_handler(urllib2.HTTPBasicAuthHandler(pass_man))
+            # No escaping needed: ":" is not allowed in username:
+            # https://tools.ietf.org/html/rfc2617#section-2
+            credentials = "%s:%s" % (username, password)
+            basic_auth = urllib3.util.make_headers(basic_auth=credentials)
+            self.pool_manager.headers.update(basic_auth)
+
         GitClient.__init__(self, **kwargs)
 
     def get_url(self, path):
@@ -1247,28 +1347,52 @@ class HttpGitClient(GitClient):
             path = path.decode(sys.getfilesystemencoding())
         return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"
 
-    def _http_request(self, url, headers={}, data=None,
+    def _http_request(self, url, headers=None, data=None,
                       allow_compression=False):
-        if headers is None:
-            headers = dict(headers.items())
-        headers["Pragma"] = "no-cache"
+        """Perform HTTP request.
+
+        :param url: Request URL.
+        :param headers: Optional custom headers to override defaults.
+        :param data: Request data.
+        :param allow_compression: Allow GZipped communication.
+        :return: Tuple (`response`, `read`), where response is an `urllib3`
+            response object with additional `content_type` and
+            `redirect_location` properties, and `read` is a consumable read
+            method for the response data.
+        """
+        req_headers = self.pool_manager.headers.copy()
+        if headers is not None:
+            req_headers.update(headers)
+        req_headers["Pragma"] = "no-cache"
         if allow_compression:
-            headers["Accept-Encoding"] = "gzip"
+            req_headers["Accept-Encoding"] = "gzip"
         else:
-            headers["Accept-Encoding"] = "identity"
-        req = urllib2.Request(url, headers=headers, data=data)
-        try:
-            resp = self.opener.open(req)
-        except urllib2.HTTPError as e:
-            if e.code == 404:
-                raise NotGitRepository()
-            if e.code != 200:
-                raise GitProtocolError("unexpected http response %d for %s" %
-                                       (e.code, url))
-        if resp.info().get('Content-Encoding') == 'gzip':
-            read = gzip.GzipFile(fileobj=BytesIO(resp.read())).read
+            req_headers["Accept-Encoding"] = "identity"
+
+        if data is None:
+            resp = self.pool_manager.request("GET", url, headers=req_headers)
         else:
-            read = resp.read
+            resp = self.pool_manager.request("POST", url, headers=req_headers,
+                                             body=data)
+
+        if resp.status == 404:
+            raise NotGitRepository()
+        elif resp.status != 200:
+            raise GitProtocolError("unexpected http resp %d for %s" %
+                                   (resp.status, url))
+
+        # TODO: Optimization available by adding `preload_content=False` to the
+        # request and just passing the `read` method on instead of going via
+        # `BytesIO`, if we can guarantee that the entire response is consumed
+        # before issuing the next to still allow for connection reuse from the
+        # pool.
+        if resp.getheader("Content-Encoding") == "gzip":
+            read = gzip.GzipFile(fileobj=BytesIO(resp.data)).read
+        else:
+            read = BytesIO(resp.data).read
+
+        resp.content_type = resp.getheader("Content-Type")
+        resp.redirect_location = resp.get_redirect_location()
 
         return resp, read
 
@@ -1281,20 +1405,16 @@ class HttpGitClient(GitClient):
         url = urlparse.urljoin(base_url, tail)
         resp, read = self._http_request(url, headers, allow_compression=True)
 
-        if url != resp.geturl():
+        if resp.redirect_location:
             # Something changed (redirect!), so let's update the base URL
-            if not resp.geturl().endswith(tail):
+            if not resp.redirect_location.endswith(tail):
                 raise GitProtocolError(
                         "Redirected from URL %s to URL %s without %s" % (
-                            url, resp.geturl(), tail))
-            base_url = resp.geturl()[:-len(tail)]
+                            url, resp.redirect_location, tail))
+            base_url = resp.redirect_location[:-len(tail)]
 
         try:
-            content_type = resp.info().gettype()
-        except AttributeError:
-            content_type = resp.info().get_content_type()
-        try:
-            self.dumb = (not content_type.startswith("application/x-git-"))
+            self.dumb = not resp.content_type.startswith("application/x-git-")
             if not self.dumb:
                 proto = Protocol(read, None)
                 # The first line should mention the service
@@ -1315,33 +1435,29 @@ class HttpGitClient(GitClient):
     def _smart_request(self, service, url, data):
         assert url[-1] == "/"
         url = urlparse.urljoin(url, service)
+        result_content_type = "application/x-%s-result" % service
         headers = {
-            "Content-Type": "application/x-%s-request" % service
+            "Content-Type": "application/x-%s-request" % service,
+            "Accept": result_content_type,
+            "Content-Length": str(len(data)),
         }
         resp, read = self._http_request(url, headers, data)
-        try:
-            content_type = resp.info().gettype()
-        except AttributeError:
-            content_type = resp.info().get_content_type()
-        if content_type != (
-                "application/x-%s-result" % service):
+        if resp.content_type != result_content_type:
             raise GitProtocolError("Invalid content-type from server: %s"
-                                   % content_type)
+                                   % resp.content_type)
         return resp, read
 
-    def send_pack(self, path, update_refs, generate_pack_contents,
-                  progress=None, write_pack=write_pack_objects):
+    def send_pack(self, path, update_refs, generate_pack_data,
+                  progress=None):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
         :param update_refs: Function to determine changes to remote refs.
             Receive dict with existing remote refs, returns dict with
             changed refs (name -> sha, where sha=ZERO_SHA for deletions)
-        :param generate_pack_contents: Function that can return a sequence of
-            the shas of the objects to upload.
+        :param generate_pack_data: Function that can return a tuple
+            with number of elements and pack data to upload.
         :param progress: Optional progress function
-        :param write_pack: Function called with (file, iterable of objects) to
-            write the objects returned by generate_pack_contents to the server.
 
         :raises SendPackError: if server rejects the pack data
         :raises UpdateRefsError: if the server supports report-status
@@ -1354,6 +1470,7 @@ class HttpGitClient(GitClient):
             b"git-receive-pack", url)
         negotiated_capabilities = self._negotiate_receive_pack_capabilities(
                 server_capabilities)
+        negotiated_capabilities.add(capability_agent())
 
         if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
             self._report_status_parser = ReportStatusParser()
@@ -1370,13 +1487,15 @@ class HttpGitClient(GitClient):
             req_proto, negotiated_capabilities, old_refs, new_refs)
         if not want and set(new_refs.items()).issubset(set(old_refs.items())):
             return new_refs
-        objects = generate_pack_contents(have, want)
-        if len(objects) > 0:
-            write_pack(req_proto.write_file(), objects)
+        pack_data_count, pack_data = generate_pack_data(
+                have, want,
+                ofs_delta=(CAPABILITY_OFS_DELTA in negotiated_capabilities))
+        if pack_data_count:
+            write_pack_data(req_proto.write_file(), pack_data_count, pack_data)
         resp, read = self._smart_request("git-receive-pack", url,
                                          data=req_data.getvalue())
         try:
-            resp_proto = Protocol(resp.read, None)
+            resp_proto = Protocol(read, None)
             self._handle_receive_pack_tail(
                 resp_proto, negotiated_capabilities, progress)
             return new_refs

+ 113 - 19
dulwich/config.py

@@ -28,8 +28,10 @@ TODO:
 
 import errno
 import os
+import sys
 
 from collections import (
+    Iterable,
     OrderedDict,
     MutableMapping,
     )
@@ -38,6 +40,72 @@ from collections import (
 from dulwich.file import GitFile
 
 
+SENTINAL = object()
+
+
+def lower_key(key):
+    if isinstance(key, (bytes, str)):
+        return key.lower()
+
+    if isinstance(key, Iterable):
+        return type(key)(
+            map(lower_key, key)
+        )
+
+    return key
+
+
+class CaseInsensitiveDict(OrderedDict):
+
+    @classmethod
+    def make(cls, dict_in=None):
+
+        if isinstance(dict_in, cls):
+            return dict_in
+
+        out = cls()
+
+        if dict_in is None:
+            return out
+
+        if not isinstance(dict_in, MutableMapping):
+            raise TypeError
+
+        for key, value in dict_in.items():
+            out[key] = value
+
+        return out
+
+    def __setitem__(self, key, value, **kwargs):
+        key = lower_key(key)
+
+        super(CaseInsensitiveDict, self).__setitem__(key, value,  **kwargs)
+
+    def __getitem__(self, item):
+        key = lower_key(item)
+
+        return super(CaseInsensitiveDict, self).__getitem__(key)
+
+    def get(self, key, default=SENTINAL):
+        try:
+            return self[key]
+        except KeyError:
+            pass
+
+        if default is SENTINAL:
+            return type(self)()
+
+        return default
+
+    def setdefault(self, key, default=SENTINAL):
+        try:
+            return self[key]
+        except KeyError:
+            self[key] = self.get(key, default)
+
+        return self[key]
+
+
 class Config(object):
     """A Git configuration."""
 
@@ -107,11 +175,12 @@ class Config(object):
 class ConfigDict(Config, MutableMapping):
     """Git configuration stored in a dictionary."""
 
-    def __init__(self, values=None):
+    def __init__(self, values=None, encoding=None):
         """Create a new ConfigDict."""
-        if values is None:
-            values = OrderedDict()
-        self._values = values
+        if encoding is None:
+            encoding = sys.getdefaultencoding()
+        self.encoding = encoding
+        self._values = CaseInsensitiveDict.make(values)
 
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self._values)
@@ -144,27 +213,42 @@ class ConfigDict(Config, MutableMapping):
         else:
             return (parts[0], None, parts[1])
 
-    def get(self, section, name):
+    def _check_section_and_name(self, section, name):
         if not isinstance(section, tuple):
             section = (section, )
+
+        section = tuple([
+            subsection.encode(self.encoding)
+            if not isinstance(subsection, bytes) else subsection
+            for subsection in section
+            ])
+
+        if not isinstance(name, bytes):
+            name = name.encode(self.encoding)
+
+        return section, name
+
+    def get(self, section, name):
+        section, name = self._check_section_and_name(section, name)
+
         if len(section) > 1:
             try:
                 return self._values[section][name]
             except KeyError:
                 pass
+
         return self._values[(section[0],)][name]
 
     def set(self, section, name, value):
-        if not isinstance(section, tuple):
-            section = (section, )
-        if not isinstance(name, bytes):
-            raise TypeError(name)
+        section, name = self._check_section_and_name(section, name)
+
         if type(value) not in (bool, bytes):
-            raise TypeError(value)
-        self._values.setdefault(section, OrderedDict())[name] = value
+            value = value.encode(self.encoding)
+
+        self._values.setdefault(section)[name] = value
 
     def iteritems(self, section):
-        return self._values.get(section, OrderedDict()).items()
+        return self._values.get(section).items()
 
     def itersections(self):
         return self._values.keys()
@@ -262,8 +346,16 @@ def _check_section_name(name):
 
 
 def _strip_comments(line):
-    line = line.split(b"#")[0]
-    line = line.split(b";")[0]
+    comment_bytes = {ord(b"#"), ord(b";")}
+    quote = ord(b'"')
+    string_open = False
+    # Normalize line to bytearray for simple 2/3 compatibility
+    for i, character in enumerate(bytearray(line)):
+        # Comment characters outside balanced quotes denote comment start
+        if character == quote:
+            string_open = not string_open
+        elif not string_open and character in comment_bytes:
+            return line[:i]
     return line
 
 
@@ -283,12 +375,12 @@ class ConfigFile(ConfigDict):
                 # Parse section header ("[bla]")
                 if len(line) > 0 and line[:1] == b"[":
                     line = _strip_comments(line).rstrip()
-                    last = line.index(b"]")
-                    if last == -1:
+                    try:
+                        last = line.index(b"]")
+                    except ValueError:
                         raise ValueError("expected trailing ]")
                     pts = line[1:last].split(b" ", 1)
                     line = line[last+1:]
-                    pts[0] = pts[0].lower()
                     if len(pts) == 2:
                         if pts[1][:1] != b"\"" or pts[1][-1:] != b"\"":
                             raise ValueError(
@@ -308,7 +400,7 @@ class ConfigFile(ConfigDict):
                             section = (pts[0], pts[1])
                         else:
                             section = (pts[0], )
-                    ret._values[section] = OrderedDict()
+                    ret._values.setdefault(section)
                 if _strip_comments(line).strip() == b"":
                     continue
                 if section is None:
@@ -318,7 +410,7 @@ class ConfigFile(ConfigDict):
                 except ValueError:
                     setting = line
                     value = b"true"
-                setting = setting.strip().lower()
+                setting = setting.strip()
                 if not _check_variable_name(setting):
                     raise ValueError("invalid variable name %s" % setting)
                 if value.endswith(b"\\\n"):
@@ -421,6 +513,8 @@ class StackedConfig(Config):
         return backends
 
     def get(self, section, name):
+        if not isinstance(section, tuple):
+            section = (section, )
         for backend in self.backends:
             try:
                 return backend.get(section, name)

+ 23 - 9
dulwich/contrib/paramiko_vendor.py

@@ -111,22 +111,36 @@ class _ParamikoWrapper(object):
 
 
 class ParamikoSSHVendor(object):
+    # http://docs.paramiko.org/en/2.4/api/client.html
 
-    def __init__(self):
-        self.ssh_kwargs = {}
+    def __init__(self, **kwargs):
+        self.kwargs = kwargs
 
-    def run_command(self, host, command, username=None, port=None,
-                    progress_stderr=None):
-        # Paramiko needs an explicit port. None is not valid
-        if port is None:
-            port = 22
+    def run_command(self, host, command,
+                    username=None, port=None,
+                    progress_stderr=None,
+                    password=None, pkey=None,
+                    key_filename=None, **kwargs):
 
         client = paramiko.SSHClient()
 
+        connection_kwargs = {'hostname': host}
+        connection_kwargs.update(self.kwargs)
+        if username:
+            connection_kwargs['username'] = username
+        if port:
+            connection_kwargs['port'] = port
+        if password:
+            connection_kwargs['password'] = password
+        if pkey:
+            connection_kwargs['pkey'] = pkey
+        if key_filename:
+            connection_kwargs['key_filename'] = key_filename
+        connection_kwargs.update(kwargs)
+
         policy = paramiko.client.MissingHostKeyPolicy()
         client.set_missing_host_key_policy(policy)
-        client.connect(host, username=username, port=port,
-                       **self.ssh_kwargs)
+        client.connect(**connection_kwargs)
 
         # Open SSH session
         channel = client.get_transport().open_session()

+ 2 - 2
dulwich/contrib/swift.py

@@ -520,9 +520,9 @@ class SwiftPackReader(object):
     def _read(self, more=False):
         if more:
             self.buff_length = self.buff_length * 2
-        l = self.base_offset
+        offset = self.base_offset
         r = min(self.base_offset + self.buff_length, self.pack_length)
-        ret = self.scon.get_object(self.filename, range="%s-%s" % (l, r))
+        ret = self.scon.get_object(self.filename, range="%s-%s" % (offset, r))
         self.buff = ret
 
     def read(self, length):

+ 7 - 7
dulwich/contrib/test_swift_smoke.py

@@ -142,7 +142,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         remote_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
         self.assertEqual(sha, remote_sha)
@@ -162,7 +162,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack("/fakerepo",
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
         remote_sha = swift_repo.refs.read_loose_ref('refs/heads/mybranch')
         self.assertEqual(sha, remote_sha)
@@ -189,7 +189,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         for branch in ('master', 'mybranch', 'pullr-108'):
             remote_shas[branch] = swift_repo.refs.read_loose_ref(
@@ -214,7 +214,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         commit_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
         otype, data = swift_repo.object_store.get_raw(commit_sha)
@@ -261,7 +261,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                              ref='refs/heads/master')
         tcp_client.send_pack("/fakerepo",
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
 
     def test_push_remove_branch(self):
         def determine_wants(*args):
@@ -277,7 +277,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo("fakerepo", self.conf)
         self.assertNotIn('refs/heads/pullr-108', swift_repo.refs.allkeys())
 
@@ -304,7 +304,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
                                          port=self.port)
         tcp_client.send_pack(self.fakerepo,
                              determine_wants,
-                             local_repo.object_store.generate_pack_contents)
+                             local_repo.object_store.generate_pack_data)
         swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
         tag_sha = swift_repo.refs.read_loose_ref('refs/tags/v1.0')
         otype, data = swift_repo.object_store.get_raw(tag_sha)

+ 14 - 6
dulwich/diff_tree.py

@@ -153,14 +153,15 @@ def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
         yield entry1, entry2
 
 
-def _skip_tree(entry):
-    if entry.mode is None or stat.S_ISDIR(entry.mode):
+def _skip_tree(entry, include_trees):
+    if entry.mode is None or (not include_trees and stat.S_ISDIR(entry.mode)):
         return _NULL_ENTRY
     return entry
 
 
 def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
-                 rename_detector=None):
+                 rename_detector=None, include_trees=False,
+                 change_type_same=False):
     """Find the differences between the contents of two trees.
 
     :param store: An ObjectStore for looking up objects.
@@ -168,10 +169,16 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
     :param tree2_id: The SHA of the target tree.
     :param want_unchanged: If True, include TreeChanges for unmodified entries
         as well.
+    :param include_trees: Whether to include trees
     :param rename_detector: RenameDetector object for detecting renames.
+    :param change_type_same: Whether to report change types in the same
+        entry or as delete+add.
     :return: Iterator over TreeChange instances for each change between the
         source and target tree.
     """
+    if include_trees and rename_detector is not None:
+        raise NotImplementedError(
+            'rename_detector and include_trees are mutually exclusive')
     if (rename_detector is not None and tree1_id is not None and
             tree2_id is not None):
         for change in rename_detector.changes_with_renames(
@@ -186,11 +193,12 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
             continue
 
         # Treat entries for trees as missing.
-        entry1 = _skip_tree(entry1)
-        entry2 = _skip_tree(entry2)
+        entry1 = _skip_tree(entry1, include_trees)
+        entry2 = _skip_tree(entry2, include_trees)
 
         if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
-            if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
+            if (stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode)
+                    and not change_type_same):
                 # File type changed: report as delete/add.
                 yield TreeChange.delete(entry1)
                 entry1 = _NULL_ENTRY

+ 7 - 0
dulwich/errors.py

@@ -164,6 +164,13 @@ class ObjectFormatException(FileFormatException):
     """Indicates an error parsing an object."""
 
 
+class EmptyFileException(FileFormatException):
+    """Indicates an empty file was found where an object's on-disk
+       representation was expected.
+
+    """
+
+
 class NoIndexPresent(Exception):
     """No index is present."""
 

+ 9 - 10
dulwich/fastexport.py

@@ -133,6 +133,11 @@ class GitImportProcessor(processor.ImportProcessor):
         self.markers = {}
         self._contents = {}
 
+    def lookup_object(self, objectish):
+        if objectish.startswith(b":"):
+            return self.markers[objectish[1:]]
+        return objectish
+
     def import_stream(self, stream):
         p = parser.ImportParser(stream)
         self.process(p.iter_commands)
@@ -168,6 +173,7 @@ class GitImportProcessor(processor.ImportProcessor):
         commit.message = cmd.message
         commit.parents = []
         if cmd.from_:
+            cmd.from_ = self.lookup_object(cmd.from_)
             self._reset_base(cmd.from_)
         for filecmd in cmd.iter_files():
             if filecmd.name == b"filemodify":
@@ -176,10 +182,7 @@ class GitImportProcessor(processor.ImportProcessor):
                     self.repo.object_store.add(blob)
                     blob_id = blob.id
                 else:
-                    assert filecmd.dataref.startswith(b":"), \
-                           ("non-marker refs not supported yet (%r)" %
-                            filecmd.dataref)
-                    blob_id = self.markers[filecmd.dataref[1:]]
+                    blob_id = self.lookup_object(filecmd.dataref)
                 self._contents[filecmd.path] = (filecmd.mode, blob_id)
             elif filecmd.name == b"filedelete":
                 del self._contents[filecmd.path]
@@ -201,9 +204,7 @@ class GitImportProcessor(processor.ImportProcessor):
         if self.last_commit != ZERO_SHA:
             commit.parents.append(self.last_commit)
         for merge in cmd.merges:
-            if merge.startswith(b':'):
-                merge = self.markers[merge[1:]]
-            commit.parents.append(merge)
+            commit.parents.append(self.lookup_object(merge))
         self.repo.object_store.add_object(commit)
         self.repo[cmd.ref] = commit.id
         self.last_commit = commit.id
@@ -230,9 +231,7 @@ class GitImportProcessor(processor.ImportProcessor):
         if cmd.from_ is None:
             from_ = ZERO_SHA
         else:
-            from_ = cmd.from_
-            if from_.startswith(b":"):
-                from_ = self.markers[from_[1:]]
+            from_ = self.lookup_object(cmd.from_)
         self._reset_base(from_)
         self.repo.refs[cmd.ref] = from_
 

+ 9 - 9
dulwich/ignore.py

@@ -106,23 +106,23 @@ def read_ignore_patterns(f):
     :return: List of patterns
     """
 
-    for l in f:
-        l = l.rstrip(b"\r\n")
+    for line in f:
+        line = line.rstrip(b"\r\n")
 
         # Ignore blank lines, they're used for readability.
-        if not l:
+        if not line:
             continue
 
-        if l.startswith(b'#'):
+        if line.startswith(b'#'):
             # Comment
             continue
 
         # Trailing spaces are ignored unless they are quoted with a backslash.
-        while l.endswith(b' ') and not l.endswith(b'\\ '):
-            l = l[:-1]
-        l = l.replace(b'\\ ', b' ')
+        while line.endswith(b' ') and not line.endswith(b'\\ '):
+            line = line[:-1]
+        line = line.replace(b'\\ ', b' ')
 
-        yield l
+        yield line
 
 
 def match_pattern(path, pattern, ignorecase=False):
@@ -257,7 +257,7 @@ def default_user_ignore_filter_path(config):
     :return: Path to a global ignore file
     """
     try:
-        return config.get(('core', ), 'excludesFile')
+        return config.get((b'core', ), b'excludesFile')
     except KeyError:
         pass
 

+ 56 - 0
dulwich/index.py

@@ -48,6 +48,11 @@ IndexEntry = collections.namedtuple(
         'flags'])
 
 
+FLAG_STAGEMASK = 0x3000
+FLAG_VALID = 0x8000
+FLAG_EXTENDED = 0x4000
+
+
 def pathsplit(path):
     """Split a /-delimited path into a directory part and a basename.
 
@@ -379,6 +384,7 @@ def changes_from_tree(names, lookup_entry, object_store, tree,
     :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
         (oldsha, newsha)
     """
+    # TODO(jelmer): Support a include_trees option
     other_names = set(names)
 
     if tree is not None:
@@ -646,3 +652,53 @@ def _fs_to_tree_path(fs_path, fs_encoding=None):
     else:
         tree_path = fs_path_bytes
     return tree_path
+
+
def iter_fresh_entries(index, root_path):
    """Iterate over current versions of index entries on disk.

    Entries whose backing file has disappeared (or has become a
    directory) are removed from the index instead of being yielded.

    :param index: Index file
    :param root_path: Root path to access from
    :return: Iterator over (path, index_entry) tuples
    """
    # Iterate over a snapshot of the paths so entries can be deleted
    # from the index while looping.
    for tree_path in set(index):
        fs_path = _tree_to_fs_path(root_path, tree_path)
        try:
            st = os.lstat(fs_path)
            blob = blob_from_path_and_stat(fs_path, st)
        except OSError as e:
            # File vanished since the index was written: drop the entry.
            if e.errno != errno.ENOENT:
                raise
            del index[tree_path]
        except IOError as e:
            # Path is now a directory: the blob entry is stale.
            if e.errno != errno.EISDIR:
                raise
            del index[tree_path]
        else:
            yield tree_path, index_entry_from_stat(st, blob.id, 0)
+
+
def iter_fresh_blobs(index, root_path):
    """Iterate over versions of blobs on disk referenced by index.

    :param index: Index file
    :param root_path: Root path to access from
    :return: Iterator over (path, sha, mode) tuples
    """
    for tree_path, raw_entry in iter_fresh_entries(index, root_path):
        fresh = IndexEntry(*raw_entry)
        yield tree_path, fresh.sha, cleanup_mode(fresh.mode)
+
+
def refresh_index(index, root_path):
    """Refresh the contents of an index.

    This is the equivalent to running 'git commit -a'.

    :param index: Index to update
    :param root_path: Root filesystem path
    """
    for path, entry in iter_fresh_entries(index, root_path):
        # Bug fix: the original stored the *path* as the value
        # (``index[path] = path``), corrupting the index with a string
        # where an index entry tuple belongs. Store the refreshed entry.
        index[path] = entry

+ 128 - 42
dulwich/object_store.py

@@ -56,10 +56,11 @@ from dulwich.pack import (
     PackData,
     PackInflater,
     iter_sha1,
+    pack_objects_to_data,
     write_pack_header,
     write_pack_index_v2,
+    write_pack_data,
     write_pack_object,
-    write_pack_objects,
     compute_file_sha,
     PackIndexer,
     PackStreamCopier,
@@ -135,17 +136,41 @@ class BaseObjectStore(object):
         """
         raise NotImplementedError(self.add_objects)
 
-    def tree_changes(self, source, target, want_unchanged=False):
+    def add_pack_data(self, count, pack_data):
+        """Add pack data to this object store.
+
+        :param num_items: Number of items to add
+        :param pack_data: Iterator over pack data tuples
+        """
+        if count == 0:
+            # Don't bother writing an empty pack file
+            return
+        f, commit, abort = self.add_pack()
+        try:
+            write_pack_data(f, count, pack_data)
+        except BaseException:
+            abort()
+            raise
+        else:
+            return commit()
+
+    def tree_changes(self, source, target, want_unchanged=False,
+                     include_trees=False, change_type_same=False):
         """Find the differences between the contents of two trees
 
         :param source: SHA1 of the source tree
         :param target: SHA1 of the target tree
         :param want_unchanged: Whether unchanged files should be reported
+        :param include_trees: Whether to include trees
+        :param change_type_same: Whether to report files changing
+            type in the same entry.
         :return: Iterator over tuples with
             (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
         """
         for change in tree_changes(self, source, target,
-                                   want_unchanged=want_unchanged):
+                                   want_unchanged=want_unchanged,
+                                   include_trees=include_trees,
+                                   change_type_same=change_type_same):
             yield ((change.old.path, change.new.path),
                    (change.old.mode, change.new.mode),
                    (change.old.sha, change.new.sha))
@@ -161,7 +186,8 @@ class BaseObjectStore(object):
             tree.
         """
         for entry, _ in walk_trees(self, tree_id, None):
-            if not stat.S_ISDIR(entry.mode) or include_trees:
+            if ((entry.mode is not None and
+                 not stat.S_ISDIR(entry.mode)) or include_trees):
                 yield entry
 
     def find_missing_objects(self, haves, wants, progress=None,
@@ -207,6 +233,18 @@ class BaseObjectStore(object):
         """
         return self.iter_shas(self.find_missing_objects(have, want, progress))
 
+    def generate_pack_data(self, have, want, progress=None, ofs_delta=True):
+        """Generate pack data objects for a set of wants/haves.
+
+        :param have: List of SHA1s of objects that should not be sent
+        :param want: List of SHA1s of objects that should be sent
+        :param ofs_delta: Whether OFS deltas can be included
+        :param progress: Optional progress reporting method
+        """
+        # TODO(jelmer): More efficient implementation
+        return pack_objects_to_data(
+            self.generate_pack_contents(have, want, progress))
+
     def peel_sha(self, sha):
         """Peel all tags from a SHA.
 
@@ -392,7 +430,7 @@ class PackBasedObjectStore(BaseObjectStore):
         return self._get_loose_object(sha) is not None
 
     def get_raw(self, name):
-        """Obtain the raw text for an object.
+        """Obtain the raw fulltext for an object.
 
         :param name: sha for the object.
         :return: tuple with numeric type and object contents.
@@ -429,17 +467,7 @@ class PackBasedObjectStore(BaseObjectStore):
             __len__.
         :return: Pack object of the objects written.
         """
-        if len(objects) == 0:
-            # Don't bother writing an empty pack file
-            return
-        f, commit, abort = self.add_pack()
-        try:
-            write_pack_objects(f, objects)
-        except:
-            abort()
-            raise
-        else:
-            return commit()
+        return self.add_pack_data(*pack_objects_to_data(objects))
 
 
 class DiskObjectStore(PackBasedObjectStore):
@@ -477,14 +505,14 @@ class DiskObjectStore(PackBasedObjectStore):
                 return
             raise
         with f:
-            for l in f.readlines():
-                l = l.rstrip(b"\n")
-                if l[0] == b"#":
+            for line in f.readlines():
+                line = line.rstrip(b"\n")
+                if line[0] == b"#":
                     continue
-                if os.path.isabs(l):
-                    yield l.decode(sys.getfilesystemencoding())
+                if os.path.isabs(line):
+                    yield line.decode(sys.getfilesystemencoding())
                 else:
-                    yield os.path.join(self.path, l).decode(
+                    yield os.path.join(self.path, line).decode(
                         sys.getfilesystemencoding())
 
     def add_alternate_path(self, path):
@@ -679,6 +707,14 @@ class DiskObjectStore(PackBasedObjectStore):
             basename = self._get_pack_basepath(entries)
             with GitFile(basename+".idx", "wb") as f:
                 write_pack_index_v2(f, entries, p.get_stored_checksum())
+        if self._pack_cache is None or self._pack_cache_stale():
+            self._update_pack_cache()
+        try:
+            return self._pack_cache[basename]
+        except KeyError:
+            pass
+        else:
+            os.unlink(path)
         os.rename(path, basename + ".pack")
         final_pack = Pack(basename)
         self._add_known_pack(basename, final_pack)
@@ -863,32 +899,13 @@ class MemoryObjectStore(BaseObjectStore):
                                       delta_iter=indexer)
             copier.verify()
             self._complete_thin_pack(f, indexer)
-        except:
+        except BaseException:
             abort()
             raise
         else:
             commit()
 
 
-class ObjectImporter(object):
-    """Interface for importing objects."""
-
-    def __init__(self, count):
-        """Create a new ObjectImporter.
-
-        :param count: Number of objects that's going to be imported.
-        """
-        self.count = count
-
-    def add_object(self, object):
-        """Add an object."""
-        raise NotImplementedError(self.add_object)
-
-    def finish(self, object):
-        """Finish the import and write objects to disk."""
-        raise NotImplementedError(self.finish)
-
-
 class ObjectIterator(object):
     """Interface for iterating over objects."""
 
@@ -951,6 +968,19 @@ class ObjectStoreIterator(ObjectIterator):
         """Return the number of objects."""
         return len(list(self.itershas()))
 
def empty(self):
    """Check whether this iterator would yield any objects.

    Bug fix: the original *called* the generator object returned by
    ``itershas()`` (``iter()``), which raises TypeError — never
    StopIteration. Advance the iterator with ``next()`` instead.

    :return: True if no objects are available, False otherwise
    """
    it = self.itershas()
    try:
        next(it)
    except StopIteration:
        return True
    return False

def __bool__(self):
    """Indicate whether this object has contents."""
    return not self.empty()
+
 
 def tree_lookup_path(lookup_obj, root_sha, path):
     """Look up an object in a Git tree.
@@ -1218,3 +1248,59 @@ def commit_tree_changes(object_store, tree, changes):
             tree[name] = (stat.S_IFDIR, subtree.id)
     object_store.add_object(tree)
     return tree
+
+
class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores.

    Reads consult each store in ``bases`` in order; writes are delegated
    to ``add_store`` when one is provided.
    """

    def __init__(self, bases, add_store=None):
        """Create a new overlay store.

        :param bases: Sequence of object stores to read from
        :param add_store: Optional object store that receives new objects
        """
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        """Add a single object to the writable store, if any."""
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects):
        """Add multiple objects to the writable store, if any."""
        if self.add_store is None:
            # Bug fix: previously referenced self.add_object here.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects)

    @property
    def packs(self):
        """List of packs from all underlying stores."""
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        """Iterate over object ids, deduplicated across the base stores."""
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def get_raw(self, sha_id):
        """Obtain the raw fulltext for an object from the first store
        that has it.

        :raise KeyError: if no base store contains the object
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha):
        """Check if a particular object is present, packed, in any base."""
        return any(b.contains_packed(sha) for b in self.bases)

    def contains_loose(self, sha):
        """Check if a particular object is present, loose, in any base."""
        return any(b.contains_loose(sha) for b in self.bases)

+ 75 - 36
dulwich/objects.py

@@ -27,6 +27,7 @@ from collections import namedtuple
 import os
 import posixpath
 import stat
+import sys
 import warnings
 import zlib
 from hashlib import sha1
@@ -38,6 +39,7 @@ from dulwich.errors import (
     NotTagError,
     NotTreeError,
     ObjectFormatException,
+    EmptyFileException,
     )
 from dulwich.file import GitFile
 
@@ -63,6 +65,9 @@ _TAGGER_HEADER = b'tagger'
 S_IFGITLINK = 0o160000
 
 
+MAX_TIME = 9223372036854775807  # (2**63) - 1 - signed long int max
+
+
 def S_ISGITLINK(m):
     """Check if a mode indicates a submodule.
 
@@ -190,8 +195,22 @@ def check_identity(identity, error_msg):
         raise ObjectFormatException(error_msg)
 
 
def check_time(time_seconds):
    """Check if the specified time is not prone to overflow error.

    This will raise an exception if the time is not valid.

    :param time_seconds: commit/tag timestamp in seconds since the epoch
    :raise ObjectFormatException: if the timestamp exceeds MAX_TIME
    """
    # Reject timestamps that cannot be represented as a signed 64-bit
    # integer, which would overflow in downstream consumers.
    if time_seconds > MAX_TIME:
        raise ObjectFormatException(
            'Date field should not exceed %s' % MAX_TIME)
+
+
 def git_line(*items):
-    """Formats items into a space sepreated line."""
+    """Formats items into a space separated line."""
     return b' '.join(items) + b'\n'
 
 
@@ -286,9 +305,14 @@ class ShaFile(object):
         """
         return b''.join(self.as_raw_chunks())
 
-    def __str__(self):
-        """Return raw string serialization of this object."""
-        return self.as_raw_string()
+    if sys.version_info[0] >= 3:
+        def __bytes__(self):
+            """Return raw string serialization of this object."""
+            return self.as_raw_string()
+    else:
+        def __str__(self):
+            """Return raw string serialization of this object."""
+            return self.as_raw_string()
 
     def __hash__(self):
         """Return unique hash for this object."""
@@ -345,6 +369,9 @@ class ShaFile(object):
     @classmethod
     def _parse_file(cls, f):
         map = f.read()
+        if not map:
+            raise EmptyFileException('Corrupted empty file detected')
+
         if cls._is_legacy_object(map):
             obj = cls._parse_legacy_object_header(map, f)
             obj._parse_legacy_object(map)
@@ -622,18 +649,18 @@ def _parse_message(chunks):
     #
     # Headers can contain newlines. The next line is indented with a space.
     # We store the latest key as 'k', and the accumulated value as 'v'.
-    for l in f:
-        if l.startswith(b' '):
+    for line in f:
+        if line.startswith(b' '):
             # Indented continuation of the previous line
-            v += l[1:]
+            v += line[1:]
         else:
             if k is not None:
                 # We parsed a new header, return its value
                 yield (k, _strip_last_newline(v))
-            if l == b'\n':
+            if line == b'\n':
                 # Empty line indicates end of headers
                 break
-            (k, v) = l.split(b' ', 1)
+            (k, v) = line.split(b' ', 1)
 
     else:
         # We reached end of file before the headers ended. We still need to
@@ -694,6 +721,9 @@ class Tag(ShaFile):
         if getattr(self, "_tagger", None):
             check_identity(self._tagger, "invalid tagger")
 
+        self._check_has_member("_tag_time", "missing tag time")
+        check_time(self._tag_time)
+
         last = None
         for field, _ in _parse_message(self._chunked_text):
             if field == _OBJECT_HEADER and last is not None:
@@ -742,23 +772,10 @@ class Tag(ShaFile):
             elif field == _TAG_HEADER:
                 self._name = value
             elif field == _TAGGER_HEADER:
-                try:
-                    sep = value.index(b'> ')
-                except ValueError:
-                    self._tagger = value
-                    self._tag_time = None
-                    self._tag_timezone = None
-                    self._tag_timezone_neg_utc = False
-                else:
-                    self._tagger = value[0:sep+1]
-                    try:
-                        (timetext, timezonetext) = (
-                                value[sep+2:].rsplit(b' ', 1))
-                        self._tag_time = int(timetext)
-                        self._tag_timezone, self._tag_timezone_neg_utc = (
-                                parse_timezone(timezonetext))
-                    except ValueError as e:
-                        raise ObjectFormatException(e)
+                (self._tagger,
+                 self._tag_time,
+                 (self._tag_timezone,
+                  self._tag_timezone_neg_utc)) = parse_time_entry(value)
             elif field is None:
                 self._message = value
             else:
@@ -810,8 +827,8 @@ def parse_tree(text, strict=False):
     :raise ObjectFormatException: if the object was malformed in some way
     """
     count = 0
-    l = len(text)
-    while count < l:
+    length = len(text)
+    while count < length:
         mode_end = text.index(b' ', count)
         mode_text = text[count:mode_end]
         if strict and mode_text.startswith(b'0'):
@@ -1084,6 +1101,29 @@ def format_timezone(offset, unnecessary_negative_timezone=False):
             (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
 
 
def parse_time_entry(value):
    """Parse an author/committer/tagger line into its components.

    :param value: Bytes representing a git commit/tag line
    :raise ObjectFormatException: in case of parsing error (malformed
            field date)
    :return: Tuple of (person, time, (timezone, timezone_neg_utc))
    """
    try:
        sep = value.index(b'> ')
    except ValueError:
        # No "> " terminator: the whole value is the identity, with
        # unknown time and timezone.
        return (value, None, (None, False))
    person = value[:sep + 1]
    try:
        timetext, timezonetext = value[sep + 2:].rsplit(b' ', 1)
        when = int(timetext)
        tz_info = parse_timezone(timezonetext)
    except ValueError as e:
        raise ObjectFormatException(e)
    return person, when, tz_info
+
+
 def parse_commit(chunks):
     """Parse a commit object from chunks.
 
@@ -1108,14 +1148,9 @@ def parse_commit(chunks):
         elif field == _PARENT_HEADER:
             parents.append(value)
         elif field == _AUTHOR_HEADER:
-            author, timetext, timezonetext = value.rsplit(b' ', 2)
-            author_time = int(timetext)
-            author_info = (author, author_time, parse_timezone(timezonetext))
+            author_info = parse_time_entry(value)
         elif field == _COMMITTER_HEADER:
-            committer, timetext, timezonetext = value.rsplit(b' ', 2)
-            commit_time = int(timetext)
-            commit_info = (
-                    committer, commit_time, parse_timezone(timezonetext))
+            commit_info = parse_time_entry(value)
         elif field == _ENCODING_HEADER:
             encoding = value
         elif field == _MERGETAG_HEADER:
@@ -1177,7 +1212,8 @@ class Commit(ShaFile):
         self._check_has_member("_tree", "missing tree")
         self._check_has_member("_author", "missing author")
         self._check_has_member("_committer", "missing committer")
-        # times are currently checked when set
+        self._check_has_member("_author_time", "missing author time")
+        self._check_has_member("_commit_time", "missing commit time")
 
         for parent in self._parents:
             check_hexsha(parent, "invalid parent sha")
@@ -1186,6 +1222,9 @@ class Commit(ShaFile):
         check_identity(self._author, "invalid author")
         check_identity(self._committer, "invalid committer")
 
+        check_time(self._author_time)
+        check_time(self._commit_time)
+
         last = None
         for field, _ in _parse_message(self._chunked_text):
             if field == _TREE_HEADER and last is not None:

+ 65 - 5
dulwich/pack.py

@@ -393,6 +393,16 @@ class PackIndex(object):
             sha = hex_to_sha(sha)
         return self._object_index(sha)
 
def object_sha1(self, index):
    """Return the SHA1 corresponding to the index in the pack file.

    :param index: Offset of the object within the pack file
    :raise KeyError: if no entry has that offset
    """
    # PERFORMANCE/TODO(jelmer): Avoid scanning entire index
    for name, offset, crc32 in self.iterentries():
        if offset == index:
            return name
    raise KeyError(index)
+
     def _object_index(self, sha):
         """See object_index.
 
@@ -422,8 +432,10 @@ class MemoryPackIndex(PackIndex):
         :param pack_checksum: Optional pack checksum
         """
         self._by_sha = {}
+        self._by_index = {}
         for name, idx, crc32 in entries:
             self._by_sha[name] = idx
+            self._by_index[idx] = name
         self._entries = entries
         self._pack_checksum = pack_checksum
 
@@ -436,6 +448,9 @@ class MemoryPackIndex(PackIndex):
     def _object_index(self, sha):
         return self._by_sha[sha][0]
 
+    def object_sha1(self, index):
+        return self._by_index[index]
+
     def _itersha(self):
         return iter(self._by_sha)
 
@@ -1220,6 +1235,19 @@ class PackData(object):
         if actual != stored:
             raise ChecksumMismatch(stored, actual)
 
def get_compressed_data_at(self, offset):
    """Return the still-compressed data stored at ``offset`` in the pack.

    Using the associated index the location of an object can be looked
    up, and then the packfile can be asked directly for that object
    using this function.

    :param offset: Byte offset into the pack file (past the header)
    :return: Tuple of (pack type number, delta base, compressed chunks)
    """
    assert offset >= self._header_size
    self._file.seek(offset)
    read = self._file.read
    unpacked, _ = unpack_object(read, include_comp=True)
    return (unpacked.pack_type_num,
            unpacked.delta_base,
            unpacked.comp_chunks)
+
     def get_object_at(self, offset):
         """Given an offset in to the packfile return the object that is there.
 
@@ -1524,6 +1552,7 @@ def deltify_pack_objects(objects, window_size=None):
     :return: Iterator over type_num, object id, delta_base, content
         delta_base is None for full text entries
     """
+    # TODO(jelmer): Use threads
     if window_size is None:
         window_size = DEFAULT_PACK_DELTA_WINDOW_SIZE
     # Build a list of objects ordered by the magic Linus heuristic
@@ -1552,7 +1581,19 @@ def deltify_pack_objects(objects, window_size=None):
             possible_bases.pop()
 
 
-def write_pack_objects(f, objects, delta_window_size=None, deltify=False):
def pack_objects_to_data(objects):
    """Create pack data from objects

    :param objects: Pack objects
    :return: Tuples with (type_num, hexdigest, delta base, object chunks)
    """
    def records():
        # Full-text entries only: no deltification, so delta base is None.
        for obj, path in objects:
            yield (obj.type_num, obj.sha().digest(), None,
                   obj.as_raw_string())
    return (len(objects), records())
+
+
+def write_pack_objects(f, objects, delta_window_size=None, deltify=None):
     """Write a new pack data file.
 
     :param f: File to write to
@@ -1563,14 +1604,17 @@ def write_pack_objects(f, objects, delta_window_size=None, deltify=False):
     :param deltify: Whether to deltify objects
     :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
     """
+    if deltify is None:
+        # PERFORMANCE/TODO(jelmer): This should be enabled but is *much* too
+        # slow at the moment.
+        deltify = False
     if deltify:
         pack_contents = deltify_pack_objects(objects, delta_window_size)
+        pack_contents_count = len(objects)
     else:
-        pack_contents = (
-            (o.type_num, o.sha().digest(), None, o.as_raw_string())
-            for (o, path) in objects)
+        pack_contents_count, pack_contents = pack_objects_to_data(objects)
 
-    return write_pack_data(f, len(objects), pack_contents)
+    return write_pack_data(f, pack_contents_count, pack_contents)
 
 
 def write_pack_data(f, num_records, records):
@@ -1919,6 +1963,22 @@ class Pack(object):
         except KeyError:
             return False
 
def get_raw_unresolved(self, sha1):
    """Get raw unresolved data for a SHA.

    :param sha1: SHA to return data for
    :return: Tuple with pack object type, delta base (if applicable),
        list of data chunks
    """
    offset = self.index.object_index(sha1)
    obj_type, delta_base, chunks = self.data.get_compressed_data_at(
            offset)
    if obj_type != OFS_DELTA:
        return (obj_type, delta_base, chunks)
    # Convert an offset delta into a ref delta so the result is
    # self-contained: resolve the base's absolute offset back to its SHA1.
    base_sha = sha_to_hex(self.index.object_sha1(offset - delta_base))
    return (REF_DELTA, base_sha, chunks)
+
     def get_raw(self, sha1):
         offset = self.index.object_index(sha1)
         obj_type, obj = self.data.get_object_at(offset)

+ 10 - 9
dulwich/patch.py

@@ -291,6 +291,7 @@ def git_am_patch_split(f, encoding=None):
     :return: Tuple with commit object, diff contents and git version
     """
     encoding = encoding or getattr(f, "encoding", "ascii")
+    encoding = encoding or "ascii"
     contents = f.read()
     if (isinstance(contents, bytes) and
             getattr(email.parser, "BytesParser", None)):
@@ -326,22 +327,22 @@ def parse_patch_message(msg, encoding=None):
     lines = body.splitlines(True)
     line_iter = iter(lines)
 
-    for l in line_iter:
-        if l == b"---\n":
+    for line in line_iter:
+        if line == b"---\n":
             break
         if first:
-            if l.startswith(b"From: "):
-                c.author = l[len(b"From: "):].rstrip()
+            if line.startswith(b"From: "):
+                c.author = line[len(b"From: "):].rstrip()
             else:
-                c.message += b"\n" + l
+                c.message += b"\n" + line
             first = False
         else:
-            c.message += l
+            c.message += line
     diff = b""
-    for l in line_iter:
-        if l == b"-- \n":
+    for line in line_iter:
+        if line == b"-- \n":
             break
-        diff += l
+        diff += line
     try:
         version = next(line_iter).rstrip(b"\n")
     except StopIteration:

+ 88 - 47
dulwich/porcelain.py

@@ -50,6 +50,8 @@ Currently implemented:
 
 These functions are meant to behave similarly to the git subcommands.
 Differences in behaviour are considered bugs.
+
+Functions should generally accept both unicode strings and bytestrings
 """
 
 from collections import namedtuple
@@ -116,7 +118,10 @@ from dulwich.protocol import (
     Protocol,
     ZERO_SHA,
     )
-from dulwich.refs import ANNOTATED_TAG_SUFFIX
+from dulwich.refs import (
+    ANNOTATED_TAG_SUFFIX,
+    strip_peeled_refs,
+)
 from dulwich.repo import (BaseRepo, Repo)
 from dulwich.server import (
     FileSystemBackend,
@@ -215,13 +220,13 @@ def symbolic_ref(repo, ref_name, force=False):
     :param force: force settings without checking if it exists in refs/heads
     """
     with open_repo_closing(repo) as repo_obj:
-        ref_path = b'refs/heads/' + ref_name
+        ref_path = _make_branch_ref(ref_name)
         if not force and ref_path not in repo_obj.refs.keys():
             raise ValueError('fatal: ref `%s` is not a ref' % ref_name)
         repo_obj.refs.set_symbolic_ref(b'HEAD', ref_path)
 
 
-def commit(repo=".", message=None, author=None, committer=None):
+def commit(repo=".", message=None, author=None, committer=None, encoding=None):
     """Create a new commit.
 
     :param repo: Path to repository
@@ -232,8 +237,16 @@ def commit(repo=".", message=None, author=None, committer=None):
     """
     # FIXME: Support --all argument
     # FIXME: Support --signoff argument
+    if getattr(message, 'encode', None):
+        message = message.encode(encoding or DEFAULT_ENCODING)
+    if getattr(author, 'encode', None):
+        author = author.encode(encoding or DEFAULT_ENCODING)
+    if getattr(committer, 'encode', None):
+        committer = committer.encode(encoding or DEFAULT_ENCODING)
     with open_repo_closing(repo) as r:
-        return r.do_commit(message=message, author=author, committer=committer)
+        return r.do_commit(
+                message=message, author=author, committer=committer,
+                encoding=encoding)
 
 
 def commit_tree(repo, tree, message=None, author=None, committer=None):
@@ -267,7 +280,7 @@ def init(path=".", bare=False):
 
 def clone(source, target=None, bare=False, checkout=None,
           errstream=default_bytes_err_stream, outstream=None,
-          origin=b"origin"):
+          origin=b"origin", **kwargs):
     """Clone a local or remote git repository.
 
     :param source: Path or URL for source repository
@@ -292,7 +305,7 @@ def clone(source, target=None, bare=False, checkout=None,
         raise ValueError("checkout and bare are incompatible")
 
     config = StackedConfig.default()
-    client, host_path = get_transport_and_path(source, config=config)
+    client, host_path = get_transport_and_path(source, config=config, **kwargs)
 
     if target is None:
         target = host_path.split("/")[-1]
@@ -305,22 +318,21 @@ def clone(source, target=None, bare=False, checkout=None,
     else:
         r = Repo.init(target)
     try:
-        remote_refs = client.fetch(
+        fetch_result = client.fetch(
             host_path, r, determine_wants=r.object_store.determine_wants_all,
             progress=errstream.write)
+        ref_message = b"clone: from " + source.encode('utf-8')
         r.refs.import_refs(
             b'refs/remotes/' + origin,
-            {n[len(b'refs/heads/'):]: v for (n, v) in remote_refs.items()
-                if n.startswith(b'refs/heads/')})
+            {n[len(b'refs/heads/'):]: v for (n, v) in fetch_result.refs.items()
+                if n.startswith(b'refs/heads/')},
+            message=ref_message)
         r.refs.import_refs(
             b'refs/tags',
-            {n[len(b'refs/tags/'):]: v for (n, v) in remote_refs.items()
+            {n[len(b'refs/tags/'):]: v for (n, v) in fetch_result.refs.items()
                 if n.startswith(b'refs/tags/') and
-                not n.endswith(ANNOTATED_TAG_SUFFIX)})
-        if b"HEAD" in remote_refs and not bare:
-            # TODO(jelmer): Support symref capability,
-            # https://github.com/jelmer/dulwich/issues/485
-            r[b"HEAD"] = remote_refs[b"HEAD"]
+                not n.endswith(ANNOTATED_TAG_SUFFIX)},
+            message=ref_message)
         target_config = r.get_config()
         if not isinstance(source, bytes):
             source = source.encode(DEFAULT_ENCODING)
@@ -329,10 +341,18 @@ def clone(source, target=None, bare=False, checkout=None,
             (b'remote', origin), b'fetch',
             b'+refs/heads/*:refs/remotes/' + origin + b'/*')
         target_config.write_to_path()
-        if checkout and b"HEAD" in r.refs:
-            errstream.write(b'Checking out HEAD\n')
-            r.reset_index()
-    except:
+        # TODO(jelmer): Support symref capability,
+        # https://github.com/jelmer/dulwich/issues/485
+        try:
+            head = r[fetch_result.refs[b"HEAD"]]
+        except KeyError:
+            head = None
+        else:
+            r[b'HEAD'] = head.id
+        if checkout and not bare and head is not None:
+            errstream.write(b'Checking out ' + head.id + b'\n')
+            r.reset_index(head.tree)
+    except BaseException:
         r.close()
         raise
 
@@ -357,6 +377,8 @@ def add(repo=".", paths=None):
             paths = [paths]
         for p in paths:
             relpath = os.path.relpath(p, r.path)
+            if relpath.startswith('../'):
+                raise ValueError('path %r is not in repo' % relpath)
             # FIXME: Support patterns, directories.
             if ignore_manager.is_ignored(relpath):
                 ignored.add(relpath)
@@ -681,7 +703,7 @@ def tag_create(
         else:
             tag_id = object.id
 
-        r.refs[b'refs/tags/' + tag] = tag_id
+        r.refs[_make_tag_ref(tag)] = tag_id
 
 
 def list_tags(*args, **kwargs):
@@ -716,7 +738,7 @@ def tag_delete(repo, name):
         else:
             raise TypeError("Unexpected tag name type %r" % name)
         for name in names:
-            del r.refs[b"refs/tags/" + name]
+            del r.refs[_make_tag_ref(name)]
 
 
 def reset(repo, mode, treeish="HEAD"):
@@ -737,7 +759,7 @@ def reset(repo, mode, treeish="HEAD"):
 
 def push(repo, remote_location, refspecs,
          outstream=default_bytes_out_stream,
-         errstream=default_bytes_err_stream):
+         errstream=default_bytes_err_stream, **kwargs):
     """Remote push with dulwich via dulwich.client
 
     :param repo: Path to repository
@@ -752,7 +774,7 @@ def push(repo, remote_location, refspecs,
 
         # Get the client and path
         client, path = get_transport_and_path(
-                remote_location, config=r.get_config_stack())
+                remote_location, config=r.get_config_stack(), **kwargs)
 
         selected_refs = []
 
@@ -771,7 +793,8 @@ def push(repo, remote_location, refspecs,
         remote_location_bytes = client.get_url(path).encode(err_encoding)
         try:
             client.send_pack(
-                path, update_refs, r.object_store.generate_pack_contents,
+                path, update_refs,
+                generate_pack_data=r.object_store.generate_pack_data,
                 progress=errstream.write)
             errstream.write(
                 b"Push to " + remote_location_bytes + b" successful.\n")
@@ -783,7 +806,7 @@ def push(repo, remote_location, refspecs,
 
 def pull(repo, remote_location=None, refspecs=None,
          outstream=default_bytes_out_stream,
-         errstream=default_bytes_err_stream):
+         errstream=default_bytes_err_stream, **kwargs):
     """Pull from remote via dulwich.client
 
     :param repo: Path to repository
@@ -807,13 +830,13 @@ def pull(repo, remote_location=None, refspecs=None,
                 parse_reftuples(remote_refs, r.refs, refspecs))
             return [remote_refs[lh] for (lh, rh, force) in selected_refs]
         client, path = get_transport_and_path(
-                remote_location, config=r.get_config_stack())
-        remote_refs = client.fetch(
+                remote_location, config=r.get_config_stack(), **kwargs)
+        fetch_result = client.fetch(
             path, r, progress=errstream.write, determine_wants=determine_wants)
         for (lh, rh, force) in selected_refs:
-            r.refs[rh] = remote_refs[lh]
+            r.refs[rh] = fetch_result.refs[lh]
         if selected_refs:
-            r[b'HEAD'] = remote_refs[selected_refs[0][1]]
+            r[b'HEAD'] = fetch_result.refs[selected_refs[0][1]]
 
         # Perform 'git checkout .' - syncs staged changes
         tree = r[b"HEAD"].tree
@@ -988,6 +1011,18 @@ def receive_pack(path=".", inf=None, outf=None):
     return 0
 
 
+def _make_branch_ref(name):
+    if getattr(name, 'encode', None):
+        name = name.encode(DEFAULT_ENCODING)
+    return b"refs/heads/" + name
+
+
+def _make_tag_ref(name):
+    if getattr(name, 'encode', None):
+        name = name.encode(DEFAULT_ENCODING)
+    return b"refs/tags/" + name
+
+
 def branch_delete(repo, name):
     """Delete a branch.
 
@@ -995,14 +1030,12 @@ def branch_delete(repo, name):
     :param name: Name of the branch
     """
     with open_repo_closing(repo) as r:
-        if isinstance(name, bytes):
-            names = [name]
-        elif isinstance(name, list):
+        if isinstance(name, list):
             names = name
         else:
-            raise TypeError("Unexpected branch name type %r" % name)
+            names = [name]
         for name in names:
-            del r.refs[b"refs/heads/" + name]
+            del r.refs[_make_branch_ref(name)]
 
 
 def branch_create(repo, name, objectish=None, force=False):
@@ -1017,10 +1050,13 @@ def branch_create(repo, name, objectish=None, force=False):
         if objectish is None:
             objectish = "HEAD"
         object = parse_object(r, objectish)
-        refname = b"refs/heads/" + name
-        if refname in r.refs and not force:
-            raise KeyError("Branch with name %s already exists." % name)
-        r.refs[refname] = object.id
+        refname = _make_branch_ref(name)
+        ref_message = b"branch: Created from " + objectish.encode('utf-8')
+        if force:
+            r.refs.set_if_equals(refname, None, object.id, message=ref_message)
+        else:
+            if not r.refs.add_if_new(refname, object.id, message=ref_message):
+                raise KeyError("Branch with name %s already exists." % name)
 
 
 def branch_list(repo):
@@ -1032,31 +1068,36 @@ def branch_list(repo):
         return r.refs.keys(base=b"refs/heads/")
 
 
-def fetch(repo, remote_location, outstream=sys.stdout,
-          errstream=default_bytes_err_stream):
+def fetch(repo, remote_location, remote_name=b'origin', outstream=sys.stdout,
+          errstream=default_bytes_err_stream, **kwargs):
     """Fetch objects from a remote server.
 
     :param repo: Path to the repository
     :param remote_location: String identifying a remote server
+    :param remote_name: Name for remote server
     :param outstream: Output stream (defaults to stdout)
     :param errstream: Error stream (defaults to stderr)
     :return: Dictionary with refs on the remote
     """
     with open_repo_closing(repo) as r:
         client, path = get_transport_and_path(
-                remote_location, config=r.get_config_stack())
-        remote_refs = client.fetch(path, r, progress=errstream.write)
-    return remote_refs
+                remote_location, config=r.get_config_stack(), **kwargs)
+        fetch_result = client.fetch(path, r, progress=errstream.write)
+        ref_name = b'refs/remotes/' + remote_name
+        r.refs.import_refs(ref_name, strip_peeled_refs(fetch_result.refs))
+    return fetch_result.refs
 
 
-def ls_remote(remote):
+def ls_remote(remote, config=None, **kwargs):
     """List the refs in a remote.
 
     :param remote: Remote repository location
+    :param config: Configuration to use
     :return: Dictionary with remote refs
     """
-    config = StackedConfig.default()
-    client, host_path = get_transport_and_path(remote, config=config)
+    if config is None:
+        config = StackedConfig.default()
+    client, host_path = get_transport_and_path(remote, config=config, **kwargs)
     return client.get_refs(host_path)
 
 
@@ -1166,7 +1207,7 @@ def update_head(repo, target, detached=False, new_branch=None):
     """
     with open_repo_closing(repo) as r:
         if new_branch is not None:
-            to_set = b"refs/heads/" + new_branch.encode(DEFAULT_ENCODING)
+            to_set = _make_branch_ref(new_branch)
         else:
             to_set = b"HEAD"
         if detached:

+ 1 - 1
dulwich/reflog.py

@@ -48,7 +48,7 @@ def format_reflog_line(old_sha, new_sha, committer, timestamp, timezone,
     if old_sha is None:
         old_sha = ZERO_SHA
     return (old_sha + b' ' + new_sha + b' ' + committer + b' ' +
-            str(timestamp).encode('ascii') + b' ' +
+            str(int(timestamp)).encode('ascii') + b' ' +
             format_timezone(timezone) + b'\t' + message)
 
 

+ 94 - 25
dulwich/refs.py

@@ -94,11 +94,25 @@ def check_ref_format(refname):
 class RefsContainer(object):
     """A container for refs."""
 
-    def set_symbolic_ref(self, name, other):
+    def __init__(self, logger=None):
+        self._logger = logger
+
+    def _log(self, ref, old_sha, new_sha, committer=None, timestamp=None,
+             timezone=None, message=None):
+        if self._logger is None:
+            return
+        if message is None:
+            return
+        self._logger(ref, old_sha, new_sha, committer, timestamp,
+                     timezone, message)
+
+    def set_symbolic_ref(self, name, other, committer=None, timestamp=None,
+                         timezone=None, message=None):
         """Make a ref point at another ref.
 
         :param name: Name of the ref to set
         :param other: Name of the ref to point at
+        :param message: Optional message
         """
         raise NotImplementedError(self.set_symbolic_ref)
 
@@ -122,9 +136,11 @@ class RefsContainer(object):
         """
         return None
 
-    def import_refs(self, base, other):
+    def import_refs(self, base, other, committer=None, timestamp=None,
+                    timezone=None, message=None):
         for name, value in other.items():
-            self[b'/'.join((base, name))] = value
+            self.set_if_equals(b'/'.join((base, name)), None, value,
+                               message=message)
 
     def allkeys(self):
         """All refs present in this container."""
@@ -256,7 +272,8 @@ class RefsContainer(object):
             raise KeyError(name)
         return sha
 
-    def set_if_equals(self, name, old_ref, new_ref):
+    def set_if_equals(self, name, old_ref, new_ref, committer=None,
+                      timestamp=None, timezone=None, message=None):
         """Set a refname to new_ref only if it currently equals old_ref.
 
         This method follows all symbolic references if applicable for the
@@ -267,12 +284,18 @@ class RefsContainer(object):
         :param old_ref: The old sha the refname must refer to, or None to set
             unconditionally.
         :param new_ref: The new sha the refname will refer to.
+        :param message: Message for reflog
         :return: True if the set was successful, False otherwise.
         """
         raise NotImplementedError(self.set_if_equals)
 
     def add_if_new(self, name, ref):
-        """Add a new reference only if it does not already exist."""
+        """Add a new reference only if it does not already exist.
+
+        :param name: Ref name
+        :param ref: Ref value
+        :param message: Message for reflog
+        """
         raise NotImplementedError(self.add_if_new)
 
     def __setitem__(self, name, ref):
@@ -289,7 +312,8 @@ class RefsContainer(object):
         """
         self.set_if_equals(name, None, ref)
 
-    def remove_if_equals(self, name, old_ref):
+    def remove_if_equals(self, name, old_ref, committer=None,
+                         timestamp=None, timezone=None, message=None):
         """Remove a refname only if it currently equals old_ref.
 
         This method does not follow symbolic references, even if applicable for
@@ -299,6 +323,7 @@ class RefsContainer(object):
         :param name: The refname to delete.
         :param old_ref: The old sha the refname must refer to, or None to
             delete unconditionally.
+        :param message: Message for reflog
         :return: True if the delete was successful, False otherwise.
         """
         raise NotImplementedError(self.remove_if_equals)
@@ -340,7 +365,8 @@ class DictRefsContainer(RefsContainer):
     threadsafe.
     """
 
-    def __init__(self, refs):
+    def __init__(self, refs, logger=None):
+        super(DictRefsContainer, self).__init__(logger=logger)
         self._refs = refs
         self._peeled = {}
 
@@ -353,31 +379,46 @@ class DictRefsContainer(RefsContainer):
     def get_packed_refs(self):
         return {}
 
-    def set_symbolic_ref(self, name, other):
+    def set_symbolic_ref(self, name, other, committer=None,
+                         timestamp=None, timezone=None, message=None):
+        old = self.follow(name)[-1]
         self._refs[name] = SYMREF + other
+        self._log(name, old, old, committer=committer, timestamp=timestamp,
+                  timezone=timezone, message=message)
 
-    def set_if_equals(self, name, old_ref, new_ref):
+    def set_if_equals(self, name, old_ref, new_ref, committer=None,
+                      timestamp=None, timezone=None, message=None):
         if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
             return False
         realnames, _ = self.follow(name)
         for realname in realnames:
             self._check_refname(realname)
+            old = self._refs.get(realname)
             self._refs[realname] = new_ref
+            self._log(realname, old, new_ref, committer=committer,
+                      timestamp=timestamp, timezone=timezone, message=message)
         return True
 
-    def add_if_new(self, name, ref):
+    def add_if_new(self, name, ref, committer=None, timestamp=None,
+                   timezone=None, message=None):
         if name in self._refs:
             return False
         self._refs[name] = ref
+        self._log(name, None, ref, committer=committer, timestamp=timestamp,
+                  timezone=timezone, message=message)
         return True
 
-    def remove_if_equals(self, name, old_ref):
+    def remove_if_equals(self, name, old_ref, committer=None, timestamp=None,
+                         timezone=None, message=None):
         if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
             return False
         try:
-            del self._refs[name]
+            old = self._refs.pop(name)
         except KeyError:
             pass
+        else:
+            self._log(name, old, None, committer=committer,
+                      timestamp=timestamp, timezone=timezone, message=message)
         return True
 
     def get_peeled(self, name):
@@ -431,7 +472,8 @@ class InfoRefsContainer(RefsContainer):
 class DiskRefsContainer(RefsContainer):
     """Refs container that reads refs from disk."""
 
-    def __init__(self, path, worktree_path=None):
+    def __init__(self, path, worktree_path=None, logger=None):
+        super(DiskRefsContainer, self).__init__(logger=logger)
         self.path = path
         self.worktree_path = worktree_path or path
         self._packed_refs = None
@@ -589,11 +631,13 @@ class DiskRefsContainer(RefsContainer):
         finally:
             f.abort()
 
-    def set_symbolic_ref(self, name, other):
+    def set_symbolic_ref(self, name, other, committer=None, timestamp=None,
+                         timezone=None, message=None):
         """Make a ref point at another ref.
 
         :param name: Name of the ref to set
         :param other: Name of the ref to point at
+        :param message: Optional message to describe the change
         """
         self._check_refname(name)
         self._check_refname(other)
@@ -605,10 +649,16 @@ class DiskRefsContainer(RefsContainer):
             except (IOError, OSError):
                 f.abort()
                 raise
+            else:
+                sha = self.follow(name)[-1]
+                self._log(name, sha, sha, committer=committer,
+                          timestamp=timestamp, timezone=timezone,
+                          message=message)
         finally:
             f.close()
 
-    def set_if_equals(self, name, old_ref, new_ref):
+    def set_if_equals(self, name, old_ref, new_ref, committer=None,
+                      timestamp=None, timezone=None, message=None):
         """Set a refname to new_ref only if it currently equals old_ref.
 
         This method follows all symbolic references, and can be used to perform
@@ -618,6 +668,7 @@ class DiskRefsContainer(RefsContainer):
         :param old_ref: The old sha the refname must refer to, or None to set
             unconditionally.
         :param new_ref: The new sha the refname will refer to.
+        :param message: Set message for reflog
         :return: True if the set was successful, False otherwise.
         """
         self._check_refname(name)
@@ -647,9 +698,12 @@ class DiskRefsContainer(RefsContainer):
             except (OSError, IOError):
                 f.abort()
                 raise
+            self._log(realname, old_ref, new_ref, committer=committer,
+                      timestamp=timestamp, timezone=timezone, message=message)
         return True
 
-    def add_if_new(self, name, ref):
+    def add_if_new(self, name, ref, committer=None, timestamp=None,
+                   timezone=None, message=None):
         """Add a new reference only if it does not already exist.
 
         This method follows symrefs, and only ensures that the last ref in the
@@ -657,6 +711,7 @@ class DiskRefsContainer(RefsContainer):
 
         :param name: The refname to set.
         :param ref: The new sha the refname will refer to.
+        :param message: Optional message for reflog
         :return: True if the add was successful, False otherwise.
         """
         try:
@@ -678,9 +733,14 @@ class DiskRefsContainer(RefsContainer):
             except (OSError, IOError):
                 f.abort()
                 raise
+            else:
+                self._log(name, None, ref, committer=committer,
+                          timestamp=timestamp, timezone=timezone,
+                          message=message)
         return True
 
-    def remove_if_equals(self, name, old_ref):
+    def remove_if_equals(self, name, old_ref, committer=None, timestamp=None,
+                         timezone=None, message=None):
         """Remove a refname only if it currently equals old_ref.
 
         This method does not follow symbolic references. It can be used to
@@ -689,6 +749,7 @@ class DiskRefsContainer(RefsContainer):
         :param name: The refname to delete.
         :param old_ref: The old sha the refname must refer to, or None to
             delete unconditionally.
+        :param message: Optional message
         :return: True if the delete was successful, False otherwise.
         """
         self._check_refname(name)
@@ -709,6 +770,8 @@ class DiskRefsContainer(RefsContainer):
                 if e.errno != errno.ENOENT:
                     raise
             self._remove_packed_ref(name)
+            self._log(name, old_ref, None, committer=committer,
+                      timestamp=timestamp, timezone=timezone, message=message)
         finally:
             # never write, we just wanted the lock
             f.abort()
@@ -753,23 +816,23 @@ def read_packed_refs_with_peeled(f):
     :param f: file-like object to read from, seek'ed to the second line
     """
     last = None
-    for l in f:
-        if l[0] == b'#':
+    for line in f:
+        if line[0] == b'#':
             continue
-        l = l.rstrip(b'\r\n')
-        if l.startswith(b'^'):
+        line = line.rstrip(b'\r\n')
+        if line.startswith(b'^'):
             if not last:
                 raise PackedRefsException("unexpected peeled ref line")
-            if not valid_hexsha(l[1:]):
-                raise PackedRefsException("Invalid hex sha %r" % l[1:])
+            if not valid_hexsha(line[1:]):
+                raise PackedRefsException("Invalid hex sha %r" % line[1:])
             sha, name = _split_ref_line(last)
             last = None
-            yield (sha, name, l[1:])
+            yield (sha, name, line[1:])
         else:
             if last:
                 sha, name = _split_ref_line(last)
                 yield (sha, name, None)
-            last = l
+            last = line
     if last:
         sha, name = _split_ref_line(last)
         yield (sha, name, None)
@@ -819,3 +882,9 @@ def write_info_refs(refs, store):
 
 def is_local_branch(x):
     return x.startswith(b'refs/heads/')
+
+
+def strip_peeled_refs(refs):
+    """Remove all peeled refs"""
+    return {ref: sha for (ref, sha) in refs.items()
+            if not ref.endswith(ANNOTATED_TAG_SUFFIX)}

+ 107 - 26
dulwich/repo.py

@@ -33,6 +33,7 @@ import errno
 import os
 import sys
 import stat
+import time
 
 from dulwich.errors import (
     NoIndexPresent,
@@ -106,6 +107,27 @@ BASE_DIRECTORIES = [
 DEFAULT_REF = b'refs/heads/master'
 
 
+class InvalidUserIdentity(Exception):
+    """User identity is not of the format 'user <email>'"""
+
+    def __init__(self, identity):
+        self.identity = identity
+
+
+def check_user_identity(identity):
+    """Verify that a user identity is formatted correctly.
+
+    :param identity: User identity bytestring
+    :raise InvalidUserIdentity: Raised when identity is invalid
+    """
+    try:
+        fst, snd = identity.split(b' <', 1)
+    except ValueError:
+        raise InvalidUserIdentity(identity)
+    if b'>' not in snd:
+        raise InvalidUserIdentity(identity)
+
+
 def parse_graftpoints(graftpoints):
     """Convert a list of graftpoints into a dict
 
@@ -194,14 +216,14 @@ class BaseRepo(object):
         self._put_named_file('description', b"Unnamed repository")
         f = BytesIO()
         cf = ConfigFile()
-        cf.set(b"core", b"repositoryformatversion", b"0")
+        cf.set("core", "repositoryformatversion", "0")
         if self._determine_file_mode():
-            cf.set(b"core", b"filemode", True)
+            cf.set("core", "filemode", True)
         else:
-            cf.set(b"core", b"filemode", False)
+            cf.set("core", "filemode", False)
 
-        cf.set(b"core", b"bare", bare)
-        cf.set(b"core", b"logallrefupdates", True)
+        cf.set("core", "bare", bare)
+        cf.set("core", "logallrefupdates", True)
         cf.write_to_file(f)
         self._put_named_file('config', f.getvalue())
         self._put_named_file(os.path.join('info', 'exclude'), b'')
@@ -516,9 +538,28 @@ class BaseRepo(object):
     def _get_user_identity(self):
         """Determine the identity to use for new commits.
         """
+        user = os.environ.get("GIT_COMMITTER_NAME")
+        email = os.environ.get("GIT_COMMITTER_EMAIL")
         config = self.get_config_stack()
-        return (config.get((b"user", ), b"name") + b" <" +
-                config.get((b"user", ), b"email") + b">")
+        if user is None:
+            try:
+                user = config.get(("user", ), "name")
+            except KeyError:
+                user = None
+        if email is None:
+            try:
+                email = config.get(("user", ), "email")
+            except KeyError:
+                email = None
+        if user is None:
+            import getpass
+            user = getpass.getuser().encode(sys.getdefaultencoding())
+        if email is None:
+            import getpass
+            import socket
+            email = ("{}@{}".format(getpass.getuser(), socket.gethostname())
+                     .encode(sys.getdefaultencoding()))
+        return (user + b" <" + email + b">")
 
     def _add_graftpoints(self, updated_graftpoints):
         """Add or modify graftpoints
@@ -585,9 +626,8 @@ class BaseRepo(object):
             # FIXME: Read merge heads from .git/MERGE_HEADS
             merge_heads = []
         if committer is None:
-            # FIXME: Support GIT_COMMITTER_NAME/GIT_COMMITTER_EMAIL environment
-            # variables
             committer = self._get_user_identity()
+        check_user_identity(committer)
         c.committer = committer
         if commit_timestamp is None:
             # FIXME: Support GIT_COMMITTER_DATE environment variable
@@ -602,6 +642,7 @@ class BaseRepo(object):
             # variables
             author = committer
         c.author = author
+        check_user_identity(author)
         if author_timestamp is None:
             # FIXME: Support GIT_AUTHOR_DATE environment variable
             author_timestamp = commit_timestamp
@@ -633,11 +674,17 @@ class BaseRepo(object):
                 old_head = self.refs[ref]
                 c.parents = [old_head] + merge_heads
                 self.object_store.add_object(c)
-                ok = self.refs.set_if_equals(ref, old_head, c.id)
+                ok = self.refs.set_if_equals(
+                    ref, old_head, c.id, message=b"commit: " + message,
+                    committer=committer, timestamp=commit_timestamp,
+                    timezone=commit_timezone)
             except KeyError:
                 c.parents = merge_heads
                 self.object_store.add_object(c)
-                ok = self.refs.add_if_new(ref, c.id)
+                ok = self.refs.add_if_new(
+                        ref, c.id, message=b"commit: " + message,
+                        committer=committer, timestamp=commit_timestamp,
+                        timezone=commit_timezone)
             if not ok:
                 # Fail if the atomic compare-and-swap failed, leaving the
                 # commit and all its objects as garbage.
@@ -707,7 +754,8 @@ class Repo(BaseRepo):
         self.path = root
         object_store = DiskObjectStore(
             os.path.join(self.commondir(), OBJECTDIR))
-        refs = DiskRefsContainer(self.commondir(), self._controldir)
+        refs = DiskRefsContainer(self.commondir(), self._controldir,
+                                 logger=self._write_reflog)
         BaseRepo.__init__(self, object_store, refs)
 
         self._graftpoints = {}
@@ -726,6 +774,28 @@ class Repo(BaseRepo):
         self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir())
         self.hooks['post-commit'] = PostCommitShellHook(self.controldir())
 
+    def _write_reflog(self, ref, old_sha, new_sha, committer, timestamp,
+                      timezone, message):
+        from .reflog import format_reflog_line
+        path = os.path.join(
+                self.controldir(), 'logs',
+                ref.decode(sys.getfilesystemencoding()))
+        try:
+            os.makedirs(os.path.dirname(path))
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        if committer is None:
+            committer = self._get_user_identity()
+        check_user_identity(committer)
+        if timestamp is None:
+            timestamp = int(time.time())
+        if timezone is None:
+            timezone = 0  # FIXME
+        with open(path, 'ab') as f:
+            f.write(format_reflog_line(old_sha, new_sha, committer,
+                    timestamp, timezone, message) + b'\n')
+
     @classmethod
     def discover(cls, start='.'):
         """Iterate parent directories to discover a repository
@@ -896,27 +966,33 @@ class Repo(BaseRepo):
         else:
             target = self.init_bare(target_path, mkdir=mkdir)
         self.fetch(target)
+        encoded_path = self.path
+        if not isinstance(encoded_path, bytes):
+            encoded_path = encoded_path.encode(sys.getfilesystemencoding())
+        ref_message = b"clone: from " + encoded_path
         target.refs.import_refs(
-            b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'))
+            b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'),
+            message=ref_message)
         target.refs.import_refs(
-            b'refs/tags', self.refs.as_dict(b'refs/tags'))
+            b'refs/tags', self.refs.as_dict(b'refs/tags'),
+            message=ref_message)
         try:
-            target.refs.add_if_new(DEFAULT_REF, self.refs[DEFAULT_REF])
+            target.refs.add_if_new(
+                    DEFAULT_REF, self.refs[DEFAULT_REF],
+                    message=ref_message)
         except KeyError:
             pass
         target_config = target.get_config()
-        encoded_path = self.path
-        if not isinstance(encoded_path, bytes):
-            encoded_path = encoded_path.encode(sys.getfilesystemencoding())
-        target_config.set((b'remote', b'origin'), b'url', encoded_path)
-        target_config.set((b'remote', b'origin'), b'fetch',
-                          b'+refs/heads/*:refs/remotes/origin/*')
+        target_config.set(('remote', 'origin'), 'url', encoded_path)
+        target_config.set(('remote', 'origin'), 'fetch',
+                          '+refs/heads/*:refs/remotes/origin/*')
         target_config.write_to_path()
 
         # Update target head
         head_chain, head_sha = self.refs.follow(b'HEAD')
         if head_chain and head_sha is not None:
-            target.refs.set_symbolic_ref(b'HEAD', head_chain[-1])
+            target.refs.set_symbolic_ref(b'HEAD', head_chain[-1],
+                                         message=ref_message)
             target[b'HEAD'] = head_sha
 
             if not bare:
@@ -939,8 +1015,8 @@ class Repo(BaseRepo):
             tree = self[b'HEAD'].tree
         config = self.get_config()
         honor_filemode = config.get_boolean(
-            'core', 'filemode', os.name != "nt")
-        if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
+            b'core', b'filemode', os.name != "nt")
+        if config.get_boolean(b'core', b'core.protectNTFS', os.name == "nt"):
             validate_path_element = validate_path_element_ntfs
         else:
             validate_path_element = validate_path_element_default
@@ -1092,12 +1168,17 @@ class MemoryRepo(BaseRepo):
 
     def __init__(self):
         from dulwich.config import ConfigFile
-        BaseRepo.__init__(self, MemoryObjectStore(), DictRefsContainer({}))
+        self._reflog = []
+        refs_container = DictRefsContainer({}, logger=self._append_reflog)
+        BaseRepo.__init__(self, MemoryObjectStore(), refs_container)
         self._named_files = {}
         self.bare = True
         self._config = ConfigFile()
         self._description = None
 
+    def _append_reflog(self, *args):
+        self._reflog.append(args)
+
     def set_description(self, description):
         self._description = description
 
@@ -1161,6 +1242,6 @@ class MemoryRepo(BaseRepo):
         for obj in objects:
             ret.object_store.add_object(obj)
         for refname, sha in refs.items():
-            ret.refs[refname] = sha
+            ret.refs.add_if_new(refname, sha)
         ret._init_files(bare=True)
         return ret

+ 7 - 6
dulwich/tests/compat/test_client.py

@@ -105,7 +105,7 @@ class DulwichClientTestBase(object):
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
             c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
-                        src.object_store.generate_pack_contents)
+                        src.object_store.generate_pack_data)
 
     def test_send_pack(self):
         self._do_send_pack()
@@ -125,7 +125,7 @@ class DulwichClientTestBase(object):
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
             c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
-                        src.object_store.generate_pack_contents)
+                        src.object_store.generate_pack_data)
             self.assertDestEqualsSrc()
 
     def make_dummy_commit(self, dest):
@@ -152,7 +152,7 @@ class DulwichClientTestBase(object):
     def compute_send(self, src):
         sendrefs = dict(src.get_refs())
         del sendrefs[b'HEAD']
-        return sendrefs, src.object_store.generate_pack_contents
+        return sendrefs, src.object_store.generate_pack_data
 
     def test_send_pack_one_error(self):
         dest, dummy_commit = self.disable_ff_and_make_dummy_commit()
@@ -251,8 +251,8 @@ class DulwichClientTestBase(object):
             sendrefs[b'refs/heads/abranch'] = b"00" * 20
             del sendrefs[b'HEAD']
 
-            def gen_pack(have, want):
-                return []
+            def gen_pack(have, want, ofs_delta=False):
+                return 0, []
             c = self._client()
             self.assertEqual(dest.refs[b"refs/heads/abranch"], dummy_commit)
             c.send_pack(
@@ -327,7 +327,8 @@ class DulwichTCPClientTest(CompatTestCase, DulwichClientTestBase):
 class TestSSHVendor(object):
 
     @staticmethod
-    def run_command(host, command, username=None, port=None):
+    def run_command(host, command, username=None, port=None,
+                    password=None, key_filename=None):
         cmd, path = command.split(' ')
         cmd = cmd.split('-', 1)
         path = path.replace("'", "")

+ 35 - 4
dulwich/tests/test_archive.py

@@ -22,6 +22,8 @@
 
 from io import BytesIO
 import tarfile
+import struct
+from unittest import skipUnless
 
 from dulwich.archive import tar_stream
 from dulwich.object_store import (
@@ -38,6 +40,11 @@ from dulwich.tests.utils import (
     build_commit_graph,
     )
 
+try:
+    from mock import patch
+except ImportError:
+    patch = None
+
 
 class ArchiveTests(TestCase):
 
@@ -51,15 +58,39 @@ class ArchiveTests(TestCase):
         self.addCleanup(tf.close)
         self.assertEqual([], tf.getnames())
 
-    def test_simple(self):
+    def _get_example_tar_stream(self, *tar_stream_args, **tar_stream_kwargs):
         store = MemoryObjectStore()
         b1 = Blob.from_string(b"somedata")
         store.add_object(b1)
         t1 = Tree()
         t1.add(b"somename", 0o100644, b1.id)
         store.add_object(t1)
-        stream = b''.join(tar_stream(store, t1, 10))
-        out = BytesIO(stream)
-        tf = tarfile.TarFile(fileobj=out)
+        stream = b''.join(
+            tar_stream(store, t1, *tar_stream_args, **tar_stream_kwargs))
+        return BytesIO(stream)
+
+    def test_simple(self):
+        stream = self._get_example_tar_stream(mtime=0)
+        tf = tarfile.TarFile(fileobj=stream)
         self.addCleanup(tf.close)
         self.assertEqual(["somename"], tf.getnames())
+
+    def test_gzip_mtime(self):
+        stream = self._get_example_tar_stream(mtime=1234, format='gz')
+        expected_mtime = struct.pack('<L', 1234)
+        self.assertEqual(stream.getvalue()[4:8], expected_mtime)
+
+    @skipUnless(patch, "Required mock.patch")
+    def test_same_file(self):
+        contents = [None, None]
+        for format in ['', 'gz', 'bz2']:
+            for i in [0, 1]:
+                with patch('time.time', return_value=i):
+                    stream = self._get_example_tar_stream(
+                        mtime=0, format=format)
+                    contents[i] = stream.getvalue()
+            self.assertEqual(
+                contents[0],
+                contents[1],
+                "Different file contents for format %r" % format
+                )

+ 212 - 59
dulwich/tests/test_client.py

@@ -19,14 +19,11 @@
 #
 
 from io import BytesIO
+import base64
 import sys
 import shutil
 import tempfile
-
-try:
-    import urllib2
-except ImportError:
-    import urllib.request as urllib2
+import warnings
 
 try:
     from urllib import quote as urlquote
@@ -38,6 +35,9 @@ try:
 except ImportError:
     import urllib.parse as urlparse
 
+import certifi
+import urllib3
+
 import dulwich
 from dulwich import (
     client,
@@ -52,8 +52,9 @@ from dulwich.client import (
     SendPackError,
     StrangeHostname,
     SubprocessSSHVendor,
+    PuttySSHVendor,
     UpdateRefsError,
-    default_urllib2_opener,
+    default_urllib3_manager,
     get_transport_and_path,
     get_transport_and_path_from_url,
     )
@@ -68,6 +69,8 @@ from dulwich.protocol import (
     Protocol,
     )
 from dulwich.pack import (
+    pack_objects_to_data,
+    write_pack_data,
     write_pack_objects,
     )
 from dulwich.objects import (
@@ -82,6 +85,7 @@ from dulwich.tests import skipIf
 from dulwich.tests.utils import (
     open_repo,
     tear_down_repo,
+    setup_warning_catcher,
     )
 
 
@@ -97,6 +101,23 @@ class DummyClient(TraditionalGitClient):
         return Protocol(self.read, self.write), self.can_read
 
 
+class DummyPopen():
+
+    def __init__(self, *args, **kwards):
+        self.stdin = BytesIO(b"stdin")
+        self.stdout = BytesIO(b"stdout")
+        self.stderr = BytesIO(b"stderr")
+        self.returncode = 0
+        self.args = args
+        self.kwargs = kwards
+
+    def communicate(self, *args, **kwards):
+        return ('Running', '')
+
+    def wait(self, *args, **kwards):
+        return False
+
+
 # TODO(durin42): add unit-level tests of GitClient
 class GitClientTests(TestCase):
 
@@ -131,10 +152,10 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def check_heads(heads):
-            self.assertIs(heads, None)
+            self.assertEqual(heads, {})
             return []
         ret = self.client.fetch_pack(b'/', check_heads, None, None)
-        self.assertIs(None, ret.refs)
+        self.assertEqual({}, ret.refs)
         self.assertEqual({}, ret.symrefs)
 
     def test_fetch_pack_ignores_magic_ref(self):
@@ -147,10 +168,10 @@ class GitClientTests(TestCase):
         self.rin.seek(0)
 
         def check_heads(heads):
-            self.assertEquals({}, heads)
+            self.assertEqual({}, heads)
             return []
         ret = self.client.fetch_pack(b'bla', check_heads, None, None, None)
-        self.assertIs(None, ret.refs)
+        self.assertEqual({}, ret.refs)
         self.assertEqual({}, ret.symrefs)
         self.assertEqual(self.rout.getvalue(), b'0000')
 
@@ -198,12 +219,12 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/foo/bar': commit.id, }
 
-        def generate_pack_contents(have, want):
-            return [(commit, None), (tree, ''), ]
+        def generate_pack_data(have, want, ofs_delta=False):
+            return pack_objects_to_data([(commit, None), (tree, ''), ])
 
         self.assertRaises(UpdateRefsError,
                           self.client.send_pack, "blah",
-                          determine_wants, generate_pack_contents)
+                          determine_wants, generate_pack_data)
 
     def test_send_pack_none(self):
         self.rin.write(
@@ -219,10 +240,10 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertEqual(self.rout.getvalue(), b'0000')
 
     def test_send_pack_keep_and_delete(self):
@@ -238,10 +259,10 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f310ca9477129b8586fa2afc779c1f57cf64bba6c '
@@ -263,10 +284,10 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f310ca9477129b8586fa2afc779c1f57cf64bba6c '
@@ -293,12 +314,12 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
         f = BytesIO()
         write_pack_objects(f, {})
-        self.client.send_pack('/', determine_wants, generate_pack_contents)
+        self.client.send_pack('/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f0000000000000000000000000000000000000000 '
@@ -336,12 +357,12 @@ class GitClientTests(TestCase):
                     b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
-        def generate_pack_contents(have, want):
-            return [(commit, None), (tree, b''), ]
+        def generate_pack_data(have, want, ofs_delta=False):
+            return pack_objects_to_data([(commit, None), (tree, b''), ])
 
         f = BytesIO()
-        write_pack_objects(f, generate_pack_contents(None, None))
-        self.client.send_pack(b'/', determine_wants, generate_pack_contents)
+        write_pack_data(f, *generate_pack_data(None, None))
+        self.client.send_pack(b'/', determine_wants, generate_pack_data)
         self.assertIn(
             self.rout.getvalue(),
             [b'007f0000000000000000000000000000000000000000 ' + commit.id +
@@ -366,12 +387,12 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {b'refs/heads/master': b'0' * 40}
 
-        def generate_pack_contents(have, want):
-            return {}
+        def generate_pack_data(have, want, ofs_delta=False):
+            return 0, []
 
         self.assertRaises(UpdateRefsError,
                           self.client.send_pack, b"/",
-                          determine_wants, generate_pack_contents)
+                          determine_wants, generate_pack_data)
         self.assertEqual(self.rout.getvalue(), b'0000')
 
 
@@ -629,12 +650,17 @@ class TestSSHVendor(object):
         self.command = ""
         self.username = None
         self.port = None
+        self.password = None
+        self.key_filename = None
 
-    def run_command(self, host, command, username=None, port=None):
+    def run_command(self, host, command, username=None, port=None,
+                    password=None, key_filename=None):
         self.host = host
         self.command = command
         self.username = username
         self.port = port
+        self.password = password
+        self.key_filename = key_filename
 
         class Subprocess:
             pass
@@ -747,7 +773,7 @@ class LocalGitClientTests(TestCase):
         t = MemoryRepo()
         s = open_repo('a.git')
         self.addCleanup(tear_down_repo, s)
-        self.assertEqual(s.get_refs(), c.fetch(s.path, t))
+        self.assertEqual(s.get_refs(), c.fetch(s.path, t).refs)
 
     def test_fetch_empty(self):
         c = LocalGitClient()
@@ -826,7 +852,7 @@ class LocalGitClientTests(TestCase):
         ref_name = b"refs/heads/" + branch
         new_refs = client.send_pack(target.path,
                                     lambda _: {ref_name: local.refs[ref_name]},
-                                    local.object_store.generate_pack_contents)
+                                    local.object_store.generate_pack_data)
 
         self.assertEqual(local.refs[ref_name], new_refs[ref_name])
 
@@ -837,6 +863,14 @@ class LocalGitClientTests(TestCase):
 
 class HttpGitClientTests(TestCase):
 
+    @staticmethod
+    def b64encode(s):
+        """Python 2/3 compatible Base64 encoder. Returns string."""
+        try:
+            return base64.b64encode(s)
+        except TypeError:
+            return base64.b64encode(s.encode('latin1')).decode('ascii')
+
     def test_get_url(self):
         base_url = 'https://github.com/jelmer/dulwich'
         path = '/jelmer/dulwich'
@@ -867,13 +901,12 @@ class HttpGitClientTests(TestCase):
         c = HttpGitClient(url, config=None, username='user', password='passwd')
         self.assertEqual('user', c._username)
         self.assertEqual('passwd', c._password)
-        [pw_handler] = [
-            h for h in c.opener.handlers
-            if getattr(h, 'passwd', None) is not None]
-        self.assertEqual(
-            ('user', 'passwd'),
-            pw_handler.passwd.find_user_password(
-                None, 'https://github.com/jelmer/dulwich'))
+
+        basic_auth = c.pool_manager.headers['authorization']
+        auth_string = '%s:%s' % ('user', 'passwd')
+        b64_credentials = self.b64encode(auth_string)
+        expected_basic_auth = 'Basic %s' % b64_credentials
+        self.assertEqual(basic_auth, expected_basic_auth)
 
     def test_init_no_username_passwd(self):
         url = 'https://github.com/jelmer/dulwich'
@@ -881,10 +914,7 @@ class HttpGitClientTests(TestCase):
         c = HttpGitClient(url, config=None)
         self.assertIs(None, c._username)
         self.assertIs(None, c._password)
-        pw_handler = [
-            h for h in c.opener.handlers
-            if getattr(h, 'passwd', None) is not None]
-        self.assertEqual(0, len(pw_handler))
+        self.assertNotIn('authorization', c.pool_manager.headers)
 
     def test_from_parsedurl_on_url_with_quoted_credentials(self):
         original_username = 'john|the|first'
@@ -901,13 +931,12 @@ class HttpGitClientTests(TestCase):
         c = HttpGitClient.from_parsedurl(urlparse.urlparse(url))
         self.assertEqual(original_username, c._username)
         self.assertEqual(original_password, c._password)
-        [pw_handler] = [
-            h for h in c.opener.handlers
-            if getattr(h, 'passwd', None) is not None]
-        self.assertEqual(
-            (original_username, original_password),
-            pw_handler.passwd.find_user_password(
-                None, 'https://github.com/jelmer/dulwich'))
+
+        basic_auth = c.pool_manager.headers['authorization']
+        auth_string = '%s:%s' % (original_username, original_password)
+        b64_credentials = self.b64encode(auth_string)
+        expected_basic_auth = 'Basic %s' % str(b64_credentials)
+        self.assertEqual(basic_auth, expected_basic_auth)
 
 
 class TCPGitClientTests(TestCase):
@@ -930,25 +959,149 @@ class TCPGitClientTests(TestCase):
         self.assertEqual('git://github.com:9090/jelmer/dulwich', url)
 
 
-class DefaultUrllib2OpenerTest(TestCase):
+class DefaultUrllib3ManagerTest(TestCase):
+
+    def assert_verify_ssl(self, manager, assertion=True):
+        pool_keywords = tuple(manager.connection_pool_kw.items())
+        assert_method = self.assertIn if assertion else self.assertNotIn
+        assert_method(('cert_reqs', 'CERT_REQUIRED'), pool_keywords)
+        assert_method(('ca_certs', certifi.where()), pool_keywords)
 
     def test_no_config(self):
-        default_urllib2_opener(config=None)
+        manager = default_urllib3_manager(config=None)
+        self.assert_verify_ssl(manager)
 
     def test_config_no_proxy(self):
-        default_urllib2_opener(config=ConfigDict())
+        manager = default_urllib3_manager(config=ConfigDict())
+        self.assert_verify_ssl(manager)
 
     def test_config_proxy(self):
         config = ConfigDict()
         config.set(b'http', b'proxy', b'http://localhost:3128/')
-        opener = default_urllib2_opener(config=config)
-        self.assertIn(urllib2.ProxyHandler,
-                      list(map(lambda x: x.__class__, opener.handlers)))
+        manager = default_urllib3_manager(config=config)
+
+        self.assertIsInstance(manager, urllib3.ProxyManager)
+        self.assertTrue(hasattr(manager, 'proxy'))
+        self.assertEqual(manager.proxy.scheme, 'http')
+        self.assertEqual(manager.proxy.host, 'localhost')
+        self.assertEqual(manager.proxy.port, 3128)
+        self.assert_verify_ssl(manager)
+
+    def test_config_no_verify_ssl(self):
+        manager = default_urllib3_manager(config=None, verify_ssl=False)
+        self.assert_verify_ssl(manager, assertion=False)
 
 
 class SubprocessSSHVendorTests(TestCase):
 
+    def setUp(self):
+        # Monkey Patch client subprocess popen
+        self._orig_popen = dulwich.client.subprocess.Popen
+        dulwich.client.subprocess.Popen = DummyPopen
+
+    def tearDown(self):
+        dulwich.client.subprocess.Popen = self._orig_popen
+
     def test_run_command_dashes(self):
         vendor = SubprocessSSHVendor()
         self.assertRaises(StrangeHostname, vendor.run_command, '--weird-host',
                           'git-clone-url')
+
+    def test_run_command_password(self):
+        vendor = SubprocessSSHVendor()
+        self.assertRaises(NotImplementedError, vendor.run_command, 'host',
+                          'git-clone-url', password='12345')
+
+    def test_run_command_password_and_privkey(self):
+        vendor = SubprocessSSHVendor()
+        self.assertRaises(NotImplementedError, vendor.run_command,
+                          'host', 'git-clone-url',
+                          password='12345', key_filename='/tmp/id_rsa')
+
+    def test_run_command_with_port_username_and_privkey(self):
+        expected = ['ssh', '-x', '-p', '2200',
+                    '-i', '/tmp/id_rsa', 'user@host', 'git-clone-url']
+
+        vendor = SubprocessSSHVendor()
+        command = vendor.run_command(
+            'host', 'git-clone-url',
+            username='user', port='2200',
+            key_filename='/tmp/id_rsa')
+
+        args = command.proc.args
+
+        self.assertListEqual(expected, args[0])
+
+
+class PuttySSHVendorTests(TestCase):
+
+    def setUp(self):
+        # Monkey Patch client subprocess popen
+        self._orig_popen = dulwich.client.subprocess.Popen
+        dulwich.client.subprocess.Popen = DummyPopen
+
+    def tearDown(self):
+        dulwich.client.subprocess.Popen = self._orig_popen
+
+    def test_run_command_dashes(self):
+        vendor = PuttySSHVendor()
+        self.assertRaises(StrangeHostname, vendor.run_command, '--weird-host',
+                          'git-clone-url')
+
+    def test_run_command_password_and_privkey(self):
+        vendor = PuttySSHVendor()
+        self.assertRaises(NotImplementedError, vendor.run_command,
+                          'host', 'git-clone-url',
+                          password='12345', key_filename='/tmp/id_rsa')
+
+    def test_run_command_password(self):
+        if sys.platform == 'win32':
+            binary = ['putty.exe', '-ssh']
+        else:
+            binary = ['putty', '-ssh']
+        expected = binary + ['-pw', '12345', 'host', 'git-clone-url']
+
+        vendor = PuttySSHVendor()
+
+        warnings.simplefilter("always", UserWarning)
+        self.addCleanup(warnings.resetwarnings)
+        warnings_list, restore_warnings = setup_warning_catcher()
+        self.addCleanup(restore_warnings)
+
+        command = vendor.run_command('host', 'git-clone-url', password='12345')
+
+        expected_warning = UserWarning(
+            'Invoking Putty with a password exposes the password in the '
+            'process list.')
+
+        for w in warnings_list:
+            if (type(w) == type(expected_warning) and
+                    w.args == expected_warning.args):
+                break
+        else:
+            raise AssertionError(
+                'Expected warning %r not in %r' %
+                (expected_warning, warnings_list))
+
+        args = command.proc.args
+
+        self.assertListEqual(expected, args[0])
+
+    def test_run_command_with_port_username_and_privkey(self):
+        if sys.platform == 'win32':
+            binary = ['putty.exe', '-ssh']
+        else:
+            binary = ['putty', '-ssh']
+        expected = binary + [
+            '-P', '2200', '-i', '/tmp/id_rsa',
+            'user@host', 'git-clone-url']
+
+        vendor = PuttySSHVendor()
+        command = vendor.run_command(
+            'host', 'git-clone-url',
+            username='user', port='2200',
+            key_filename='/tmp/id_rsa')
+
+        args = command.proc.args
+
+        self.assertListEqual(expected, args[0])

+ 20 - 2
dulwich/tests/test_config.py

@@ -81,21 +81,39 @@ class ConfigFileTests(TestCase):
         cf = self.from_file(b"[section]\nbar= foo # a comment\n")
         self.assertEqual(ConfigFile({(b"section", ): {b"bar": b"foo"}}), cf)
 
+    def test_comment_character_within_value_string(self):
+        cf = self.from_file(b"[section]\nbar= \"foo#bar\"\n")
+        self.assertEqual(
+            ConfigFile({(b"section", ): {b"bar": b"foo#bar"}}), cf)
+
+    def test_comment_character_within_section_string(self):
+        cf = self.from_file(b"[branch \"foo#bar\"] # a comment\nbar= foo\n")
+        self.assertEqual(
+            ConfigFile({(b"branch", b"foo#bar"): {b"bar": b"foo"}}), cf)
+
     def test_from_file_section(self):
         cf = self.from_file(b"[core]\nfoo = bar\n")
         self.assertEqual(b"bar", cf.get((b"core", ), b"foo"))
         self.assertEqual(b"bar", cf.get((b"core", b"foo"), b"foo"))
 
-    def test_from_file_section_case_insensitive(self):
+    def test_from_file_section_case_insensitive_lower(self):
         cf = self.from_file(b"[cOre]\nfOo = bar\n")
         self.assertEqual(b"bar", cf.get((b"core", ), b"foo"))
         self.assertEqual(b"bar", cf.get((b"core", b"foo"), b"foo"))
 
+    def test_from_file_section_case_insensitive_mixed(self):
+        cf = self.from_file(b"[cOre]\nfOo = bar\n")
+        self.assertEqual(b"bar", cf.get((b"core", ), b"fOo"))
+        self.assertEqual(b"bar", cf.get((b"cOre", b"fOo"), b"fOo"))
+
     def test_from_file_with_mixed_quoted(self):
         cf = self.from_file(b"[core]\nfoo = \"bar\"la\n")
         self.assertEqual(b"barla", cf.get((b"core", ), b"foo"))
 
-    def test_from_file_with_open_quoted(self):
+    def test_from_file_section_with_open_brackets(self):
+        self.assertRaises(ValueError, self.from_file, b"[core\nfoo = bar\n")
+
+    def test_from_file_value_with_open_quoted(self):
         self.assertRaises(ValueError, self.from_file, b"[core]\nfoo = \"bar\n")
 
     def test_from_file_with_quotes(self):

+ 10 - 0
dulwich/tests/test_diff_tree.py

@@ -220,6 +220,16 @@ class TreeChangesTest(DiffTestCase):
              TreeChange.add((b'a', 0o120000, blob_a2.id))],
             tree1, tree2)
 
+    def test_tree_changes_change_type_same(self):
+        blob_a1 = make_object(Blob, data=b'a')
+        blob_a2 = make_object(Blob, data=b'/foo/bar')
+        tree1 = self.commit_tree([(b'a', blob_a1, 0o100644)])
+        tree2 = self.commit_tree([(b'a', blob_a2, 0o120000)])
+        self.assertChangesEqual(
+            [TreeChange(CHANGE_MODIFY, (b'a', 0o100644, blob_a1.id),
+                        (b'a', 0o120000, blob_a2.id))],
+            tree1, tree2, change_type_same=True)
+
     def test_tree_changes_to_tree(self):
         blob_a = make_object(Blob, data=b'a')
         blob_x = make_object(Blob, data=b'x')

+ 18 - 0
dulwich/tests/test_fastexport.py

@@ -144,6 +144,24 @@ class GitImportProcessorTests(TestCase):
         self.assertEqual(3600, commit.author_timezone)
         self.assertEqual(commit, self.repo[b"refs/heads/foo"])
 
+    def test_commit_handler_markers(self):
+        from fastimport import commands
+        [c1, c2, c3] = build_commit_graph(self.repo.object_store,
+                                          [[1], [2], [3]])
+        self.processor.markers[b'10'] = c1.id
+        self.processor.markers[b'42'] = c2.id
+        self.processor.markers[b'98'] = c3.id
+        cmd = commands.CommitCommand(
+                b"refs/heads/foo",  b"mrkr",
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                b"FOO", b':10', [b':42', b':98'], [])
+        self.processor.commit_handler(cmd)
+        commit = self.repo[self.processor.last_commit]
+        self.assertEqual(c1.id, commit.parents[0])
+        self.assertEqual(c2.id, commit.parents[1])
+        self.assertEqual(c3.id, commit.parents[2])
+
     def test_import_stream(self):
         markers = self.processor.import_stream(BytesIO(b"""blob
 mark :1

+ 4 - 2
dulwich/tests/test_index.py

@@ -587,7 +587,8 @@ class GetUnstagedChangesTests(TestCase):
                 f.write(b'origstuff')
 
             repo.stage(['foo1', 'foo2'])
-            repo.do_commit(b'test status', author=b'', committer=b'')
+            repo.do_commit(b'test status', author=b'author <email>',
+                           committer=b'committer <email>')
 
             with open(foo1_fullpath, 'wb') as f:
                 f.write(b'newstuff')
@@ -612,7 +613,8 @@ class GetUnstagedChangesTests(TestCase):
                 f.write(b'origstuff')
 
             repo.stage(['foo1'])
-            repo.do_commit(b'test status', author=b'', committer=b'')
+            repo.do_commit(b'test status', author=b'author <email>',
+                           committer=b'committer <email>')
 
             os.unlink(foo1_fullpath)
 

+ 39 - 2
dulwich/tests/test_object_store.py

@@ -33,6 +33,7 @@ from dulwich.index import (
     )
 from dulwich.errors import (
     NotTreeError,
+    EmptyFileException,
     )
 from dulwich.objects import (
     sha_to_hex,
@@ -43,6 +44,7 @@ from dulwich.objects import (
 from dulwich.object_store import (
     DiskObjectStore,
     MemoryObjectStore,
+    OverlayObjectStore,
     ObjectStoreGraphWalker,
     commit_tree_changes,
     tree_lookup_path,
@@ -212,6 +214,14 @@ class ObjectStoreTests(object):
         self.store.close()
 
 
+class OverlayObjectStoreTests(ObjectStoreTests, TestCase):
+
+    def setUp(self):
+        TestCase.setUp(self)
+        self.bases = [MemoryObjectStore(), MemoryObjectStore()]
+        self.store = OverlayObjectStore(self.bases, self.bases[0])
+
+
 class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
 
     def setUp(self):
@@ -224,7 +234,7 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         try:
             b = make_object(Blob, data=b"more yummy data")
             write_pack_objects(f, [(b, None)])
-        except:
+        except BaseException:
             abort()
             raise
         else:
@@ -343,6 +353,33 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         self.assertIn(b2.id, store)
         self.assertEqual(b2, store[b2.id])
 
+    def test_corrupted_object_raise_exception(self):
+        """Corrupted sha1 disk file should raise specific exception"""
+        self.store.add_object(testobject)
+        self.assertEqual((Blob.type_num, b'yummy data'),
+                         self.store.get_raw(testobject.id))
+        self.assertTrue(self.store.contains_loose(testobject.id))
+        self.assertIsNotNone(self.store._get_loose_object(testobject.id))
+
+        path = self.store._get_shafile_path(testobject.id)
+        with open(path, 'wb') as f:  # corrupt the file
+            f.write(b'')
+
+        expected_error_msg = 'Corrupted empty file detected'
+        try:
+            self.store.contains_loose(testobject.id)
+        except EmptyFileException as e:
+            self.assertEqual(str(e), expected_error_msg)
+
+        try:
+            self.store._get_loose_object(testobject.id)
+        except EmptyFileException as e:
+            self.assertEqual(str(e), expected_error_msg)
+
+        # this does not change iteration on loose objects though
+        self.assertEqual([testobject.id],
+                         list(self.store._iter_loose_objects()))
+
     def test_add_alternate_path(self):
         store = DiskObjectStore(self.store_dir)
         self.assertEqual([], list(store._read_alternate_paths()))
@@ -377,7 +414,7 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         try:
             b = make_object(Blob, data=b"more yummy data")
             write_pack_objects(f, [(b, None)])
-        except:
+        except BaseException:
             abort()
             raise
         else:

+ 59 - 0
dulwich/tests/test_objects.py

@@ -56,6 +56,7 @@ from dulwich.objects import (
     _parse_tree_py,
     sorted_tree_items,
     _sorted_tree_items_py,
+    MAX_TIME
     )
 from dulwich.tests import (
     TestCase,
@@ -158,6 +159,7 @@ class BlobReadTests(TestCase):
         self.assertEqual(b'test 5\n', b.data)
         b.chunked = [b'te', b'st', b' 6\n']
         self.assertEqual(b'test 6\n', b.as_raw_string())
+        self.assertEqual(b'test 6\n', bytes(b))
 
     def test_parse_legacy_blob(self):
         string = b'test 3\n'
@@ -639,6 +641,38 @@ class CommitParseTests(ShaFileCheckTests):
             else:
                 self.assertCheckFails(Commit, text)
 
+    def test_check_commit_with_unparseable_time(self):
+        identity_with_wrong_time = (
+            b'Igor Sysoev <igor@sysoev.ru> 18446743887488505614+42707004')
+
+        # Those fail at reading time
+        self.assertCheckFails(
+            Commit,
+            self.make_commit_text(author=default_committer,
+                                  committer=identity_with_wrong_time))
+        self.assertCheckFails(
+            Commit,
+            self.make_commit_text(author=identity_with_wrong_time,
+                                  committer=default_committer))
+
+    def test_check_commit_with_overflow_date(self):
+        """Date with overflow should raise an ObjectFormatException when checked
+
+        """
+        identity_with_wrong_time = (
+            b'Igor Sysoev <igor@sysoev.ru> 18446743887488505614 +42707004')
+        commit0 = Commit.from_string(self.make_commit_text(
+                author=identity_with_wrong_time,
+                committer=default_committer))
+        commit1 = Commit.from_string(self.make_commit_text(
+                author=default_committer,
+                committer=identity_with_wrong_time))
+
+        # Those fails when triggering the check() method
+        for commit in [commit0, commit1]:
+            with self.assertRaises(ObjectFormatException):
+                commit.check()
+
     def test_parse_gpgsig(self):
         c = Commit.from_string(b"""tree aaff74984cccd156a469afa7d9ab10e4777beb24
 author Jelmer Vernooij <jelmer@samba.org> 1412179807 +0200
@@ -760,6 +794,8 @@ class TreeTests(ShaFileCheckTests):
         x[b'myname'] = (0o100755, myhexsha)
         self.assertEqual(b'100755 myname\0' + hex_to_sha(myhexsha),
                          x.as_raw_string())
+        self.assertEqual(b'100755 myname\0' + hex_to_sha(myhexsha),
+                         bytes(x))
 
     def test_tree_update_id(self):
         x = Tree()
@@ -1003,6 +1039,21 @@ class TagParseTests(ShaFileCheckTests):
                     b'Sun 7 Jul 2007 12:54:34 +0700')))
         self.assertCheckFails(Tag, self.make_tag_text(object_sha=b'xxx'))
 
+    def test_check_tag_with_unparseable_field(self):
+        self.assertCheckFails(Tag, self.make_tag_text(
+            tagger=(b'Linus Torvalds <torvalds@woody.linux-foundation.org> '
+                    b'423423+0000')))
+
+    def test_check_tag_with_overflow_time(self):
+        """Date with overflow should raise an ObjectFormatException when checked
+
+        """
+        author = 'Some Dude <some@dude.org> %s +0000' % (MAX_TIME+1, )
+        tag = Tag.from_string(self.make_tag_text(
+            tagger=(author.encode())))
+        with self.assertRaises(ObjectFormatException):
+            tag.check()
+
     def test_check_duplicates(self):
         # duplicate each of the header fields
         for i in range(4):
@@ -1218,6 +1269,14 @@ class ShaFileSerializeTests(TestCase):
         with self.assert_serialization_on_change(tag):
             tag.message = b'new message'
 
+    def test_tag_serialize_time_error(self):
+        with self.assertRaises(ObjectFormatException):
+            tag = make_object(
+                Tag, name=b'tag', message=b'some message',
+                tagger=b'Tagger <test@example.com> 1174773719+0000',
+                object=(Commit, b'0' * 40))
+            tag._deserialize(tag._serialize())
+
 
 class PrettyFormatTreeEntryTests(TestCase):
 

+ 22 - 0
dulwich/tests/test_pack.py

@@ -130,6 +130,14 @@ class PackIndexTests(PackTests):
         self.assertEqual(p.object_index(tree_sha), 138)
         self.assertEqual(p.object_index(commit_sha), 12)
 
+    def test_object_sha1(self):
+        """Tests that the correct object sha1 is returned from the index."""
+        p = self.get_pack_index(pack1_sha)
+        self.assertRaises(KeyError, p.object_sha1, 876)
+        self.assertEqual(p.object_sha1(178), hex_to_sha(a_sha))
+        self.assertEqual(p.object_sha1(138), hex_to_sha(tree_sha))
+        self.assertEqual(p.object_sha1(12), hex_to_sha(commit_sha))
+
     def test_index_len(self):
         p = self.get_pack_index(pack1_sha)
         self.assertEqual(3, len(p))
@@ -524,6 +532,20 @@ class TestThinPack(PackTests):
                 (3, b'foo1234'),
                 p.get_raw(self.blobs[b'foo1234'].id))
 
+    def test_get_raw_unresolved(self):
+        with self.make_pack(False) as p:
+            self.assertEqual(
+                (7,
+                 b'\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c',
+                 [b'x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l']),
+                p.get_raw_unresolved(self.blobs[b'foo1234'].id))
+        with self.make_pack(True) as p:
+            self.assertEqual(
+                (7,
+                 b'\x19\x10(\x15f=#\xf8\xb7ZG\xe7\xa0\x19e\xdc\xdc\x96F\x8c',
+                 [b'x\x9ccf\x9f\xc0\xccbhdl\x02\x00\x06f\x01l']),
+                p.get_raw_unresolved(self.blobs[b'foo1234'].id))
+
     def test_iterobjects(self):
         with self.make_pack(False) as p:
             self.assertRaises(KeyError, list, p.iterobjects())

+ 3 - 3
dulwich/tests/test_patch.py

@@ -166,7 +166,7 @@ Date: Thu, 15 Apr 2010 15:40:28 +0200
 Subject:  [Dulwich-users] [PATCH] Added unit tests for
  dulwich.object_store.tree_lookup_path.
 
-From: Jelmer Vernooy <jelmer@debian.org>
+From: Jelmer Vernooij <jelmer@debian.org>
 
 * dulwich/tests/test_object_store.py
   (TreeLookupPathTests): This test case contains a few tests that ensure the
@@ -180,7 +180,7 @@ From: Jelmer Vernooy <jelmer@debian.org>
 1.7.0.4
 """  # noqa: W291
         c, diff, version = git_am_patch_split(BytesIO(text), "utf-8")
-        self.assertEqual(b"Jelmer Vernooy <jelmer@debian.org>", c.author)
+        self.assertEqual(b"Jelmer Vernooij <jelmer@debian.org>", c.author)
         self.assertEqual(b'''\
 Added unit tests for dulwich.object_store.tree_lookup_path.
 
@@ -197,7 +197,7 @@ Date: Thu, 15 Apr 2010 15:40:28 +0200
 Subject:  [Dulwich-users] [PATCH] Added unit tests for
  dulwich.object_store.tree_lookup_path.
 
-From: Jelmer Vernooy <jelmer@debian.org>
+From: Jelmer Vernooij <jelmer@debian.org>
 
 ---
  pixmaps/prey.ico |  Bin 9662 -> 9662 bytes

+ 138 - 33
dulwich/tests/test_porcelain.py

@@ -39,7 +39,10 @@ from dulwich.objects import (
     Tree,
     ZERO_SHA,
     )
-from dulwich.repo import Repo
+from dulwich.repo import (
+    NoIndexPresent,
+    Repo,
+    )
 from dulwich.tests import (
     TestCase,
     )
@@ -54,13 +57,10 @@ class PorcelainTestCase(TestCase):
 
     def setUp(self):
         super(PorcelainTestCase, self).setUp()
-        repo_dir = tempfile.mkdtemp()
-        self.addCleanup(shutil.rmtree, repo_dir)
-        self.repo = Repo.init(repo_dir)
-
-    def tearDown(self):
-        super(PorcelainTestCase, self).tearDown()
-        self.repo.close()
+        self.test_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self.test_dir)
+        self.repo = Repo.init(os.path.join(self.test_dir, 'repo'), mkdir=True)
+        self.addCleanup(self.repo.close)
 
 
 class ArchiveTests(PorcelainTestCase):
@@ -104,6 +104,17 @@ class CommitTests(PorcelainTestCase):
         self.assertTrue(isinstance(sha, bytes))
         self.assertEqual(len(sha), 40)
 
+    def test_unicode(self):
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
+        self.repo.refs[b"refs/heads/foo"] = c3.id
+        sha = porcelain.commit(
+                self.repo.path, message="Some message",
+                author="Joe <joe@example.com>",
+                committer="Bob <bob@example.com>")
+        self.assertTrue(isinstance(sha, bytes))
+        self.assertEqual(len(sha), 40)
+
 
 class CloneTests(PorcelainTestCase):
 
@@ -125,7 +136,7 @@ class CloneTests(PorcelainTestCase):
                             checkout=False, errstream=errstream)
         self.assertEqual(r.path, target_path)
         target_repo = Repo(target_path)
-        self.assertEqual(target_repo.head(), c3.id)
+        self.assertEqual(0, len(target_repo.open_index()))
         self.assertEqual(c3.id, target_repo.refs[b'refs/tags/foo'])
         self.assertTrue(b'f1' not in os.listdir(target_path))
         self.assertTrue(b'f2' not in os.listdir(target_path))
@@ -178,7 +189,8 @@ class CloneTests(PorcelainTestCase):
                 errstream=errstream) as r:
             self.assertEqual(r.path, target_path)
         with Repo(target_path) as r:
-            self.assertRaises(KeyError, r.head)
+            r.head()
+            self.assertRaises(NoIndexPresent, r.open_index)
         self.assertFalse(b'f1' in os.listdir(target_path))
         self.assertFalse(b'f2' in os.listdir(target_path))
 
@@ -234,7 +246,7 @@ class AddTests(PorcelainTestCase):
             f.write("\n")
         porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test',
-                         author=b'test', committer=b'test')
+                         author=b'test <email>', committer=b'test <email>')
 
         # Add a second test file and a file in a directory
         with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
@@ -265,7 +277,8 @@ class AddTests(PorcelainTestCase):
             os.chdir(os.path.join(self.repo.path, 'foo'))
             porcelain.add(repo=self.repo.path)
             porcelain.commit(repo=self.repo.path, message=b'test',
-                             author=b'test', committer=b'test')
+                             author=b'test <email>',
+                             committer=b'test <email>')
         finally:
             os.chdir(cwd)
 
@@ -300,6 +313,19 @@ class AddTests(PorcelainTestCase):
         porcelain.add(self.repo, paths=[os.path.join(self.repo.path, "foo")])
         self.assertIn(b"foo", self.repo.open_index())
 
+    def test_add_not_in_repo(self):
+        with open(os.path.join(self.test_dir, 'foo'), 'w') as f:
+            f.write("BAR")
+        self.assertRaises(
+            ValueError,
+            porcelain.add, self.repo,
+            paths=[os.path.join(self.test_dir, "foo")])
+        self.assertRaises(
+            ValueError,
+            porcelain.add, self.repo,
+            paths=["../foo"])
+        self.assertEqual([], list(self.repo.open_index()))
+
 
 class RemoveTests(PorcelainTestCase):
 
@@ -308,8 +334,9 @@ class RemoveTests(PorcelainTestCase):
         with open(fullpath, 'w') as f:
             f.write("BAR")
         porcelain.add(self.repo.path, paths=[fullpath])
-        porcelain.commit(repo=self.repo, message=b'test', author=b'test',
-                         committer=b'test')
+        porcelain.commit(repo=self.repo, message=b'test',
+                         author=b'test <email>',
+                         committer=b'test <email>')
         self.assertTrue(os.path.exists(os.path.join(self.repo.path, 'foo')))
         cwd = os.getcwd()
         try:
@@ -556,6 +583,18 @@ class TagCreateTests(PorcelainTestCase):
         self.repo[b'refs/tags/tryme']
         self.assertEqual(list(tags.values()), [self.repo.head()])
 
+    def test_unannotated_unicode(self):
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
+        self.repo.refs[b"HEAD"] = c3.id
+
+        porcelain.tag_create(self.repo.path, "tryme", annotated=False)
+
+        tags = self.repo.refs.as_dict(b"refs/tags")
+        self.assertEqual(list(tags.keys()), [b"tryme"])
+        self.repo[b'refs/tags/tryme']
+        self.assertEqual(list(tags.values()), [self.repo.head()])
+
 
 class TagListTests(PorcelainTestCase):
 
@@ -643,7 +682,8 @@ class PushTests(PorcelainTestCase):
         errstream = BytesIO()
 
         porcelain.commit(repo=self.repo.path, message=b'init',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         # Setup target repo cloned from temp test repo
         clone_path = tempfile.mkdtemp()
@@ -660,7 +700,8 @@ class PushTests(PorcelainTestCase):
         os.close(handle)
         porcelain.add(repo=clone_path, paths=[fullpath])
         porcelain.commit(repo=clone_path, message=b'push',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         # Setup a non-checked out branch in the remote
         refs_path = b"refs/heads/foo"
@@ -695,7 +736,8 @@ class PushTests(PorcelainTestCase):
         errstream = BytesIO()
 
         porcelain.commit(repo=self.repo.path, message=b'init',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         # Setup target repo cloned from temp test repo
         clone_path = tempfile.mkdtemp()
@@ -729,7 +771,8 @@ class PullTests(PorcelainTestCase):
         os.close(handle)
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test',
-                         author=b'test', committer=b'test')
+                         author=b'test <email>',
+                         committer=b'test <email>')
 
         # Setup target repo
         self.target_path = tempfile.mkdtemp()
@@ -743,7 +786,8 @@ class PullTests(PorcelainTestCase):
         os.close(handle)
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test2',
-                         author=b'test2', committer=b'test2')
+                         author=b'test2 <email>',
+                         committer=b'test2 <email>')
 
         self.assertTrue(b'refs/heads/master' in self.repo.refs)
         self.assertTrue(b'refs/heads/master' in target_repo.refs)
@@ -792,7 +836,8 @@ class StatusTests(PorcelainTestCase):
 
         porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         # modify access and modify time of path
         os.utime(fullpath, (0, 0))
@@ -823,7 +868,8 @@ class StatusTests(PorcelainTestCase):
             f.write('stuff')
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         filename = 'foo'
         fullpath = os.path.join(self.repo.path, filename)
@@ -847,7 +893,8 @@ class StatusTests(PorcelainTestCase):
             f.write('stuff')
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
         with open(fullpath, 'w') as f:
             f.write('otherstuff')
         porcelain.add(repo=self.repo.path, paths=fullpath)
@@ -868,7 +915,8 @@ class StatusTests(PorcelainTestCase):
             f.write('stuff')
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
         cwd = os.getcwd()
         try:
             os.chdir(self.repo.path)
@@ -941,7 +989,8 @@ class ReceivePackTests(PorcelainTestCase):
             f.write('stuff')
         porcelain.add(repo=self.repo.path, paths=fullpath)
         self.repo.do_commit(message=b'test status',
-                            author=b'', committer=b'',
+                            author=b'author <email>',
+                            committer=b'committer <email>',
                             author_timestamp=1402354300,
                             commit_timestamp=1402354300, author_timezone=0,
                             commit_timezone=0)
@@ -950,10 +999,10 @@ class ReceivePackTests(PorcelainTestCase):
                 self.repo.path, BytesIO(b"0000"), outf)
         outlines = outf.getvalue().splitlines()
         self.assertEqual([
-            b'00919e65bdcf4a22cdd4f3700604a275cd2aaf146b23 HEAD\x00 report-status '  # noqa: E501
+            b'0091319b56ce3aee2d489f759736a79cc552c9bb86d9 HEAD\x00 report-status '  # noqa: E501
             b'delete-refs quiet ofs-delta side-band-64k '
             b'no-done symref=HEAD:refs/heads/master',
-            b'003f9e65bdcf4a22cdd4f3700604a275cd2aaf146b23 refs/heads/master',
+            b'003f319b56ce3aee2d489f759736a79cc552c9bb86d9 refs/heads/master',
             b'0000'], outlines)
         self.assertEqual(0, exitcode)
 
@@ -1000,6 +1049,14 @@ class BranchDeleteTests(PorcelainTestCase):
         porcelain.branch_delete(self.repo, b'foo')
         self.assertFalse(b"foo" in porcelain.branch_list(self.repo))
 
+    def test_simple_unicode(self):
+        [c1] = build_commit_graph(self.repo.object_store, [[1]])
+        self.repo[b"HEAD"] = c1.id
+        porcelain.branch_create(self.repo, 'foo')
+        self.assertTrue(b"foo" in porcelain.branch_list(self.repo))
+        porcelain.branch_delete(self.repo, 'foo')
+        self.assertFalse(b"foo" in porcelain.branch_list(self.repo))
+
 
 class FetchTests(PorcelainTestCase):
 
@@ -1012,7 +1069,8 @@ class FetchTests(PorcelainTestCase):
         os.close(handle)
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test',
-                         author=b'test', committer=b'test')
+                         author=b'test <email>',
+                         committer=b'test <email>')
 
         # Setup target repo
         target_path = tempfile.mkdtemp()
@@ -1025,19 +1083,63 @@ class FetchTests(PorcelainTestCase):
         os.close(handle)
         porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test2',
-                         author=b'test2', committer=b'test2')
+                         author=b'test2 <email>',
+                         committer=b'test2 <email>')
 
         self.assertFalse(self.repo[b'HEAD'].id in target_repo)
         target_repo.close()
 
         # Fetch changes into the cloned repo
-        porcelain.fetch(target_path, self.repo.path, outstream=outstream,
-                        errstream=errstream)
+        porcelain.fetch(target_path, self.repo.path,
+                        outstream=outstream, errstream=errstream)
 
         # Check the target repo for pushed changes
         with Repo(target_path) as r:
             self.assertTrue(self.repo[b'HEAD'].id in r)
 
+    def test_with_remote_name(self):
+        remote_name = b'origin'
+        outstream = BytesIO()
+        errstream = BytesIO()
+
+        # create a file for initial commit
+        handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
+        os.close(handle)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
+        porcelain.commit(repo=self.repo.path, message=b'test',
+                         author=b'test <email>',
+                         committer=b'test <email>')
+
+        # Setup target repo
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, target_path)
+        target_repo = porcelain.clone(self.repo.path, target=target_path,
+                                      errstream=errstream)
+
+        # Capture current refs
+        target_refs = target_repo.get_refs()
+
+        # create a second file to be pushed
+        handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
+        os.close(handle)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
+        porcelain.commit(repo=self.repo.path, message=b'test2',
+                         author=b'test2 <email>',
+                         committer=b'test2 <email>')
+
+        self.assertFalse(self.repo[b'HEAD'].id in target_repo)
+        target_repo.close()
+
+        # Fetch changes into the cloned repo
+        porcelain.fetch(target_path, self.repo.path, remote_name=remote_name,
+                        outstream=outstream, errstream=errstream)
+
+        # Check the target repo for pushed changes, as well as updates
+        # for the refs
+        with Repo(target_path) as r:
+            self.assertTrue(self.repo[b'HEAD'].id in r)
+            self.assertNotEqual(self.repo.get_refs(), target_refs)
+
 
 class RepackTests(PorcelainTestCase):
 
@@ -1055,7 +1157,8 @@ class LsTreeTests(PorcelainTestCase):
 
     def test_empty(self):
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         f = StringIO()
         porcelain.ls_tree(self.repo, b"HEAD", outstream=f)
@@ -1069,7 +1172,8 @@ class LsTreeTests(PorcelainTestCase):
 
         porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test status',
-                         author=b'', committer=b'')
+                         author=b'author <email>',
+                         committer=b'committer <email>')
 
         f = StringIO()
         porcelain.ls_tree(self.repo, b"HEAD", outstream=f)
@@ -1085,7 +1189,8 @@ class LsRemoteTests(PorcelainTestCase):
 
     def test_some(self):
         cid = porcelain.commit(repo=self.repo.path, message=b'test status',
-                               author=b'', committer=b'')
+                               author=b'author <email>',
+                               committer=b'committer <email>')
 
         self.assertEqual({
             b'refs/heads/master': cid,

+ 25 - 0
dulwich/tests/test_refs.py

@@ -39,6 +39,7 @@ from dulwich.refs import (
     parse_symref_value,
     read_packed_refs_with_peeled,
     read_packed_refs,
+    strip_peeled_refs,
     write_packed_refs,
     )
 from dulwich.repo import Repo
@@ -552,3 +553,27 @@ class ParseSymrefValueTests(TestCase):
 
     def test_invalid(self):
         self.assertRaises(ValueError, parse_symref_value, b'foobar')
+
+
+class StripPeeledRefsTests(TestCase):
+
+    all_refs = {
+        b'refs/heads/master': b'8843d7f92416211de9ebb963ff4ce28125932878',
+        b'refs/heads/testing': b'186a005b134d8639a58b6731c7c1ea821a6eedba',
+        b'refs/tags/1.0.0': b'a93db4b0360cc635a2b93675010bac8d101f73f0',
+        b'refs/tags/1.0.0^{}': b'a93db4b0360cc635a2b93675010bac8d101f73f0',
+        b'refs/tags/2.0.0': b'0749936d0956c661ac8f8d3483774509c165f89e',
+        b'refs/tags/2.0.0^{}': b'0749936d0956c661ac8f8d3483774509c165f89e',
+    }
+    non_peeled_refs = {
+        b'refs/heads/master': b'8843d7f92416211de9ebb963ff4ce28125932878',
+        b'refs/heads/testing': b'186a005b134d8639a58b6731c7c1ea821a6eedba',
+        b'refs/tags/1.0.0': b'a93db4b0360cc635a2b93675010bac8d101f73f0',
+        b'refs/tags/2.0.0': b'0749936d0956c661ac8f8d3483774509c165f89e',
+    }
+
+    def test_strip_peeled_refs(self):
+        # Simple check of two dicts
+        self.assertEqual(
+            strip_peeled_refs(self.all_refs),
+            self.non_peeled_refs)

+ 20 - 2
dulwich/tests/test_repository.py

@@ -37,8 +37,10 @@ from dulwich import objects
 from dulwich.config import Config
 from dulwich.errors import NotGitRepository
 from dulwich.repo import (
+    InvalidUserIdentity,
     Repo,
     MemoryRepo,
+    check_user_identity,
     )
 from dulwich.tests import (
     TestCase,
@@ -758,11 +760,11 @@ class BuildRepoRootTests(TestCase):
     def test_commit_fail_ref(self):
         r = self._repo
 
-        def set_if_equals(name, old_ref, new_ref):
+        def set_if_equals(name, old_ref, new_ref, **kwargs):
             return False
         r.refs.set_if_equals = set_if_equals
 
-        def add_if_new(name, new_ref):
+        def add_if_new(name, new_ref, **kwargs):
             self.fail('Unexpected call to add_if_new')
         r.refs.add_if_new = add_if_new
 
@@ -927,3 +929,19 @@ class BuildRepoRootTests(TestCase):
     def test_discover_notrepo(self):
         with self.assertRaises(NotGitRepository):
             Repo.discover('/')
+
+
+class CheckUserIdentityTests(TestCase):
+
+    def test_valid(self):
+        check_user_identity(b'Me <me@example.com>')
+
+    def test_invalid(self):
+        self.assertRaises(InvalidUserIdentity,
+                          check_user_identity, b'No Email')
+        self.assertRaises(InvalidUserIdentity,
+                          check_user_identity, b'Fullname <missing')
+        self.assertRaises(InvalidUserIdentity,
+                          check_user_identity, b'Fullname missing>')
+        self.assertRaises(InvalidUserIdentity,
+                          check_user_identity, b'Fullname >order<>')

+ 1 - 4
dulwich/web.py

@@ -119,13 +119,10 @@ def send_file(req, f, content_type):
             if not data:
                 break
             yield data
-        f.close()
     except IOError:
-        f.close()
         yield req.error('Error reading file')
-    except:
+    finally:
         f.close()
-        raise
 
 
 def _url_to_path(url):

+ 0 - 5
setup.cfg

@@ -1,6 +1 @@
 [build_ext]
-
-[egg_info]
-tag_build = 
-tag_date = 0
-

+ 19 - 7
setup.py

@@ -7,11 +7,14 @@ try:
     from setuptools import setup, Extension
 except ImportError:
     from distutils.core import setup, Extension
+    has_setuptools = False
+else:
+    has_setuptools = True
 from distutils.core import Distribution
 import os
 import sys
 
-dulwich_version_string = '0.18.5'
+dulwich_version_string = '0.19.0'
 
 include_dirs = []
 # Windows MSVC support
@@ -44,10 +47,11 @@ if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
         ['/usr/bin/xcodebuild', '-version'], stdout=subprocess.PIPE,
         stderr=subprocess.PIPE, env={})
     out, err = p.communicate()
-    for l in out.splitlines():
-        l = l.decode("utf8")
+    for line in out.splitlines():
+        line = line.decode("utf8")
         # Also parse only first digit, because 3.2.1 can't be parsed nicely
-        if l.startswith('Xcode') and int(l.split()[1].split('.')[0]) >= 4:
+        if (line.startswith('Xcode') and
+                int(line.split()[1].split('.')[0]) >= 4):
             os.environ['ARCHFLAGS'] = ''
 
 tests_require = ['fastimport']
@@ -57,6 +61,7 @@ if '__pypy__' not in sys.modules and not sys.platform == 'win32':
     tests_require.extend([
         'gevent', 'geventhttpclient', 'mock', 'setuptools>=17.1'])
 
+
 ext_modules = [
     Extension('dulwich._objects', ['dulwich/_objects.c'],
               include_dirs=include_dirs),
@@ -66,6 +71,15 @@ ext_modules = [
               include_dirs=include_dirs),
 ]
 
+setup_kwargs = {}
+
+if has_setuptools:
+    setup_kwargs['extras_require'] = {'fastimport': ['fastimport']}
+    setup_kwargs['install_requires'] = ['urllib3[secure]>=1.21']
+    setup_kwargs['include_package_data'] = True
+    setup_kwargs['test_suite'] = 'dulwich.tests.test_suite'
+    setup_kwargs['tests_require'] = tests_require
+
 
 if sys.platform == 'win32':
     # Win32 setup breaks with non-ascii characters.
@@ -109,8 +123,6 @@ setup(name='dulwich',
           'Topic :: Software Development :: Version Control',
       ],
       ext_modules=ext_modules,
-      test_suite='dulwich.tests.test_suite',
-      tests_require=tests_require,
       distclass=DulwichDistribution,
-      include_package_data=True,
+      **setup_kwargs
       )