Browse Source

New upstream version 0.18.0

Jelmer Vernooij 7 years ago
parent
commit
e82e290e4b
79 changed files with 3419 additions and 1553 deletions
  1. 4 1
      .travis.yml
  2. 129 8
      AUTHORS
  3. 4 0
      Makefile
  4. 146 93
      NEWS
  5. 5 4
      PKG-INFO
  6. 38 0
      README.md
  7. 76 15
      appveyor.yml
  8. 13 0
      bin/dulwich
  9. 47 44
      docs/conf.py
  10. 2 2
      docs/tutorial/file-format.txt
  11. 6 0
      docs/tutorial/porcelain.txt
  12. 1 1
      docs/tutorial/remote.txt
  13. 5 4
      dulwich.egg-info/PKG-INFO
  14. 2 0
      dulwich.egg-info/SOURCES.txt
  15. 1 1
      dulwich/__init__.py
  16. 4 8
      dulwich/_diff_tree.c
  17. 6 8
      dulwich/_objects.c
  18. 12 9
      dulwich/_pack.c
  19. 8 4
      dulwich/archive.py
  20. 125 66
      dulwich/client.py
  21. 18 11
      dulwich/config.py
  22. 1 2
      dulwich/contrib/paramiko_vendor.py
  23. 9 6
      dulwich/contrib/swift.py
  24. 17 11
      dulwich/contrib/test_swift.py
  25. 10 8
      dulwich/contrib/test_swift_smoke.py
  26. 15 11
      dulwich/diff_tree.py
  27. 7 6
      dulwich/errors.py
  28. 33 18
      dulwich/fastexport.py
  29. 12 8
      dulwich/file.py
  30. 358 0
      dulwich/ignore.py
  31. 42 15
      dulwich/index.py
  32. 2 1
      dulwich/log_utils.py
  33. 44 32
      dulwich/object_store.py
  34. 64 41
      dulwich/objects.py
  35. 17 2
      dulwich/objectspec.py
  36. 57 53
      dulwich/pack.py
  37. 18 12
      dulwich/patch.py
  38. 199 71
      dulwich/porcelain.py
  39. 18 14
      dulwich/protocol.py
  40. 5 3
      dulwich/reflog.py
  41. 25 19
      dulwich/refs.py
  42. 35 26
      dulwich/repo.py
  43. 73 46
      dulwich/server.py
  44. 19 9
      dulwich/tests/__init__.py
  45. 1 0
      dulwich/tests/compat/__init__.py
  46. 32 22
      dulwich/tests/compat/server_utils.py
  47. 45 35
      dulwich/tests/compat/test_client.py
  48. 1 0
      dulwich/tests/compat/test_pack.py
  49. 8 8
      dulwich/tests/compat/test_repository.py
  50. 5 3
      dulwich/tests/compat/test_server.py
  51. 9 5
      dulwich/tests/compat/test_web.py
  52. 3 1
      dulwich/tests/compat/utils.py
  53. 0 1
      dulwich/tests/test_archive.py
  54. 71 41
      dulwich/tests/test_client.py
  55. 19 20
      dulwich/tests/test_config.py
  56. 86 46
      dulwich/tests/test_diff_tree.py
  57. 52 24
      dulwich/tests/test_fastexport.py
  58. 10 13
      dulwich/tests/test_greenthreads.py
  59. 260 0
      dulwich/tests/test_ignore.py
  60. 64 50
      dulwich/tests/test_index.py
  61. 30 27
      dulwich/tests/test_lru_cache.py
  62. 37 27
      dulwich/tests/test_missing_obj_finder.py
  63. 30 21
      dulwich/tests/test_object_store.py
  64. 74 49
      dulwich/tests/test_objects.py
  65. 27 11
      dulwich/tests/test_objectspec.py
  66. 57 37
      dulwich/tests/test_pack.py
  67. 88 46
      dulwich/tests/test_patch.py
  68. 329 128
      dulwich/tests/test_porcelain.py
  69. 12 9
      dulwich/tests/test_protocol.py
  70. 7 5
      dulwich/tests/test_reflog.py
  71. 26 14
      dulwich/tests/test_refs.py
  72. 145 98
      dulwich/tests/test_repository.py
  73. 44 29
      dulwich/tests/test_server.py
  74. 18 14
      dulwich/tests/test_walk.py
  75. 19 15
      dulwich/tests/test_web.py
  76. 13 12
      dulwich/tests/utils.py
  77. 18 18
      dulwich/walk.py
  78. 18 10
      dulwich/web.py
  79. 29 21
      setup.py

+ 4 - 1
.travis.yml

@@ -23,7 +23,7 @@ matrix:
       env: TEST_REQUIRE=fastimport
 
 install:
-  - travis_retry pip install -U pip coverage codecov $TEST_REQUIRE
+  - travis_retry pip install -U pip coverage codecov flake8 $TEST_REQUIRE
 
 script:
   # Test without c extensions
@@ -33,6 +33,9 @@ script:
   - python setup.py build_ext -i
   - python -m coverage run -p --source=dulwich -m unittest dulwich.tests.test_suite
 
+  # Style
+  - make style
+
 after_success:
   - python -m coverage combine
   - codecov

+ 129 - 8
AUTHORS

@@ -1,11 +1,132 @@
-Jelmer Vernooij <jelmer@jelmer.uk>
-James Westby <jw+debian@jameswestby.net>
-John Carr <john.carr@unrouted.co.uk>
+Jelmer Vernooij <jelmer@jelmer.uk>
 Dave Borowitz <dborowitz@google.com>
-Chris Eberle <eberle1080@gmail.com>
-"milki" <milki@rescomp.berkeley.edu>
+John Carr <john.carr@unrouted.co.uk>
 Gary van der Merwe <garyvdm@gmail.com>
+milki <milki@rescomp.berkeley.edu>
+Augie Fackler <durin42@gmail.com>
+Tay Ray Chuan <rctay89@gmail.com>
+Risto Kankkunen <risto.kankkunen@iki.fi>
+Jonas Haag <jonas@lophus.org>
+Fabien Boucher <fabien.boucher@enovance.com>
+James Westby <jw+debian@jameswestby.net>
+Mike Edgar <adgar@google.com>
+Koen Martens <gmc@sonologic.nl>
+Abderrahim Kitouni <a.kitouni@gmail.com>
+William Grant <william.grant@canonical.com>
+Marcin Kuzminski <marcin@python-works.com>
+Ryan Faulkner <rfaulk@yahoo-inc.com>
+Julian Berman <Julian@GrayVines.com>
+Mark Mikofski <mark.mikofski@sunpowercorp.com>
+Michael K <michael-k@users.noreply.github.com>
+Ali Sabil <ali.sabil@gmail.com>
+Damien Tournoud <damien@commerceguys.com>
+Hannu Valtonen <hannu.valtonen@ohmu.fi>
+Mika Mäenpää <mika.j.maenpaa@iki.fi>
+Paul Hummer <paul@eventuallyanyway.com>
+Lele Gaifax <lele@metapensiero.it>
+Lukasz Balcerzak <lukasz.balcerzak@python-center.org>
+Tommy Yu <tommy.yu@auckland.ac.nz>
+anatoly techtonik <techtonik@gmail.com>
+bmcorser <bmcorser@gmail.com>
+Brendan Cully <brendan@kublai.com>
+Chow Loong Jin <hyperair@debian.org>
+Chris Eberle <eberle1080@gmail.com>
+Dmitriy <dkomarov@gmail.com>
+Hervé Cauwelier <herve@oursours.net>
+Hugo Osvaldo Barrera <hugo@barrera.io>
+Jameson Nash <jameson@mit.edu>
+Marc Brinkmann <git@marcbrinkmann.de>
+Nicolas Dandrimont <nicolas@dandrimont.eu>
+Robert Brown <robert.brown@gmail.com>
+Siddharth Agarwal <sid0@fb.com>
+Stefan Zimmermann <zimmermann.code@gmail.com>
+Takeshi Kanemoto <tak.kanemoto@gmail.com>
+Yifan Zhang <yifan@wavii.com>
+Aaron O'Mullan <aaron.omullan@friendco.de>
+Adam "Cezar" Jenkins <emperorcezar@gmail.com>
+Alberto Ruiz <aruiz@gnome.org>
+Alexander Belchenko <bialix@ukr.net>
+Andreas Kloeckner <inform@tiker.net>
+André Roth <neolynx@gmail.com>
+Benjamin Pollack <benjamin@bitquabit.com>
+Benoit HERVIER <khertan@khertan.net>
+Dan Callaghan <dcallagh@redhat.com>
+David Keijser <david.keijser@klarna.com>
+David Ostrovsky <david@ostrovsky.org>
+David Pursehouse <david.pursehouse@gmail.com>
+Dmitrij D. Czarkoff <czarkoff@gmail.com>
+Doug Hellmann <doug@doughellmann.com>
+Dov Feldstern <dovdevel@gmail.com>
+Félix Mattrat <felix@dysosmus.net>
+Hwee Miin Koh <hwee-miin.koh@ubisoft.com>
+Jason R. Coombs <jaraco@jaraco.com>
+Jeremy Whitlock <jcscoobyrs@gmail.com>
+John Arbash Meinel <john@arbash-meinel.com>
+Laurent Rineau <laurent.rineau@cgal.org>
+Martin Packman <gzlist@googlemail.com>
+Max Shawabkeh <max99x@gmail.com>
+Michael Hudson <michael.hudson@canonical.com>
+Nick Stenning <nick@whiteink.com>
+Nick Ward <ward.nickjames@gmail.com>
+Paul Chen <lancevdance@gmail.com>
+Roland Mas <lolando@debian.org>
+Ronald Blaschke <ron@rblasch.org>
+Ronny Pfannschmidt <Ronny.Pfannschmidt@gmx.de>
+Ross Light <ross@zombiezen.com>
+Ryan McKern <ryan@orangefort.com>
+Ted Horst <ted.horst@earthlink.net>
+Thomas Liebetraut <thomas@tommie-lie.de>
+Timo Schmid <info@bluec0re.eu>
+Víðir Valberg Guðmundsson <vidir.valberg@orn.li>
+dak180 <dak180@users.sourceforge.net>
+Akbar Gumbira <akbargumbira@gmail.com>
+Alex Holmes <alex.holmes@isotoma.com>
+Andi McClure <andi.m.mcclure@gmail.com>
+Andres Lowrie <andres.lowrie@gmail.com>
+Artem Tikhomirov <artem.tikhomirov@syntevo.com>
+Brian Visel <eode@eptitude.net>
+Bruce Duncan <Bruce.Duncan@ed.ac.uk>
+Bruno Renié <brutasse@gmail.com>
+Chaiwat Suttipongsakul <cwt@bashell.com>
+Chris Bunney <crbunney@users.noreply.github.com>
+Chris Reid <chris@reidsy.com>
+Daniele Sluijters <daniele.sluijters@gmail.com>
+David Bennett <davbennett@google.com>
+David Blewett <davidb@sixfeetup.com>
+David Carr <david@carrclan.us>
+Dirk <dirk@opani.com>
+Elan Ruusamäe <glen@delfi.ee>
+Forrest Hopkins <fingerheroes@gmail.com>
+Hal Wine <hal.wine@gmail.com>
+Hans Kolek <hkolek@gmail.com>
+Jakub Wilk <jwilk@jwilk.net>
+JonChu <jchonphoenix@gmail.com>
+Kostis Anagnostopoulos <ankostis@gmail.com>
+Kyle Kelly <kkelly@yelp.com>
+Lionel Flandrin <lionel@svkt.org>
+Max Bowsher <_@maxb.eu>
+Mike Williams <miwilliams@google.com>
+Mikhail Terekhov <terekhov@emc.com>
+Nix <nix@esperi.co.uk>
+OnMaster <wme@CONTACT.DE>
+Pascal Quantin <pascal.quantin@gmail.com>
+Ricardo Salveti <ricardo.salveti@openbossa.org>
+Rod Cloutier <rodcloutier@gmail.com>
+Sam Vilain <svilain@saymedia.com>
+Stefano Rivera <stefano@rivera.za.net>
+Steven Myint <git@stevenmyint.com>
+Søren Løvborg <sorenl@unity3d.com>
+Travis Cline <travis.cline@gmail.com>
+Victor Stinner <vstinner@redhat.com>
+Volodymyr Holovko <vholovko@gmail.com>
+Yuval Langer <yuval.langer@gmail.com>
+codingtony <tony.bussieres@gmail.com>
+jon bain <jsbain@yahoo.com>
+kwatters <kwatters@tagged.com>
+max <max0d41@github.com>
+Segev Finer <segev208@gmail.com>
+fviolette <fviolette@talend.com>
+dzhuang <dzhuang.scut@gmail.com>
+Antoine Pietri <antoine.pietri1@gmail.com>
 
-Hervé Cauwelier <herve@itaapy.com> wrote the original tutorial.
-
-See the revision history for a full list of contributors.
+If you contributed but are missing from this list, please send me an e-mail.

+ 4 - 0
Makefile

@@ -1,6 +1,7 @@
 PYTHON = python
 PYFLAKES = pyflakes
 PEP8 = pep8
+FLAKE8 ?= flake8
 SETUP = $(PYTHON) setup.py
 PYDOCTOR ?= pydoctor
 TESTRUNNER ?= unittest
@@ -57,6 +58,9 @@ flakes:
 pep8:
 	$(PEP8) dulwich
 
+style:
+	$(FLAKE8) --exclude=build,.git,build-pypy,.tox
+
 before-push: check
 	git diff origin/master | $(PEP8) --diff
 

+ 146 - 93
NEWS

@@ -1,3 +1,56 @@
+0.18.0	2017-07-31
+
+ BUG FIXES
+
+  * Fix remaining tests on Windows. (Jelmer Vernooij, #493)
+
+  * Fix build of C extensions with Python 3 on Windows.
+    (Jelmer Vernooij)
+
+  * Pass 'mkdir' argument onto Repo.init_bare in Repo.clone.
+    (Jelmer Vernooij, #504)
+
+  * In ``dulwich.porcelain.add``, if no files are specified,
+    add from current working directory rather than repository root.
+    (Jelmer Vernooij, #521)
+
+  * Properly deal with submodules in 'porcelain.status'.
+    (Jelmer Vernooij, #517)
+
+  * ``dulwich.porcelain.remove`` now actually removes files from
+    disk, not just from the index. (Jelmer Vernooij, #488)
+
+  * Fix handling of "reset" command with markers and without
+    "from". (Antoine Pietri)
+
+  * Fix handling of "merge" command with markers. (Antoine Pietri)
+
+  * Support treeish argument to porcelain.reset(), rather than
+    requiring a ref/commit id. (Jelmer Vernooij)
+
+  * Handle race condition when mtime doesn't change between writes/reads.
+    (Jelmer Vernooij, #541)
+
+  * Fix ``dulwich.porcelain.show`` on commits with Python 3.
+    (Jelmer Vernooij, #532)
+
+ IMPROVEMENTS
+
+  * Add basic support for reading ignore files in ``dulwich.ignore``.
+    ``dulwich.porcelain.add`` and ``dulwich.porcelain.status`` now honor
+    ignores. (Jelmer Vernooij, Segev Finer, #524, #526)
+
+  * New ``dulwich.porcelain.check_ignore`` command.
+    (Jelmer Vernooij)
+
+  * ``dulwich.porcelain.status`` now supports a ``ignored`` argument.
+    (Jelmer Vernooij)
+
+ DOCUMENTATION
+
+  * Clarified docstrings for Client.{send_pack,fetch_pack} implementations.
+    (Jelmer Vernooij, #523)
+
 0.17.3	2017-03-20
 
  PLATFORM SUPPORT
@@ -12,62 +65,62 @@
 
  BUG FIXES
 
- * Add workaround for
-   https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work,
-   fixing Dulwich when used with C extensions on pypy < 5.6. (Victor Stinner)
+  * Add workaround for
+    https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work,
+    fixing Dulwich when used with C extensions on pypy < 5.6. (Victor Stinner)
 
- * Properly quote config values with a '#' character in them.
-   (Jelmer Vernooij, #511)
+  * Properly quote config values with a '#' character in them.
+    (Jelmer Vernooij, #511)
 
 0.17.1	2017-03-01
 
  IMPROVEMENTS
 
- * Add basic 'dulwich pull' command. (Jelmer Vernooij)
+  * Add basic 'dulwich pull' command. (Jelmer Vernooij)
 
  BUG FIXES
 
- * Cope with existing submodules during pull.
-   (Jelmer Vernooij, #505)
+  * Cope with existing submodules during pull.
+    (Jelmer Vernooij, #505)
 
 0.17.0	2017-03-01
 
  TEST FIXES
 
- * Skip test that requires sync to synchronize filesystems if os.sync is
-   not available. (Koen Martens)
+  * Skip test that requires sync to synchronize filesystems if os.sync is
+    not available. (Koen Martens)
 
  IMPROVEMENTS
 
- * Implement MemoryRepo.{set_description,get_description}.
-   (Jelmer Vernooij)
+  * Implement MemoryRepo.{set_description,get_description}.
+    (Jelmer Vernooij)
 
- * Raise exception in Repo.stage() when absolute paths are
-   passed in. Allow passing in relative paths to
-   porcelain.add().(Jelmer Vernooij)
+  * Raise exception in Repo.stage() when absolute paths are
+    passed in. Allow passing in relative paths to
+    porcelain.add().(Jelmer Vernooij)
 
  BUG FIXES
 
- * Handle multi-line quoted values in config files.
-   (Jelmer Vernooij, #495)
+  * Handle multi-line quoted values in config files.
+    (Jelmer Vernooij, #495)
 
- * Allow porcelain.clone of repository without HEAD.
-   (Jelmer Vernooij, #501)
+  * Allow porcelain.clone of repository without HEAD.
+    (Jelmer Vernooij, #501)
 
- * Support passing tag ids to Walker()'s include argument.
-   (Jelmer Vernooij)
+  * Support passing tag ids to Walker()'s include argument.
+    (Jelmer Vernooij)
 
- * Don't strip trailing newlines from extra headers.
-   (Nicolas Dandrimont)
+  * Don't strip trailing newlines from extra headers.
+    (Nicolas Dandrimont)
 
- * Set bufsize=0 for subprocess interaction with SSH client.
-   Fixes hangs on Python 3. (René Stern, #434)
+  * Set bufsize=0 for subprocess interaction with SSH client.
+    Fixes hangs on Python 3. (René Stern, #434)
 
- * Don't drop first slash for SSH paths, except for those
-   starting with "~". (Jelmer Vernooij, René Stern, #463)
+  * Don't drop first slash for SSH paths, except for those
+    starting with "~". (Jelmer Vernooij, René Stern, #463)
 
- * Properly log off after retrieving just refs.
-   (Jelmer Vernooij)
+  * Properly log off after retrieving just refs.
+    (Jelmer Vernooij)
 
 0.16.3	2016-01-14
 
@@ -505,51 +558,51 @@ API CHANGES
 
  IMPROVEMENTS
 
- * Add support for recursive add in 'git add'.
-   (Ryan Faulkner, Jelmer Vernooij)
+  * Add support for recursive add in 'git add'.
+    (Ryan Faulkner, Jelmer Vernooij)
 
- * Add porcelain 'list_tags'. (Ryan Faulkner)
+  * Add porcelain 'list_tags'. (Ryan Faulkner)
 
- * Add porcelain 'push'. (Ryan Faulkner)
+  * Add porcelain 'push'. (Ryan Faulkner)
 
- * Add porcelain 'pull'. (Ryan Faulkner)
+  * Add porcelain 'pull'. (Ryan Faulkner)
 
- * Support 'http.proxy' in HttpGitClient.
-   (Jelmer Vernooij, #1096030)
+  * Support 'http.proxy' in HttpGitClient.
+    (Jelmer Vernooij, #1096030)
 
- * Support 'http.useragent' in HttpGitClient.
-   (Jelmer Vernooij)
+  * Support 'http.useragent' in HttpGitClient.
+    (Jelmer Vernooij)
 
- * In server, wait for clients to send empty list of
-   wants when talking to empty repository.
-   (Damien Tournoud)
+  * In server, wait for clients to send empty list of
+    wants when talking to empty repository.
+    (Damien Tournoud)
 
- * Various changes to improve compatibility with
-   Python 3. (Gary van der Merwe)
+  * Various changes to improve compatibility with
+    Python 3. (Gary van der Merwe)
 
  BUG FIXES
 
- * Support unseekable 'wsgi.input' streams.
-   (Jonas Haag)
+  * Support unseekable 'wsgi.input' streams.
+    (Jonas Haag)
 
- * Raise TypeError when passing unicode() object
-   to Repo.__getitem__.
-   (Jonas Haag)
+  * Raise TypeError when passing unicode() object
+    to Repo.__getitem__.
+    (Jonas Haag)
 
- * Fix handling of `reset` command in dulwich.fastexport.
-   (Jelmer Vernooij, #1249029)
+  * Fix handling of `reset` command in dulwich.fastexport.
+    (Jelmer Vernooij, #1249029)
 
- * In client, don't wait for server to close connection
-   first. Fixes hang when used against GitHub
-   server implementation. (Siddharth Agarwal)
+  * In client, don't wait for server to close connection
+    first. Fixes hang when used against GitHub
+    server implementation. (Siddharth Agarwal)
 
- * DeltaChainIterator: fix a corner case where an object is inflated as an
-   object already in the repository.
-   (Damien Tournoud, #135)
+  * DeltaChainIterator: fix a corner case where an object is inflated as an
+    object already in the repository.
+    (Damien Tournoud, #135)
 
- * Stop leaking file handles during pack reload. (Damien Tournoud)
+  * Stop leaking file handles during pack reload. (Damien Tournoud)
 
- * Avoid reopening packs during pack cache reload. (Jelmer Vernooij)
+  * Avoid reopening packs during pack cache reload. (Jelmer Vernooij)
 
  API CHANGES
 
@@ -559,63 +612,63 @@ API CHANGES
 
  IMPROVEMENTS
 
- * Add porcelain 'tag'. (Ryan Faulkner)
+  * Add porcelain 'tag'. (Ryan Faulkner)
 
- * New module `dulwich.objectspec` for parsing strings referencing
-   objects and commit ranges. (Jelmer Vernooij)
+  * New module `dulwich.objectspec` for parsing strings referencing
+    objects and commit ranges. (Jelmer Vernooij)
 
- * Add shallow branch support. (milki)
+  * Add shallow branch support. (milki)
 
- * Allow passing urllib2 `opener` into HttpGitClient.
-   (Dov Feldstern, #909037)
+  * Allow passing urllib2 `opener` into HttpGitClient.
+    (Dov Feldstern, #909037)
 
  CHANGES
 
- * Drop support for Python 2.4 and 2.5. (Jelmer Vernooij)
+  * Drop support for Python 2.4 and 2.5. (Jelmer Vernooij)
 
  API CHANGES
 
- * Remove long deprecated ``Repo.commit``, ``Repo.get_blob``,
-   ``Repo.tree`` and ``Repo.tag``. (Jelmer Vernooij)
+  * Remove long deprecated ``Repo.commit``, ``Repo.get_blob``,
+    ``Repo.tree`` and ``Repo.tag``. (Jelmer Vernooij)
 
- * Remove long deprecated ``Repo.revision_history`` and ``Repo.ref``.
-   (Jelmer Vernooij)
+  * Remove long deprecated ``Repo.revision_history`` and ``Repo.ref``.
+    (Jelmer Vernooij)
 
- * Remove long deprecated ``Tree.entries``. (Jelmer Vernooij)
+  * Remove long deprecated ``Tree.entries``. (Jelmer Vernooij)
 
  BUG FIXES
 
- * Raise KeyError rather than TypeError when passing in
-   unicode object of length 20 or 40 to Repo.__getitem__.
-   (Jelmer Vernooij)
+  * Raise KeyError rather than TypeError when passing in
+    unicode object of length 20 or 40 to Repo.__getitem__.
+    (Jelmer Vernooij)
 
- * Use 'rm' rather than 'unlink' in tests, since the latter
-   does not exist on OpenBSD and other platforms.
-   (Dmitrij D. Czarkoff)
+  * Use 'rm' rather than 'unlink' in tests, since the latter
+    does not exist on OpenBSD and other platforms.
+    (Dmitrij D. Czarkoff)
 
 0.9.4	2013-11-30
 
  IMPROVEMENTS
 
- * Add ssh_kwargs attribute to ParamikoSSHVendor. (milki)
+  * Add ssh_kwargs attribute to ParamikoSSHVendor. (milki)
 
- * Add Repo.set_description(). (Víðir Valberg Guðmundsson)
+  * Add Repo.set_description(). (Víðir Valberg Guðmundsson)
 
- * Add a basic `dulwich.porcelain` module. (Jelmer Vernooij, Marcin Kuzminski)
+  * Add a basic `dulwich.porcelain` module. (Jelmer Vernooij, Marcin Kuzminski)
 
- * Various performance improvements for object access.
+  * Various performance improvements for object access.
    (Jelmer Vernooij)
 
- * New function `get_transport_and_path_from_url`,
-   similar to `get_transport_and_path` but only
-   supports URLs.
-   (Jelmer Vernooij)
+  * New function `get_transport_and_path_from_url`,
+    similar to `get_transport_and_path` but only
+    supports URLs.
+    (Jelmer Vernooij)
 
- * Add support for file:// URLs in `get_transport_and_path_from_url`.
-   (Jelmer Vernooij)
+  * Add support for file:// URLs in `get_transport_and_path_from_url`.
+    (Jelmer Vernooij)
 
- * Add LocalGitClient implementation.
-   (Jelmer Vernooij)
+  * Add LocalGitClient implementation.
+    (Jelmer Vernooij)
 
  BUG FIXES
 
@@ -919,11 +972,11 @@ FEATURES
   * Smart protocol clients can now change refs even if they are
     not uploading new data. (Jelmer Vernooij, #855993)
 
- * Don't compile C extensions when running in pypy.
-   (Ronny Pfannschmidt, #881546)
+  * Don't compile C extensions when running in pypy.
+    (Ronny Pfannschmidt, #881546)
 
- * Use different name for strnlen replacement function to avoid clashing
-   with system strnlen. (Jelmer Vernooij, #880362)
+  * Use different name for strnlen replacement function to avoid clashing
+    with system strnlen. (Jelmer Vernooij, #880362)
 
  API CHANGES
 
@@ -1521,4 +1574,4 @@ note: This list is most likely incomplete for 0.6.0.
 
 0.1.0	2009-01-24
 
- * Initial release.
+  * Initial release.

+ 5 - 4
PKG-INFO

@@ -1,9 +1,9 @@
 Metadata-Version: 1.1
 Name: dulwich
-Version: 0.17.3
+Version: 0.18.0
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
-Author: Jelmer Vernooij
+Author: UNKNOWN
 Author-email: jelmer@jelmer.uk
 License: Apachev2 or later or GPLv2
 Description: 
@@ -13,8 +13,8 @@ Description:
               All functionality is available in pure Python. Optional
               C extensions can be built for improved performance.
         
-              The project is named after the part of London that Mr. and Mrs. Git live in
-              in the particular Monty Python sketch.
+              The project is named after the part of London that Mr. and Mrs. Git live
+              in in the particular Monty Python sketch.
               
 Keywords: git
 Platform: UNKNOWN
@@ -28,4 +28,5 @@ Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Operating System :: POSIX
+Classifier: Operating System :: Microsoft :: Windows
 Classifier: Topic :: Software Development :: Version Control

+ 38 - 0
README.md

@@ -1,4 +1,5 @@
 [![Build Status](https://travis-ci.org/jelmer/dulwich.png?branch=master)](https://travis-ci.org/jelmer/dulwich)
+[![Windows Build status](https://ci.appveyor.com/api/projects/status/cnothr6pxprfx2lf/branch/master?svg=true)](https://ci.appveyor.com/project/jelmer/dulwich-njb6g/branch/master)
 
 This is the Dulwich project.
 
@@ -27,6 +28,35 @@ or if you are installing from pip::
 
     $ pip install dulwich --global-option="--pure"
 
+Getting started
+---------------
+
+Dulwich comes with both a lower-level API and higher-level plumbing ("porcelain").
+
+For example, to use the lower level API to access the commit message of the
+last commit:
+
+    >>> from dulwich.repo import Repo
+    >>> r = Repo('.')
+    >>> r.head()
+    '57fbe010446356833a6ad1600059d80b1e731e15'
+    >>> c = r[r.head()]
+    >>> c
+    <Commit 015fc1267258458901a94d228e39f0a378370466>
+    >>> c.message
+    'Add note about encoding.\n'
+
+And to print it using porcelain:
+
+    >>> from dulwich import porcelain
+    >>> porcelain.log('.', max_entries=1)
+    --------------------------------------------------
+    commit: 57fbe010446356833a6ad1600059d80b1e731e15
+    Author: Jelmer Vernooij <jelmer@jelmer.uk>
+    Date:   Sat Apr 29 2017 23:57:34 +0000
+
+    Add note about encoding.
+
 Further documentation
 ---------------------
 
@@ -44,6 +74,14 @@ There is a *#dulwich* IRC channel on the [Freenode](https://www.freenode.net/),
 and [dulwich-discuss](https://groups.google.com/forum/#!forum/dulwich-discuss)
 mailing lists.
 
+Contributing
+------------
+
+For a full list of contributors, see the git logs or [AUTHORS](AUTHORS).
+
+If you'd like to contribute to Dulwich, see the [CONTRIBUTING](CONTRIBUTING.md)
+file and [list of open issues](https://github.com/jelmer/dulwich/issues).
+
 Supported versions of Python
 ----------------------------
 

+ 76 - 15
appveyor.yml

@@ -1,35 +1,96 @@
 environment:
+
   matrix:
+
     - PYTHON: "C:\\Python27"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win32-py2.7.exe"
+      PYTHON_VERSION: "2.7.x"
+      PYTHON_ARCH: "32"
 
-    - PYTHON: "C:\\Python34"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win32-py3.4.exe"
+    - PYTHON: "C:\\Python27-x64"
+      PYTHON_VERSION: "2.7.x"
+      PYTHON_ARCH: "64"
 
-    - PYTHON: "C:\\Python35"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win32-py3.5.exe"
+    - PYTHON: "C:\\Python33"
+      PYTHON_VERSION: "3.3.x"
+      PYTHON_ARCH: "32"
 
-    - PYTHON: "C:\\Python27-x64"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win-amd64-py2.7.exe"
+    - PYTHON: "C:\\Python33-x64"
+      PYTHON_VERSION: "3.3.x"
+      PYTHON_ARCH: "64"
+      DISTUTILS_USE_SDK: "1"
+
+    - PYTHON: "C:\\Python34"
+      PYTHON_VERSION: "3.4.x"
+      PYTHON_ARCH: "32"
 
     - PYTHON: "C:\\Python34-x64"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win-amd64-py3.4.exe"
+      PYTHON_VERSION: "3.4.x"
+      PYTHON_ARCH: "64"
+      DISTUTILS_USE_SDK: "1"
+
+    - PYTHON: "C:\\Python35"
+      PYTHON_VERSION: "3.5.x"
+      PYTHON_ARCH: "32"
 
     - PYTHON: "C:\\Python35-x64"
-      PYWIN32_URL: "https://downloads.sourceforge.net/project/pywin32/pywin32/Build%20220/pywin32-220.win-amd64-py3.5.exe"
+      PYTHON_VERSION: "3.5.x"
+      PYTHON_ARCH: "64"
+
+    - PYTHON: "C:\\Python36"
+      PYTHON_VERSION: "3.6.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python36-x64"
+      PYTHON_VERSION: "3.6.x"
+      PYTHON_ARCH: "64"
 
 install:
-  - ps: (new-object net.webclient).DownloadFile($env:PYWIN32_URL, 'c:\\pywin32.exe')
-  - "%PYTHON%/Scripts/easy_install.exe c:\\pywin32.exe"
-  - "%PYTHON%/Scripts/easy_install.exe wheel"
+  # If there is a newer build queued for the same PR, cancel this one.
+  # The AppVeyor 'rollout builds' option is supposed to serve the same
+  # purpose but it is problematic because it tends to cancel builds pushed
+  # directly to master instead of just PR builds (or the converse).
+  # credits: JuliaLang developers.
+  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
+        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
+        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
+          throw "There are newer queued builds for this pull request, failing early." }
+  - ECHO "Filesystem root:"
+  - ps: "ls \"C:/\""
+
+  - ECHO "Installed SDKs:"
+  - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\""
+
+  # Install Python (from the official .msi of http://python.org) and pip when
+  # not already installed.
+  - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
+
+  # Prepend newly installed Python to the PATH of this build (this cannot be
+  # done from inside the powershell script as it would require to restart
+  # the parent CMD process).
+  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+  # Check that we have the expected version and architecture for Python
+  - "build.cmd %PYTHON%\\python.exe --version"
+  - "build.cmd %PYTHON%\\python.exe -c \"import struct; print(struct.calcsize('P') * 8)\""
+
+  # Install setuptools/wheel so that we can e.g. use bdist_wheel
+  - "pip install setuptools wheel"
+
+  - "build.cmd %PYTHON%\\python.exe setup.py develop"
 
-build: off
+build_script:
+  # Build the compiled extension
+  - "build.cmd %PYTHON%\\python.exe setup.py build"
 
 test_script:
-  - "%WITH_COMPILER% %PYTHON%/python setup.py test"
+  - "build.cmd %PYTHON%\\python.exe setup.py test"
 
 after_test:
-  - "%WITH_COMPILER% %PYTHON%/python setup.py bdist_wheel"
+  - "build.cmd %PYTHON%\\python.exe setup.py bdist_wheel"
+  # http://stackoverflow.com/questions/43255455/unicode-character-causing-error-with-bdist-wininst-on-python-3-but-not-python-2
+  # - "python setup.py bdist_wininst"
+  - "build.cmd %PYTHON%\\python.exe setup.py bdist_msi"
+  - ps: "ls dist"
 
 artifacts:
   - path: dist\*

+ 13 - 0
bin/dulwich

@@ -507,6 +507,18 @@ class cmd_remote(Command):
         return cmd_kls(args[1:])
 
 
+class cmd_check_ignore(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        ret = 1
+        for path in porcelain.check_ignore('.', args):
+            print(path)
+            ret = 0
+        return ret
+
+
 class cmd_help(Command):
 
     def run(self, args):
@@ -532,6 +544,7 @@ For a list of supported commands, see 'dulwich help -a'.
 commands = {
     "add": cmd_add,
     "archive": cmd_archive,
+    "check-ignore": cmd_check_ignore,
     "clone": cmd_clone,
     "commit": cmd_commit,
     "commit-tree": cmd_commit_tree,

+ 47 - 44
docs/conf.py

@@ -3,7 +3,8 @@
 # dulwich documentation build configuration file, created by
 # sphinx-quickstart on Thu Feb 18 23:18:28 2010.
 #
-# This file is execfile()d with the current directory set to its containing dir.
+# This file is execfile()d with the current directory set to its containing
+# dir.
 #
 # Note that not all possible configuration values are present in this
 # autogenerated file.
@@ -11,7 +12,8 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys, os
+import os
+import sys
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
@@ -20,10 +22,10 @@ sys.path.insert(0, os.path.abspath('..'))
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))
 dulwich = __import__('dulwich')
 
-# -- General configuration -----------------------------------------------------
+# -- General configuration ----------------------------------------------------
 
-# Add any Sphinx extension module names here, as strings. They can be extensions
-# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = ['sphinx.ext.autodoc']
 try:
     import rst2pdf
@@ -42,7 +44,7 @@ templates_path = ['templates']
 source_suffix = '.txt'
 
 # The encoding of source files.
-#source_encoding = 'utf-8'
+#         source_encoding = 'utf-8'
 
 # The master toctree document.
 master_doc = 'index'
@@ -62,72 +64,73 @@ release = '.'.join(map(str, dulwich.__version__))
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#language = None
+# language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of documents that shouldn't be included in the build.
-#unused_docs = []
+# unused_docs = []
 
 # List of directories, relative to source directory, that shouldn't be searched
 # for source files.
 exclude_trees = ['build']
 
-# The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 
-# -- Options for HTML output ---------------------------------------------------
+# -- Options for HTML output -------------------------------------------------
 
 # The theme to use for HTML and HTML Help pages.  Major themes that come with
 # Sphinx are currently 'default' and 'sphinxdoc'.
-#html_theme = 'default'
+# html_theme = 'default'
 html_theme = 'nature'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
 html_theme_path = ['theme']
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+# html_logo = None
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+# html_favicon = None
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
@@ -136,53 +139,54 @@ html_static_path = []
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_use_modindex = True
+# html_use_modindex = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = ''
+# html_file_suffix = ''
 
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'dulwichdoc'
 
 
-# -- Options for LaTeX output --------------------------------------------------
+# -- Options for LaTeX output ------------------------------------------------
 
 # The paper size ('letter' or 'a4').
-#latex_paper_size = 'letter'
+# latex_paper_size = 'letter'
 
 # The font size ('10pt', '11pt' or '12pt').
-#latex_font_size = '10pt'
+# latex_font_size = '10pt'
 
 # Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title, author, documentclass [howto/manual]).
+# (source start file, target name, title, author, documentclass
+# [howto/manual]).
 latex_documents = [
   ('index', 'dulwich.tex', u'dulwich Documentation',
    u'Jelmer Vernooij', 'manual'),
@@ -190,26 +194,25 @@ latex_documents = [
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
-#latex_logo = None
+# latex_logo = None
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # Additional stuff for the LaTeX preamble.
-#latex_preamble = ''
+# latex_preamble = ''
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_use_modindex = True
+# latex_use_modindex = True
 
 pdf_documents = [
     ('index', u'dulwich', u'Documentation for dulwich',
         u'Jelmer Vernooij'),
 ]
-pdf_stylesheets = ['sphinx','kerning','a4']
+pdf_stylesheets = ['sphinx', 'kerning', 'a4']
 pdf_break_level = 2
 pdf_inline_footnotes = True
-

+ 2 - 2
docs/tutorial/file-format.txt

@@ -33,7 +33,7 @@ A commit file looks like this::
   [parent <parent sha> if several parents from merges]
   author <author name> <author e-mail> <timestamp> <timezone>
   committer <author name> <author e-mail> <timestamp> <timezone>
- 
+
   <commit message>
 
 But where are the changes you committed? The commit contains a reference to a
@@ -90,7 +90,7 @@ used for the history. They can either appear as simple files on disk (one file
 per object) or in a ``pack`` file, which is a container for a number of these
 objects.
 
-The is also an index of the current state of the working copy in the
+There is also an index of the current state of the working copy in the
 repository as well as files to track the existing branches and tags.
 
 For a more detailed explanation of object formats and SHA-1 digests, see:

+ 6 - 0
docs/tutorial/porcelain.txt

@@ -32,3 +32,9 @@ Commit changes
   >>> open("testrepo/testfile", "w").write("data")
   >>> porcelain.add(r, "testfile")
   >>> porcelain.commit(r, b"A sample commit")
+
+Push changes
+------------
+
+  >>> tr = porcelain.init("targetrepo")
+  >>> r = porcelain.push("testrepo", "targetrepo", "master")

+ 1 - 1
docs/tutorial/remote.txt

@@ -28,7 +28,7 @@ The smart server protocol can be accessed over either plain TCP (git://),
 SSH (git+ssh://) or tunneled over HTTP (http://).
 
 Dulwich provides support for accessing remote repositories in
-``dulwich.client``. To create a new client, you can either construct
+``dulwich.client``. To create a new client, you can construct
 one manually::
 
    >>> from dulwich.client import TCPGitClient

+ 5 - 4
dulwich.egg-info/PKG-INFO

@@ -1,9 +1,9 @@
 Metadata-Version: 1.1
 Name: dulwich
-Version: 0.17.3
+Version: 0.18.0
 Summary: Python Git Library
 Home-page: https://www.dulwich.io/
-Author: Jelmer Vernooij
+Author: UNKNOWN
 Author-email: jelmer@jelmer.uk
 License: Apachev2 or later or GPLv2
 Description: 
@@ -13,8 +13,8 @@ Description:
               All functionality is available in pure Python. Optional
               C extensions can be built for improved performance.
         
-              The project is named after the part of London that Mr. and Mrs. Git live in
-              in the particular Monty Python sketch.
+              The project is named after the part of London that Mr. and Mrs. Git live
+              in in the particular Monty Python sketch.
               
 Keywords: git
 Platform: UNKNOWN
@@ -28,4 +28,5 @@ Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Operating System :: POSIX
+Classifier: Operating System :: Microsoft :: Windows
 Classifier: Topic :: Software Development :: Version Control

+ 2 - 0
dulwich.egg-info/SOURCES.txt

@@ -47,6 +47,7 @@ dulwich/fastexport.py
 dulwich/file.py
 dulwich/greenthreads.py
 dulwich/hooks.py
+dulwich/ignore.py
 dulwich/index.py
 dulwich/log_utils.py
 dulwich/lru_cache.py
@@ -86,6 +87,7 @@ dulwich/tests/test_file.py
 dulwich/tests/test_grafts.py
 dulwich/tests/test_greenthreads.py
 dulwich/tests/test_hooks.py
+dulwich/tests/test_ignore.py
 dulwich/tests/test_index.py
 dulwich/tests/test_lru_cache.py
 dulwich/tests/test_missing_obj_finder.py

+ 1 - 1
dulwich/__init__.py

@@ -22,4 +22,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 17, 3)
+__version__ = (0, 18, 0)

+ 4 - 8
dulwich/_diff_tree.c

@@ -25,12 +25,8 @@
 typedef unsigned short mode_t;
 #endif
 
-#if (PY_VERSION_HEX < 0x02050000)
-typedef int Py_ssize_t;
-#endif
-
-#if (PY_VERSION_HEX < 0x02060000)
-#define Py_SIZE(ob)             (((PyVarObject*)(ob))->ob_size)
+#if PY_MAJOR_VERSION < 3
+typedef long Py_hash_t;
 #endif
 
 #if PY_MAJOR_VERSION >= 3
@@ -300,11 +296,11 @@ static PyObject *py_is_tree(PyObject *self, PyObject *args)
 	return result;
 }
 
-static int add_hash(PyObject *get, PyObject *set, char *str, int n)
+static Py_hash_t add_hash(PyObject *get, PyObject *set, char *str, int n)
 {
 	PyObject *str_obj = NULL, *hash_obj = NULL, *value = NULL,
 		*set_value = NULL;
-	long hash;
+	Py_hash_t hash;
 
 	/* It would be nice to hash without copying str into a PyString, but that
 	 * isn't exposed by the API. */

+ 6 - 8
dulwich/_objects.c

@@ -22,10 +22,6 @@
 #include <stdlib.h>
 #include <sys/stat.h>
 
-#if (PY_VERSION_HEX < 0x02050000)
-typedef int Py_ssize_t;
-#endif
-
 #if PY_MAJOR_VERSION >= 3
 #define PyInt_Check(obj) 0
 #define PyInt_CheckExact(obj) 0
@@ -65,7 +61,8 @@ static PyObject *sha_to_pyhex(const unsigned char *sha)
 static PyObject *py_parse_tree(PyObject *self, PyObject *args, PyObject *kw)
 {
 	char *text, *start, *end;
-	int len, namelen, strict;
+	int len, strict;
+	size_t namelen;
 	PyObject *ret, *item, *name, *sha, *py_strict = NULL;
 	static char *kwlist[] = {"text", "strict", NULL};
 
@@ -147,7 +144,8 @@ int cmp_tree_item(const void *_a, const void *_b)
 {
 	const struct tree_item *a = _a, *b = _b;
 	const char *remain_a, *remain_b;
-	int ret, common;
+	int ret;
+	size_t common;
 	if (strlen(a->name) > strlen(b->name)) {
 		common = strlen(b->name);
 		remain_a = a->name + common;
@@ -175,9 +173,9 @@ int cmp_tree_item_name_order(const void *_a, const void *_b) {
 static PyObject *py_sorted_tree_items(PyObject *self, PyObject *args)
 {
 	struct tree_item *qsort_entries = NULL;
-	int name_order, num_entries, n = 0, i;
+	int name_order, n = 0, i;
 	PyObject *entries, *py_name_order, *ret, *key, *value, *py_mode, *py_sha;
-	Py_ssize_t pos = 0;
+	Py_ssize_t pos = 0, num_entries;
 	int (*cmp)(const void *, const void *);
 
 	if (!PyArg_ParseTuple(args, "OO", &entries, &py_name_order))

+ 12 - 9
dulwich/_pack.c

@@ -264,15 +264,6 @@ moduleinit(void)
 	PyObject *m;
 	PyObject *errors_module;
 
-	errors_module = PyImport_ImportModule("dulwich.errors");
-	if (errors_module == NULL)
-		return NULL;
-
-	PyExc_ApplyDeltaError = PyObject_GetAttrString(errors_module, "ApplyDeltaError");
-	Py_DECREF(errors_module);
-	if (PyExc_ApplyDeltaError == NULL)
-		return NULL;
-
 #if PY_MAJOR_VERSION >= 3
 	static struct PyModuleDef moduledef = {
 	  PyModuleDef_HEAD_INIT,
@@ -285,6 +276,18 @@ moduleinit(void)
 	  NULL,            /* m_clear*/
 	  NULL,            /* m_free */
 	};
+#endif
+
+	errors_module = PyImport_ImportModule("dulwich.errors");
+	if (errors_module == NULL)
+		return NULL;
+
+	PyExc_ApplyDeltaError = PyObject_GetAttrString(errors_module, "ApplyDeltaError");
+	Py_DECREF(errors_module);
+	if (PyExc_ApplyDeltaError == NULL)
+		return NULL;
+
+#if PY_MAJOR_VERSION >= 3
 	m = PyModule_Create(&moduledef);
 #else
 	m = Py_InitModule3("_pack", py_pack_methods, NULL);

+ 8 - 4
dulwich/archive.py

@@ -34,9 +34,11 @@ class ChunkedBytesIO(object):
     """Turn a list of bytestrings into a file-like object.
 
     This is similar to creating a `BytesIO` from a concatenation of the
-    bytestring list, but saves memory by NOT creating one giant bytestring first::
+    bytestring list, but saves memory by NOT creating one giant bytestring
+    first::
 
-        BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(list_of_bytestrings)
+        BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(
+            list_of_bytestrings)
     """
     def __init__(self, contents):
         self.contents = contents
@@ -84,12 +86,14 @@ def tar_stream(store, tree, mtime, format=''):
             try:
                 blob = store[entry.sha]
             except KeyError:
-                # Entry probably refers to a submodule, which we don't yet support.
+                # Entry probably refers to a submodule, which we don't yet
+                # support.
                 continue
             data = ChunkedBytesIO(blob.chunked)
 
             info = tarfile.TarInfo()
-            info.name = entry_abspath.decode('ascii') # tarfile only works with ascii.
+            # tarfile only works with ascii.
+            info.name = entry_abspath.decode('ascii')
             info.size = blob.raw_length()
             info.mode = entry.mode
             info.mtime = mtime

+ 125 - 66
dulwich/client.py

@@ -104,6 +104,19 @@ def _fileno_can_read(fileno):
     return len(select.select([fileno], [], [], 0)[0]) > 0
 
 
+def _win32_peek_avail(handle):
+    """Wrapper around PeekNamedPipe to check how many bytes are available."""
+    from ctypes import byref, wintypes, windll
+    c_avail = wintypes.DWORD()
+    c_message = wintypes.DWORD()
+    success = windll.kernel32.PeekNamedPipe(
+        handle, None, 0, None, byref(c_avail),
+        byref(c_message))
+    if not success:
+        raise OSError(wintypes.GetLastError())
+    return c_avail.value
+
+
 COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
 FETCH_CAPABILITIES = ([CAPABILITY_THIN_PACK, CAPABILITY_MULTI_ACK,
                        CAPABILITY_MULTI_ACK_DETAILED] +
@@ -146,7 +159,8 @@ class ReportStatusParser(object):
                 ref_status[ref] = status
             # TODO(jelmer): don't assume encoding of refs is ascii.
             raise UpdateRefsError(', '.join([
-                ref.decode('ascii') for ref in ref_status if ref not in ok]) +
+                refname.decode('ascii') for refname in ref_status
+                if refname not in ok]) +
                 ' failed to update', ref_status=ref_status)
 
     def handle_packet(self, pkt):
@@ -231,11 +245,14 @@ class GitClient(object):
         """
         raise NotImplementedError(cls.from_parsedurl)
 
-    def send_pack(self, path, determine_wants, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_contents,
                   progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
         :param generate_pack_contents: Function that can return a sequence of
             the shas of the objects to upload.
         :param progress: Optional progress function
@@ -256,7 +273,8 @@ class GitClient(object):
         :param path: Path to fetch from (as bytestring)
         :param target: Target repository to fetch into
         :param determine_wants: Optional function to determine what refs
-            to fetch
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch. Defaults to all shas.
         :param progress: Optional progress function
         :return: Dictionary with all remote refs (not just those fetched)
         """
@@ -266,10 +284,12 @@ class GitClient(object):
             # TODO(jelmer): Avoid reading entire file into memory and
             # only processing it after the whole file has been fetched.
             f = BytesIO()
+
             def commit():
                 if f.tell():
                     f.seek(0)
                     target.object_store.add_thin_pack(f.read, None)
+
             def abort():
                 pass
         else:
@@ -289,7 +309,10 @@ class GitClient(object):
                    progress=None):
         """Retrieve a pack from a git smart server.
 
-        :param determine_wants: Callback that returns list of commits to fetch
+        :param path: Remote path to fetch from
+        :param determine_wants: Function determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch.
         :param graph_walker: Object with next() and ack().
         :param pack_data: Callback called for each bit of data in the pack
         :param progress: Callback for progress reports (strings)
@@ -337,10 +360,9 @@ class GitClient(object):
                 else:
                     ok.add(ref)
                 ref_status[ref] = status
-            raise UpdateRefsError(', '.join([ref for ref in ref_status
-                                             if ref not in ok]) +
-                                             b' failed to update',
-                                  ref_status=ref_status)
+            raise UpdateRefsError(', '.join([
+                refname for refname in ref_status if refname not in ok]) +
+                b' failed to update', ref_status=ref_status)
 
     def _read_side_band64k_data(self, proto, channel_callbacks):
         """Read per-channel data.
@@ -382,15 +404,16 @@ class GitClient(object):
             old_sha1 = old_refs.get(refname, ZERO_SHA)
             if not isinstance(old_sha1, bytes):
                 raise TypeError('old sha1 for %s is not a bytestring: %r' %
-                        (refname, old_sha1))
+                                (refname, old_sha1))
             new_sha1 = new_refs.get(refname, ZERO_SHA)
             if not isinstance(new_sha1, bytes):
                 raise TypeError('old sha1 for %s is not a bytestring %r' %
-                        (refname, new_sha1))
+                                (refname, new_sha1))
 
             if old_sha1 != new_sha1:
                 if sent_capabilities:
-                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' + refname)
+                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' +
+                                         refname)
                 else:
                     proto.write_pkt_line(
                         old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
@@ -410,7 +433,8 @@ class GitClient(object):
         """
         if b"side-band-64k" in capabilities:
             if progress is None:
-                progress = lambda x: None
+                def progress(x):
+                    pass
             channel_callbacks = {2: progress}
             if CAPABILITY_REPORT_STATUS in capabilities:
                 channel_callbacks[1] = PktLineParser(
@@ -435,7 +459,8 @@ class GitClient(object):
             whether there is extra graph data to read on proto
         """
         assert isinstance(wants, list) and isinstance(wants[0], bytes)
-        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' + b' '.join(capabilities) + b'\n')
+        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' +
+                             b' '.join(capabilities) + b'\n')
         for want in wants[1:]:
             proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
         proto.write_pkt_line(None)
@@ -481,7 +506,9 @@ class GitClient(object):
         if CAPABILITY_SIDE_BAND_64K in capabilities:
             if progress is None:
                 # Just ignore progress data
-                progress = lambda x: None
+
+                def progress(x):
+                    pass
             self._read_side_band64k_data(proto, {
                 SIDE_BAND_CHANNEL_DATA: pack_data,
                 SIDE_BAND_CHANNEL_PROGRESS: progress}
@@ -517,11 +544,14 @@ class TraditionalGitClient(GitClient):
         """
         raise NotImplementedError()
 
-    def send_pack(self, path, determine_wants, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_contents,
                   progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
         :param generate_pack_contents: Function that can return a sequence of
             the shas of the objects to upload.
         :param progress: Optional callback called with progress updates
@@ -537,26 +567,28 @@ class TraditionalGitClient(GitClient):
         proto, unused_can_read = self._connect(b'receive-pack', path)
         with proto:
             old_refs, server_capabilities = read_pkt_refs(proto)
-            negotiated_capabilities = self._send_capabilities & server_capabilities
+            negotiated_capabilities = (
+                self._send_capabilities & server_capabilities)
 
             if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                 self._report_status_parser = ReportStatusParser()
             report_status_parser = self._report_status_parser
 
             try:
-                new_refs = orig_new_refs = determine_wants(dict(old_refs))
+                new_refs = orig_new_refs = update_refs(dict(old_refs))
             except:
                 proto.write_pkt_line(None)
                 raise
 
-            if not CAPABILITY_DELETE_REFS in server_capabilities:
+            if CAPABILITY_DELETE_REFS not in server_capabilities:
                 # Server does not support deletions. Fail later.
                 new_refs = dict(orig_new_refs)
                 for ref, sha in orig_new_refs.items():
                     if sha == ZERO_SHA:
                         if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
                             report_status_parser._ref_statuses.append(
-                                b'ng ' + sha + b' remote does not support deleting refs')
+                                b'ng ' + sha +
+                                b' remote does not support deleting refs')
                             report_status_parser._ref_status_ok = False
                         del new_refs[ref]
 
@@ -573,7 +605,8 @@ class TraditionalGitClient(GitClient):
 
             (have, want) = self._handle_receive_pack_head(
                 proto, negotiated_capabilities, old_refs, new_refs)
-            if not want and set(new_refs.items()).issubset(set(old_refs.items())):
+            if (not want and
+                    set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
             objects = generate_pack_contents(have, want)
 
@@ -592,7 +625,10 @@ class TraditionalGitClient(GitClient):
                    progress=None):
         """Retrieve a pack from a git smart server.
 
-        :param determine_wants: Callback that returns list of commits to fetch
+        :param path: Remote path to fetch from
+        :param determine_wants: Function determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch.
         :param graph_walker: Object with next() and ack().
         :param pack_data: Callback called for each bit of data in the pack
         :param progress: Callback for progress reports (strings)
@@ -621,7 +657,8 @@ class TraditionalGitClient(GitClient):
             self._handle_upload_pack_head(
                 proto, negotiated_capabilities, graph_walker, wants, can_read)
             self._handle_upload_pack_tail(
-                proto, negotiated_capabilities, graph_walker, pack_data, progress)
+                proto, negotiated_capabilities, graph_walker, pack_data,
+                progress)
             return refs
 
     def get_refs(self, path):
@@ -702,6 +739,7 @@ class TCPGitClient(TraditionalGitClient):
         rfile = s.makefile('rb', -1)
         # 0 means unbuffered
         wfile = s.makefile('wb', 0)
+
         def close():
             rfile.close()
             wfile.close()
@@ -712,7 +750,8 @@ class TCPGitClient(TraditionalGitClient):
         if path.startswith(b"/~"):
             path = path[1:]
         # TODO(jelmer): Alternative to ascii?
-        proto.send_cmd(b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
+        proto.send_cmd(
+            b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
         return proto, lambda: _fileno_can_read(s)
 
 
@@ -730,10 +769,8 @@ class SubprocessWrapper(object):
     def can_read(self):
         if sys.platform == 'win32':
             from msvcrt import get_osfhandle
-            from win32pipe import PeekNamedPipe
             handle = get_osfhandle(self.proc.stdout.fileno())
-            data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
-            return total_bytes_avail != 0
+            return _win32_peek_avail(handle) != 0
         else:
             return _fileno_can_read(self.proc.stdout.fileno())
 
@@ -748,10 +785,10 @@ class SubprocessWrapper(object):
 def find_git_command():
     """Find command to run for system Git (usually C Git).
     """
-    if sys.platform == 'win32': # support .exe, .bat and .cmd
-        try: # to avoid overhead
+    if sys.platform == 'win32':  # support .exe, .bat and .cmd
+        try:  # to avoid overhead
             import win32api
-        except ImportError: # run through cmd.exe with some overhead
+        except ImportError:  # run through cmd.exe with some overhead
             return ['cmd', '/c', 'git']
         else:
             status, git = win32api.FindExecutable('git')
@@ -780,8 +817,8 @@ class SubprocessGitClient(TraditionalGitClient):
     def _connect(self, service, path):
         if not isinstance(service, bytes):
             raise TypeError(service)
-        if not isinstance(path, bytes):
-            path = path.encode(self._remote_path_encoding)
+        if isinstance(path, bytes):
+            path = path.decode(self._remote_path_encoding)
         if self.git_command is None:
             git_command = find_git_command()
         argv = git_command + [service.decode('ascii'), path]
@@ -820,11 +857,14 @@ class LocalGitClient(GitClient):
             path = path.decode(sys.getfilesystemencoding())
         return closing(Repo(path))
 
-    def send_pack(self, path, determine_wants, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_contents,
                   progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
         :param generate_pack_contents: Function that can return a sequence of
             the shas of the objects to upload.
         :param progress: Optional progress function
@@ -838,19 +878,23 @@ class LocalGitClient(GitClient):
             {refname: new_ref}, including deleted refs.
         """
         if not progress:
-            progress = lambda x: None
+            def progress(x):
+                pass
 
-        with self._open_repo(path)  as target:
+        with self._open_repo(path) as target:
             old_refs = target.get_refs()
-            new_refs = determine_wants(dict(old_refs))
+            new_refs = update_refs(dict(old_refs))
 
             have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
             want = []
             for refname, new_sha1 in new_refs.items():
-                if new_sha1 not in have and not new_sha1 in want and new_sha1 != ZERO_SHA:
+                if (new_sha1 not in have and
+                        new_sha1 not in want and
+                        new_sha1 != ZERO_SHA):
                     want.append(new_sha1)
 
-            if not want and set(new_refs.items()).issubset(set(old_refs.items())):
+            if (not want and
+                    set(new_refs.items()).issubset(set(old_refs.items()))):
                 return new_refs
 
             target.object_store.add_objects(generate_pack_contents(have, want))
@@ -858,8 +902,10 @@ class LocalGitClient(GitClient):
             for refname, new_sha1 in new_refs.items():
                 old_sha1 = old_refs.get(refname, ZERO_SHA)
                 if new_sha1 != ZERO_SHA:
-                    if not target.refs.set_if_equals(refname, old_sha1, new_sha1):
-                        progress('unable to set %s to %s' % (refname, new_sha1))
+                    if not target.refs.set_if_equals(
+                            refname, old_sha1, new_sha1):
+                        progress('unable to set %s to %s' %
+                                 (refname, new_sha1))
                 else:
                     if not target.refs.remove_if_equals(refname, old_sha1):
                         progress('unable to remove %s' % refname)
@@ -871,8 +917,9 @@ class LocalGitClient(GitClient):
 
         :param path: Path to fetch from (as bytestring)
         :param target: Target repository to fetch into
-        :param determine_wants: Optional function to determine what refs
-            to fetch
+        :param determine_wants: Optional function determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch. Defaults to all shas.
         :param progress: Optional progress function
         :return: Dictionary with all remote refs (not just those fetched)
         """
@@ -884,17 +931,21 @@ class LocalGitClient(GitClient):
                    progress=None):
         """Retrieve a pack from a git smart server.
 
-        :param determine_wants: Callback that returns list of commits to fetch
+        :param path: Remote path to fetch from
+        :param determine_wants: Function determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch.
         :param graph_walker: Object with next() and ack().
         :param pack_data: Callback called for each bit of data in the pack
         :param progress: Callback for progress reports (strings)
         :return: Dictionary with all remote refs (not just those fetched)
         """
         with self._open_repo(path) as r:
-            objects_iter = r.fetch_objects(determine_wants, graph_walker, progress)
+            objects_iter = r.fetch_objects(
+                determine_wants, graph_walker, progress)
 
-            # Did the process short-circuit (e.g. in a stateless RPC call)? Note
-            # that the client still expects a 0-object pack in most cases.
+            # Did the process short-circuit (e.g. in a stateless RPC call)?
+            # Note that the client still expects a 0-object pack in most cases.
             if objects_iter is None:
                 return
             write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
@@ -940,10 +991,7 @@ class SubprocessSSHVendor(SSHVendor):
     """SSH vendor that shells out to the local 'ssh' command."""
 
     def run_command(self, host, command, username=None, port=None):
-        if not isinstance(command, bytes):
-            raise TypeError(command)
-
-        #FIXME: This has no way to deal with passwords..
+        # FIXME: This has no way to deal with passwords..
         args = ['ssh', '-x']
         if port is not None:
             args.extend(['-p', str(port)])
@@ -1005,11 +1053,12 @@ class SSHGitClient(TraditionalGitClient):
     def _connect(self, cmd, path):
         if not isinstance(cmd, bytes):
             raise TypeError(cmd)
-        if not isinstance(path, bytes):
-            path = path.encode(self._remote_path_encoding)
-        if path.startswith(b"/~"):
+        if isinstance(path, bytes):
+            path = path.decode(self._remote_path_encoding)
+        if path.startswith("/~"):
             path = path[1:]
-        argv = self._get_cmd_path(cmd) + b" '" + path + b"'"
+        argv = (self._get_cmd_path(cmd).decode(self._remote_path_encoding) +
+                " '" + path + "'")
         con = self.ssh_vendor.run_command(
             self.host, argv, port=self.port, username=self.username)
         return (Protocol(con.read, con.write, con.close,
@@ -1076,9 +1125,15 @@ class HttpGitClient(GitClient):
                    password=password, username=username, **kwargs)
 
     def __repr__(self):
-        return "%s(%r, dumb=%r)" % (type(self).__name__, self._base_url, self.dumb)
+        return "%s(%r, dumb=%r)" % (
+            type(self).__name__, self._base_url, self.dumb)
 
     def _get_url(self, path):
+        if not isinstance(path, str):
+            # TODO(jelmer): this is unrelated to the local filesystem;
+            # This is not necessarily the right encoding to decode the path
+            # with.
+            path = path.decode(sys.getfilesystemencoding())
         return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"
 
     def _http_request(self, url, headers={}, data=None):
@@ -1138,14 +1193,17 @@ class HttpGitClient(GitClient):
         if content_type != (
                 "application/x-%s-result" % service):
             raise GitProtocolError("Invalid content-type from server: %s"
-                % content_type)
+                                   % content_type)
         return resp
 
-    def send_pack(self, path, determine_wants, generate_pack_contents,
+    def send_pack(self, path, update_refs, generate_pack_contents,
                   progress=None, write_pack=write_pack_objects):
         """Upload a pack to a remote repository.
 
         :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
         :param generate_pack_contents: Function that can return a sequence of
             the shas of the objects to upload.
         :param progress: Optional progress function
@@ -1166,7 +1224,7 @@ class HttpGitClient(GitClient):
         if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
             self._report_status_parser = ReportStatusParser()
 
-        new_refs = determine_wants(dict(old_refs))
+        new_refs = update_refs(dict(old_refs))
         if new_refs is None:
             # Determine wants function is aborting the push.
             return old_refs
@@ -1185,13 +1243,12 @@ class HttpGitClient(GitClient):
                                    data=req_data.getvalue())
         try:
             resp_proto = Protocol(resp.read, None)
-            self._handle_receive_pack_tail(resp_proto, negotiated_capabilities,
-                progress)
+            self._handle_receive_pack_tail(
+                resp_proto, negotiated_capabilities, progress)
             return new_refs
         finally:
             resp.close()
 
-
     def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
                    progress=None):
         """Retrieve a pack from a git smart server.
@@ -1205,7 +1262,8 @@ class HttpGitClient(GitClient):
         url = self._get_url(path)
         refs, server_capabilities = self._discover_references(
             b"git-upload-pack", url)
-        negotiated_capabilities = self._fetch_capabilities & server_capabilities
+        negotiated_capabilities = (
+            self._fetch_capabilities & server_capabilities)
         wants = determine_wants(refs)
         if wants is not None:
             wants = [cid for cid in wants if cid != ZERO_SHA]
@@ -1216,14 +1274,15 @@ class HttpGitClient(GitClient):
         req_data = BytesIO()
         req_proto = Protocol(None, req_data.write)
         self._handle_upload_pack_head(
-            req_proto, negotiated_capabilities, graph_walker, wants,
-            lambda: False)
+                req_proto, negotiated_capabilities, graph_walker, wants,
+                lambda: False)
         resp = self._smart_request(
             "git-upload-pack", url, data=req_data.getvalue())
         try:
             resp_proto = Protocol(resp.read, None)
-            self._handle_upload_pack_tail(resp_proto, negotiated_capabilities,
-                graph_walker, pack_data, progress)
+            self._handle_upload_pack_tail(
+                resp_proto, negotiated_capabilities, graph_walker, pack_data,
+                progress)
             return refs
         finally:
             resp.close()
@@ -1283,7 +1342,7 @@ def get_transport_and_path(location, **kwargs):
         # Windows local path
         return default_local_git_client_cls(**kwargs), location
 
-    if ':' in location and not '@' in location:
+    if ':' in location and '@' not in location:
         # SSH with no user@, zero or one leading slash.
         (hostname, path) = location.split(':', 1)
         return SSHGitClient(hostname, **kwargs), path

+ 18 - 11
dulwich/config.py

@@ -172,12 +172,13 @@ class ConfigDict(Config, MutableMapping):
 
 def _format_string(value):
     if (value.startswith(b" ") or
-        value.startswith(b"\t") or
-        value.endswith(b" ") or
-        b'#' in value or
-        value.endswith(b"\t")):
+            value.startswith(b"\t") or
+            value.endswith(b" ") or
+            b'#' in value or
+            value.endswith(b"\t")):
         return b'"' + _escape_value(value) + b'"'
-    return _escape_value(value)
+    else:
+        return _escape_value(value)
 
 
 _ESCAPE_TABLE = {
@@ -190,6 +191,7 @@ _ESCAPE_TABLE = {
 _COMMENT_CHARS = [ord(b"#"), ord(b";")]
 _WHITESPACE_CHARS = [ord(b"\t"), ord(b" ")]
 
+
 def _parse_string(value):
     value = bytearray(value.strip())
     ret = bytearray()
@@ -208,8 +210,8 @@ def _parse_string(value):
                     (value, i))
             except KeyError:
                 raise ValueError(
-                    "escape character followed by unknown character %s at %d in %r" %
-                    (value[i], i, value))
+                    "escape character followed by unknown character "
+                    "%s at %d in %r" % (value[i], i, value))
             if whitespace:
                 ret.extend(whitespace)
                 whitespace = bytearray()
@@ -236,7 +238,11 @@ def _parse_string(value):
 
 def _escape_value(value):
     """Escape a value."""
-    return value.replace(b"\\", b"\\\\").replace(b"\n", b"\\n").replace(b"\t", b"\\t").replace(b"\"", b"\\\"")
+    value = value.replace(b"\\", b"\\\\")
+    value = value.replace(b"\n", b"\\n")
+    value = value.replace(b"\t", b"\\t")
+    value = value.replace(b"\"", b"\\\"")
+    return value
 
 
 def _check_variable_name(name):
@@ -295,8 +301,8 @@ class ConfigFile(ConfigDict):
                         section = (pts[0], pts[1])
                     else:
                         if not _check_section_name(pts[0]):
-                            raise ValueError("invalid section name %r" %
-                                    pts[0])
+                            raise ValueError(
+                                "invalid section name %r" % pts[0])
                         pts = pts[0].split(b".", 1)
                         if len(pts) == 2:
                             section = (pts[0], pts[1])
@@ -359,7 +365,8 @@ class ConfigFile(ConfigDict):
             if subsection_name is None:
                 f.write(b"[" + section_name + b"]\n")
             else:
-                f.write(b"[" + section_name + b" \"" + subsection_name + b"\"]\n")
+                f.write(b"[" + section_name +
+                        b" \"" + subsection_name + b"\"]\n")
             for key, value in values.items():
                 if value is True:
                     value = b"true"

+ 1 - 2
dulwich/contrib/paramiko_vendor.py

@@ -34,6 +34,7 @@ import paramiko
 import paramiko.client
 import threading
 
+
 class _ParamikoWrapper(object):
     STDERR_READ_N = 2048  # 2k
 
@@ -116,8 +117,6 @@ class ParamikoSSHVendor(object):
 
     def run_command(self, host, command, username=None, port=None,
                     progress_stderr=None):
-        if not isinstance(command, bytes):
-            raise TypeError(command)
         # Paramiko needs an explicit port. None is not valid
         if port is None:
             port = 22

+ 9 - 6
dulwich/contrib/swift.py

@@ -286,8 +286,8 @@ class SwiftConnector(object):
                                 connection_timeout=self.http_timeout,
                                 network_timeout=self.http_timeout,
                                 headers=token_header)
-        self.base_path = str(
-            posixpath.join(urlparse.urlparse(self.storage_url).path, self.root))
+        self.base_path = str(posixpath.join(
+                urlparse.urlparse(self.storage_url).path, self.root))
 
     def swift_auth_v1(self):
         self.user = self.user.replace(";", ":")
@@ -812,7 +812,8 @@ class SwiftObjectStore(PackBasedObjectStore):
         entries.sort()
         pack_base_name = posixpath.join(
             self.pack_dir,
-            'pack-' + iter_sha1(e[0] for e in entries).decode(sys.getfilesystemencoding()))
+            'pack-' + iter_sha1(e[0] for e in entries).decode(
+                sys.getfilesystemencoding()))
         self.scon.put_object(pack_base_name + '.pack', f)
 
         # Write the index.
@@ -994,7 +995,7 @@ def cmd_daemon(args):
 
     try:
         import gevent
-        import geventhttpclient
+        import geventhttpclient  # noqa: F401
     except ImportError:
         print("gevent and geventhttpclient libraries are mandatory "
               " for use the Swift backend.")
@@ -1036,14 +1037,16 @@ def main(argv=sys.argv):
     }
 
     if len(sys.argv) < 2:
-        print("Usage: %s <%s> [OPTIONS...]" % (sys.argv[0], "|".join(commands.keys())))
+        print("Usage: %s <%s> [OPTIONS...]" % (
+                sys.argv[0], "|".join(commands.keys())))
         sys.exit(1)
 
     cmd = sys.argv[1]
-    if not cmd in commands:
+    if cmd not in commands:
         print("No such subcommand: %s" % cmd)
         sys.exit(1)
     commands[cmd](sys.argv[2:])
 
+
 if __name__ == '__main__':
     main()

+ 17 - 11
dulwich/contrib/test_swift.py

@@ -65,12 +65,12 @@ except ImportError:
 missing_libs = []
 
 try:
-    import gevent
+    import gevent  # noqa:F401
 except ImportError:
     missing_libs.append("gevent")
 
 try:
-    import geventhttpclient
+    import geventhttpclient  # noqa:F401
 except ImportError:
     missing_libs.append("geventhttpclient")
 
@@ -81,7 +81,8 @@ except ImportError:
 
 skipmsg = "Required libraries are not installed (%r)" % missing_libs
 
-skipIfPY3 = skipIf(sys.version_info[0] == 3, "SWIFT module not yet ported to python3.")
+skipIfPY3 = skipIf(sys.version_info[0] == 3,
+                   "SWIFT module not yet ported to python3.")
 
 if not missing_libs:
     from dulwich.contrib import swift
@@ -201,6 +202,7 @@ def create_commits(length=1, marker=b'Default'):
         data.extend([blob, tree, tag, cmt])
     return data
 
+
 @skipIf(missing_libs, skipmsg)
 class FakeSwiftConnector(object):
 
@@ -252,7 +254,7 @@ class FakeSwiftConnector(object):
 
     def get_object_stat(self, name):
         name = posixpath.join(self.root, name)
-        if not name in self.store:
+        if name not in self.store:
             return None
         return {'content-length': len(self.store[name])}
 
@@ -315,7 +317,9 @@ class TestSwiftObjectStore(TestCase):
         head = odata[-1].id
         peeled_sha = dict([(sha.object[1], sha.id)
                            for sha in odata if isinstance(sha, Tag)])
-        get_tagged = lambda: peeled_sha
+
+        def get_tagged():
+            return peeled_sha
         i = sos.iter_shas(sos.find_missing_objects([],
                                                    [head, ],
                                                    progress=None,
@@ -478,9 +482,9 @@ class TestSwiftInfoRefsContainer(TestCase):
 
     def setUp(self):
         super(TestSwiftInfoRefsContainer, self).setUp()
-        content = \
-            b"22effb216e3a82f97da599b8885a6cadb488b4c5\trefs/heads/master\n" + \
-            b"cca703b0e1399008b53a1a236d6b4584737649e4\trefs/heads/dev"
+        content = (
+            b"22effb216e3a82f97da599b8885a6cadb488b4c5\trefs/heads/master\n"
+            b"cca703b0e1399008b53a1a236d6b4584737649e4\trefs/heads/dev")
         self.store = {'fakerepo/info/refs': content}
         self.conf = swift.load_conf(file=StringIO(config_file %
                                                   def_config_file))
@@ -562,9 +566,9 @@ class TestSwiftConnector(TestCase):
 
     def test_create_root(self):
         with patch('dulwich.contrib.swift.SwiftConnector.test_root_exists',
-                lambda *args: None):
+                   lambda *args: None):
             with patch('geventhttpclient.HTTPClient.request',
-                lambda *args: Response()):
+                       lambda *args: Response()):
                 self.assertEqual(self.conn.create_root(), None)
 
     def test_create_root_fails(self):
@@ -616,7 +620,9 @@ class TestSwiftConnector(TestCase):
             self.assertEqual(self.conn.get_object('a').read(), b'content')
         with patch('geventhttpclient.HTTPClient.request',
                    lambda *args, **kwargs: Response(content=b'content')):
-            self.assertEqual(self.conn.get_object('a', range='0-6'), b'content')
+            self.assertEqual(
+                    self.conn.get_object('a', range='0-6'),
+                    b'content')
 
     def test_get_object_fails(self):
         with patch('geventhttpclient.HTTPClient.request',

+ 10 - 8
dulwich/contrib/test_swift_smoke.py

@@ -42,12 +42,14 @@ import gevent
 from gevent import monkey
 monkey.patch_all()
 
-from dulwich import server
-from dulwich import repo
-from dulwich import index
-from dulwich import client
-from dulwich import objects
-from dulwich.contrib import swift
+from dulwich import (  # noqa:E402
+    server,
+    repo,
+    index,
+    client,
+    objects,
+    )
+from dulwich.contrib import swift  # noqa:E402
 
 
 class DulwichServer():
@@ -202,7 +204,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
         files = ('testfile', 'testfile2', 'dir/testfile3')
         i = 0
         for f in files:
-            file(os.path.join(self.temp_d, f), 'w').write("DATA %s" % i)
+            open(os.path.join(self.temp_d, f), 'w').write("DATA %s" % i)
             i += 1
         local_repo.stage(files)
         local_repo.do_commit('Test commit', 'fbo@localhost',
@@ -252,7 +254,7 @@ class SwiftRepoSmokeTest(unittest.TestCase):
         files = ('testfile11', 'testfile22', 'test/testfile33')
         i = 0
         for f in files:
-            file(os.path.join(self.temp_d, f), 'w').write("DATA %s" % i)
+            open(os.path.join(self.temp_d, f), 'w').write("DATA %s" % i)
             i += 1
         local_repo.stage(files)
         local_repo.do_commit('Test commit', 'fbo@localhost',

+ 15 - 11
dulwich/diff_tree.py

@@ -173,10 +173,10 @@ def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
         source and target tree.
     """
     if (rename_detector is not None and tree1_id is not None and
-        tree2_id is not None):
+            tree2_id is not None):
         for change in rename_detector.changes_with_renames(
-            tree1_id, tree2_id, want_unchanged=want_unchanged):
-                yield change
+                tree1_id, tree2_id, want_unchanged=want_unchanged):
+            yield change
         return
 
     entries = walk_trees(store, tree1_id, tree2_id,
@@ -255,8 +255,11 @@ def tree_changes_for_merge(store, parent_tree_ids, tree_id,
                 path = change.new.path
             changes_by_path[path][i] = change
 
-    old_sha = lambda c: c.old.sha
-    change_type = lambda c: c.type
+    def old_sha(c):
+        return c.old.sha
+
+    def change_type(c):
+        return c.type
 
     # Yield only conflicting changes.
     for _, changes in sorted(changes_by_path.items()):
@@ -381,9 +384,9 @@ class RenameDetector(object):
             an add/delete pair to be a rename/copy; see _similarity_score.
         :param max_files: The maximum number of adds and deletes to consider,
             or None for no limit. The detector is guaranteed to compare no more
-            than max_files ** 2 add/delete pairs. This limit is provided because
-            rename detection can be quadratic in the project size. If the limit
-            is exceeded, no content rename detection is attempted.
+            than max_files ** 2 add/delete pairs. This limit is provided
+            because rename detection can be quadratic in the project size. If
+            the limit is exceeded, no content rename detection is attempted.
         :param rewrite_threshold: The threshold similarity score below which a
             modify should be considered a delete/add, or None to not break
             modifies; see _similarity_score.
@@ -404,7 +407,7 @@ class RenameDetector(object):
 
     def _should_split(self, change):
         if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or
-            change.old.sha == change.new.sha):
+                change.old.sha == change.new.sha):
             return False
         old_obj = self._store[change.old.sha]
         new_obj = self._store[change.new.sha]
@@ -551,7 +554,7 @@ class RenameDetector(object):
             path = add.new.path
             delete = delete_map.get(path)
             if (delete is not None and
-                stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
+                    stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
                 modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
 
         self._adds = [a for a in self._adds if a.new.path not in modifies]
@@ -570,7 +573,8 @@ class RenameDetector(object):
     def _prune_unchanged(self):
         if self._want_unchanged:
             return
-        self._deletes = [d for d in self._deletes if d.type != CHANGE_UNCHANGED]
+        self._deletes = [
+            d for d in self._deletes if d.type != CHANGE_UNCHANGED]
 
     def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
         """Iterate TreeChanges between two tree SHAs, with rename detection."""

+ 7 - 6
dulwich/errors.py

@@ -36,11 +36,12 @@ class ChecksumMismatch(Exception):
         self.got = got
         self.extra = extra
         if self.extra is None:
-            Exception.__init__(self,
-                "Checksum mismatch: Expected %s, got %s" % (expected, got))
+            Exception.__init__(
+                self, "Checksum mismatch: Expected %s, got %s" %
+                (expected, got))
         else:
-            Exception.__init__(self,
-                "Checksum mismatch: Expected %s, got %s; %s" %
+            Exception.__init__(
+                self, "Checksum mismatch: Expected %s, got %s; %s" %
                 (expected, got, extra))
 
 
@@ -136,8 +137,8 @@ class HangupException(GitProtocolError):
     """Hangup exception."""
 
     def __init__(self):
-        Exception.__init__(self,
-            "The remote server unexpectedly closed the connection.")
+        Exception.__init__(
+            self, "The remote server unexpectedly closed the connection.")
 
 
 class UnexpectedCommandError(GitProtocolError):

+ 33 - 18
dulwich/fastexport.py

@@ -30,18 +30,20 @@ from dulwich.objects import (
     Blob,
     Commit,
     Tag,
+    ZERO_SHA,
     )
 from fastimport import __version__ as fastimport_version
-if fastimport_version <= (0, 9, 5) and sys.version_info[0] == 3 and sys.version_info[1] < 5:
+if (fastimport_version <= (0, 9, 5) and
+        sys.version_info[0] == 3 and sys.version_info[1] < 5):
     raise ImportError("Older versions of fastimport don't support python3<3.5")
-from fastimport import (
+from fastimport import (  # noqa: E402
     commands,
     errors as fastimport_errors,
     parser,
     processor,
     )
 
-import stat
+import stat  # noqa: E402
 
 
 def split_email(text):
@@ -62,7 +64,7 @@ class GitFastExporter(object):
         self.outf.write(getattr(cmd, "__bytes__", cmd.__repr__)() + b"\n")
 
     def _allocate_marker(self):
-        self._marker_idx+=1
+        self._marker_idx += 1
         return ("%d" % (self._marker_idx,)).encode('ascii')
 
     def _export_blob(self, blob):
@@ -77,7 +79,7 @@ class GitFastExporter(object):
 
     def _iter_files(self, base_tree, new_tree):
         for ((old_path, new_path), (old_mode, new_mode),
-            (old_hexsha, new_hexsha)) in \
+             (old_hexsha, new_hexsha)) in \
                 self.store.tree_changes(base_tree, new_tree):
             if new_path is None:
                 yield commands.FileDeleteCommand(old_path)
@@ -104,7 +106,8 @@ class GitFastExporter(object):
             merges = []
         author, author_email = split_email(commit.author)
         committer, committer_email = split_email(commit.committer)
-        cmd = commands.CommitCommand(ref, marker,
+        cmd = commands.CommitCommand(
+            ref, marker,
             (author, author_email, commit.author_time, commit.author_timezone),
             (committer, committer_email, commit.commit_time,
                 commit.commit_timezone),
@@ -126,7 +129,7 @@ class GitImportProcessor(processor.ImportProcessor):
     def __init__(self, repo, params=None, verbose=False, outf=None):
         processor.ImportProcessor.__init__(self, params, verbose)
         self.repo = repo
-        self.last_commit = None
+        self.last_commit = ZERO_SHA
         self.markers = {}
         self._contents = {}
 
@@ -174,7 +177,8 @@ class GitImportProcessor(processor.ImportProcessor):
                     blob_id = blob.id
                 else:
                     assert filecmd.dataref.startswith(b":"), \
-                        "non-marker refs not supported yet (%r)" % filecmd.dataref
+                           ("non-marker refs not supported yet (%r)" %
+                            filecmd.dataref)
                     blob_id = self.markers[filecmd.dataref[1:]]
                 self._contents[filecmd.path] = (filecmd.mode, blob_id)
             elif filecmd.name == b"filedelete":
@@ -190,12 +194,16 @@ class GitImportProcessor(processor.ImportProcessor):
                 self._contents = {}
             else:
                 raise Exception("Command %s not supported" % filecmd.name)
-        commit.tree = commit_tree(self.repo.object_store,
+        commit.tree = commit_tree(
+            self.repo.object_store,
             ((path, hexsha, mode) for (path, (mode, hexsha)) in
                 self._contents.items()))
-        if self.last_commit is not None:
+        if self.last_commit != ZERO_SHA:
             commit.parents.append(self.last_commit)
-        commit.parents += cmd.merges
+        for merge in cmd.merges:
+            if merge.startswith(b':'):
+                merge = self.markers[merge[1:]]
+            commit.parents.append(merge)
         self.repo.object_store.add_object(commit)
         self.repo[cmd.ref] = commit.id
         self.last_commit = commit.id
@@ -209,17 +217,24 @@ class GitImportProcessor(processor.ImportProcessor):
     def _reset_base(self, commit_id):
         if self.last_commit == commit_id:
             return
-        self.last_commit = commit_id
         self._contents = {}
-        tree_id = self.repo[commit_id].tree
-        for (path, mode, hexsha) in (
-                self.repo.object_store.iter_tree_contents(tree_id)):
-            self._contents[path] = (mode, hexsha)
+        self.last_commit = commit_id
+        if commit_id != ZERO_SHA:
+            tree_id = self.repo[commit_id].tree
+            for (path, mode, hexsha) in (
+                    self.repo.object_store.iter_tree_contents(tree_id)):
+                self._contents[path] = (mode, hexsha)
 
     def reset_handler(self, cmd):
         """Process a ResetCommand."""
-        self._reset_base(cmd.from_)
-        self.repo.refs[cmd.ref] = cmd.from_
+        if cmd.from_ is None:
+            from_ = ZERO_SHA
+        else:
+            from_ = cmd.from_
+            if from_.startswith(b":"):
+                from_ = self.markers[from_[1:]]
+        self._reset_base(from_)
+        self.repo.refs[cmd.ref] = from_
 
     def tag_handler(self, cmd):
         """Process a TagCommand."""

+ 12 - 8
dulwich/file.py

@@ -26,6 +26,7 @@ import os
 import sys
 import tempfile
 
+
 def ensure_dir_exists(dirname):
     """Ensure a directory exists, creating if necessary."""
     try:
@@ -105,10 +106,12 @@ class _GitFile(object):
     PROXY_METHODS = ('__iter__', 'flush', 'fileno', 'isatty', 'read',
                      'readline', 'readlines', 'seek', 'tell',
                      'truncate', 'write', 'writelines')
+
     def __init__(self, filename, mode, bufsize):
         self._filename = filename
         self._lockfilename = '%s.lock' % self._filename
-        fd = os.open(self._lockfilename,
+        fd = os.open(
+            self._lockfilename,
             os.O_RDWR | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0))
         self._file = os.fdopen(fd, mode, bufsize)
         self._closed = False
@@ -137,12 +140,12 @@ class _GitFile(object):
         """Close this file, saving the lockfile over the original.
 
         :note: If this method fails, it will attempt to delete the lockfile.
-            However, it is not guaranteed to do so (e.g. if a filesystem becomes
-            suddenly read-only), which will prevent future writes to this file
-            until the lockfile is removed manually.
-        :raises OSError: if the original file could not be overwritten. The lock
-            file is still closed, so further attempts to write to the same file
-            object will raise ValueError.
+            However, it is not guaranteed to do so (e.g. if a filesystem
+            becomes suddenly read-only), which will prevent future writes to
+            this file until the lockfile is removed manually.
+        :raises OSError: if the original file could not be overwritten. The
+            lock file is still closed, so further attempts to write to the same
+            file object will raise ValueError.
         """
         if self._closed:
             return
@@ -152,7 +155,8 @@ class _GitFile(object):
                 os.rename(self._lockfilename, self._filename)
             except OSError as e:
                 if sys.platform == 'win32' and e.errno == errno.EEXIST:
-                    # Windows versions prior to Vista don't support atomic renames
+                    # Windows versions prior to Vista don't support atomic
+                    # renames
                     _fancy_rename(self._lockfilename, self._filename)
                 else:
                     raise

+ 358 - 0
dulwich/ignore.py

@@ -0,0 +1,358 @@
+# Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Parsing of gitignore files.
+
+For details of the matching rules, see https://git-scm.com/docs/gitignore
+"""
+
+import os.path
+import re
+import sys
+
+
+def _translate_segment(segment):
+    if segment == b"*":
+        return b'[^/]+'
+    res = b""
+    i, n = 0, len(segment)
+    while i < n:
+        c = segment[i:i+1]
+        i = i+1
+        if c == b'*':
+            res += b'[^/]*'
+        elif c == b'?':
+            res += b'.'
+        elif c == b'[':
+            j = i
+            if j < n and segment[j:j+1] == b'!':
+                j = j+1
+            if j < n and segment[j:j+1] == b']':
+                j = j+1
+            while j < n and segment[j:j+1] != b']':
+                j = j+1
+            if j >= n:
+                res += b'\\['
+            else:
+                stuff = segment[i:j].replace(b'\\', b'\\\\')
+                i = j+1
+                if stuff.startswith(b'!'):
+                    stuff = b'^' + stuff[1:]
+                elif stuff.startswith(b'^'):
+                    stuff = b'\\' + stuff
+                res += b'[' + stuff + b']'
+        else:
+            res += re.escape(c)
+    return res
+
+
def translate(pat):
    """Translate a shell PATTERN to a regular expression.

    There is no way to quote meta-characters.

    Originally copied from fnmatch in Python 2.7, but modified for Dulwich
    to cope with features in Git ignore patterns.

    :param pat: gitignore pattern (bytes) to translate
    :return: Regular expression (bytes) matching a whole path
    """

    res = b'(?ms)'

    if b'/' not in pat[:-1]:
        # If there's no slash, this is a filename-based match
        res += b'(.*/)?'

    if pat.startswith(b'**/'):
        # Leading **/ matches in any directory
        pat = pat[2:]
        res += b'(.*/)?'

    if pat.startswith(b'/'):
        # A leading slash only anchors the pattern; it is not itself
        # part of the matched path.
        pat = pat[1:]

    for i, segment in enumerate(pat.split(b'/')):
        if segment == b'**':
            res += b'(/.*)?'
        else:
            res += ((re.escape(b'/') if i > 0 else b'') +
                    _translate_segment(segment))

    if not pat.endswith(b'/'):
        # Also match a directory (trailing slash) of this name.
        res += b'/?'

    # Use an explicit double backslash: the original b'\Z' relies on an
    # invalid escape sequence, which newer Pythons warn about (and will
    # eventually reject). The resulting bytes are identical.
    return res + b'\\Z'
+
+
def read_ignore_patterns(f):
    """Read a git ignore file.

    :param f: File-like object to read from
    :return: Iterator over patterns (bytes), with blank lines, comments
        and unescaped trailing whitespace stripped
    """

    for raw_line in f:
        line = raw_line.rstrip(b"\r\n")

        # Blank lines are for readability; '#' starts a comment.
        if not line or line.startswith(b'#'):
            continue

        # Trailing spaces are ignored unless they are quoted with a
        # backslash.
        while line.endswith(b' ') and not line.endswith(b'\\ '):
            line = line[:-1]

        yield line.replace(b'\\ ', b' ')
+
+
def match_pattern(path, pattern, ignorecase=False):
    """Match a gitignore-style pattern against a path.

    Convenience wrapper that compiles a one-off `Pattern`.

    :param path: Path to match
    :param pattern: Pattern to match
    :param ignorecase: Whether to match case-insensitively
    :return: bool indicating whether the pattern matched
    """
    return Pattern(pattern, ignorecase).match(path)
+
+
class Pattern(object):
    """A single ignore pattern.

    The pattern is compiled to a regular expression once, at
    construction time.
    """

    def __init__(self, pattern, ignorecase=False):
        self.pattern = pattern
        self.ignorecase = ignorecase
        if pattern[0:1] == b'!':
            # A leading '!' negates the pattern (re-includes matches).
            self.is_exclude = False
            pattern = pattern[1:]
        else:
            if pattern[0:1] == b'\\':
                # A backslash escapes a literal leading '!' or '#'.
                pattern = pattern[1:]
            self.is_exclude = True
        flags = 0
        if self.ignorecase:
            flags = re.IGNORECASE
        self._re = re.compile(translate(pattern), flags)

    def __bytes__(self):
        return self.pattern

    def __str__(self):
        return self.pattern.decode(sys.getfilesystemencoding())

    def __eq__(self, other):
        return (type(self) == type(other) and
                self.pattern == other.pattern and
                self.ignorecase == other.ignorecase)

    def __hash__(self):
        # Defined alongside __eq__ so that equal patterns hash alike;
        # without this, Python 3 makes instances unhashable.
        return hash((type(self), self.pattern, self.ignorecase))

    def __repr__(self):
        return "%s(%s, %r)" % (
            type(self).__name__, self.pattern, self.ignorecase)

    def match(self, path):
        """Try to match a path against this ignore pattern.

        :param path: Path to match (relative to ignore location)
        :return: boolean
        """
        return bool(self._re.match(path))
+
+
class IgnoreFilter(object):
    """An ordered collection of ignore patterns from a single source.

    As in git, later patterns take precedence over earlier ones.
    """

    def __init__(self, patterns, ignorecase=False):
        self._patterns = []
        self._ignorecase = ignorecase
        for pattern in patterns:
            self.append_pattern(pattern)

    def append_pattern(self, pattern):
        """Add a pattern to the set."""
        self._patterns.append(Pattern(pattern, self._ignorecase))

    def find_matching(self, path):
        """Yield all matching patterns for path.

        :param path: Path to match
        :return: Iterator over matching `Pattern` objects
        """
        if not isinstance(path, bytes):
            path = path.encode(sys.getfilesystemencoding())
        return (pattern for pattern in self._patterns
                if pattern.match(path))

    def is_ignored(self, path):
        """Check whether a path is ignored.

        For directories, include a trailing slash.

        :return: status is None if file is not mentioned, True if it is
            included, False if it is explicitly excluded.
        """
        matches = list(self.find_matching(path))
        if not matches:
            return None
        # The last matching pattern wins.
        return matches[-1].is_exclude

    @classmethod
    def from_path(cls, path, ignorecase=False):
        with open(path, 'rb') as f:
            obj = cls(read_ignore_patterns(f), ignorecase)
            obj._path = path
            return obj

    def __repr__(self):
        path = getattr(self, '_path', None)
        if path is None:
            return "<%s>" % (type(self).__name__)
        return "%s.from_path(%r)" % (type(self).__name__, path)
+
+
class IgnoreFilterStack(object):
    """Check for ignore status in multiple filters."""

    def __init__(self, filters):
        self._filters = filters

    def is_ignored(self, path):
        """Check whether a path is explicitly included or excluded in ignores.

        The first filter with a definite answer wins.

        :param path: Path to check
        :return: None if the file is not mentioned, True if it is included,
            False if it is explicitly excluded.
        """
        for ignore_filter in self._filters:
            verdict = ignore_filter.is_ignored(path)
            if verdict is not None:
                return verdict
        return None
+
+
def default_user_ignore_filter_path(config):
    """Return default user ignore filter path.

    Prefers core.excludesFile from the configuration; otherwise falls
    back to $XDG_CONFIG_HOME/git/ignore (XDG_CONFIG_HOME defaulting to
    ~/.config/).

    :param config: A Config object
    :return: Path to a global ignore file
    """
    try:
        return config.get(('core', ), 'excludesFile')
    except KeyError:
        pass

    config_home = os.environ.get(
        "XDG_CONFIG_HOME", os.path.expanduser("~/.config/"))
    return os.path.join(config_home, 'git', 'ignore')
+
+
class IgnoreFilterManager(object):
    """Ignore file manager.

    Combines global ignore filters (e.g. info/exclude and the user's
    excludes file) with the per-directory .gitignore files of a working
    tree, loading the latter lazily.
    """

    def __init__(self, top_path, global_filters, ignorecase):
        self._path_filters = {}
        self._top_path = top_path
        self._global_filters = global_filters
        self._ignorecase = ignorecase

    def __repr__(self):
        return "%s(%s, %r, %r)" % (
            type(self).__name__, self._top_path,
            self._global_filters,
            self._ignorecase)

    def _load_path(self, path):
        """Load (and cache) the .gitignore filter for one directory.

        :param path: Directory path relative to the top of the tree
        :return: An `IgnoreFilter`, or None if no .gitignore is readable
        """
        try:
            return self._path_filters[path]
        except KeyError:
            pass

        p = os.path.join(self._top_path, path, '.gitignore')
        try:
            self._path_filters[path] = IgnoreFilter.from_path(
                p, self._ignorecase)
        except IOError:
            # No readable .gitignore here; cache the negative result.
            self._path_filters[path] = None
        return self._path_filters[path]

    def find_matching(self, path):
        """Find matching patterns for path.

        Stops after the first ignore file with matches.

        :param path: Path to check, relative to the top of the tree
        :raises ValueError: if path is absolute
        :return: Iterator over Pattern instances
        """
        if os.path.isabs(path):
            raise ValueError('%s is an absolute path' % path)
        filters = [(0, f) for f in self._global_filters]
        if os.path.sep != '/':
            path = path.replace(os.path.sep, '/')
        parts = path.split('/')
        for i in range(len(parts)+1):
            dirname = '/'.join(parts[:i])
            for s, f in filters:
                relpath = '/'.join(parts[s:i])
                if i < len(parts):
                    # Paths leading up to the final part are all directories,
                    # so need a trailing slash.
                    relpath += '/'
                matches = list(f.find_matching(relpath))
                if matches:
                    return iter(matches)
            ignore_filter = self._load_path(dirname)
            if ignore_filter is not None:
                # More deeply nested .gitignore files take precedence.
                filters.insert(0, (i, ignore_filter))
        return iter([])

    def is_ignored(self, path):
        """Check whether a path is explicitly included or excluded in ignores.

        :param path: Path to check
        :return: None if the file is not mentioned, True if it is included,
            False if it is explicitly excluded.
        """
        matches = list(self.find_matching(path))
        if matches:
            # The last matching pattern wins.
            return matches[-1].is_exclude
        return None

    @classmethod
    def from_repo(cls, repo):
        """Create a IgnoreFilterManager from a repository.

        :param repo: Repository object
        :return: A `IgnoreFilterManager` object
        """
        # Fetch the config stack once and reuse it below.
        config = repo.get_config_stack()
        global_filters = []
        for p in [
                os.path.join(repo.controldir(), 'info', 'exclude'),
                default_user_ignore_filter_path(config)]:
            try:
                global_filters.append(IgnoreFilter.from_path(p))
            except IOError:
                # Missing global ignore files are simply skipped.
                pass
        # Use a proper section tuple; (b'core') is just bytes in
        # redundant parentheses, inconsistent with the rest of the file.
        ignorecase = config.get_boolean((b'core',), b'ignorecase', False)
        return cls(repo.path, global_filters, ignorecase)

+ 42 - 15
dulwich/index.py

@@ -124,8 +124,10 @@ def write_cache_entry(f, entry):
     (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
     write_cache_time(f, ctime)
     write_cache_time(f, mtime)
-    flags = len(name) | (flags &~ 0x0fff)
-    f.write(struct.pack(b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF, mode, uid, gid, size, hex_to_sha(sha), flags))
+    flags = len(name) | (flags & ~0x0fff)
+    f.write(struct.pack(
+            b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF,
+            mode, uid, gid, size, hex_to_sha(sha), flags))
     f.write(name)
     real_size = ((f.tell() - beginoffset + 8) & ~7)
     f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
@@ -243,7 +245,8 @@ class Index(object):
     def __getitem__(self, name):
         """Retrieve entry by relative path.
 
-        :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
+        :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
+            flags)
         """
         return self._byname[name]
 
@@ -292,13 +295,14 @@ class Index(object):
         :param object_store: Object store to use for retrieving tree contents
         :param tree: SHA1 of the root tree
         :param want_unchanged: Whether unchanged files should be reported
-        :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
+        :return: Iterator over tuples with (oldpath, newpath), (oldmode,
+            newmode), (oldsha, newsha)
         """
         def lookup_entry(path):
             entry = self[path]
             return entry.sha, entry.mode
-        for (name, mode, sha) in changes_from_tree(self._byname.keys(),
-                lookup_entry, object_store, tree,
+        for (name, mode, sha) in changes_from_tree(
+                self._byname.keys(), lookup_entry, object_store, tree,
                 want_unchanged=want_unchanged):
             yield (name, mode, sha)
 
@@ -363,7 +367,7 @@ def commit_index(object_store, index):
 
 
 def changes_from_tree(names, lookup_entry, object_store, tree,
-        want_unchanged=False):
+                      want_unchanged=False):
     """Find the differences between the contents of a tree and
     a working copy.
 
@@ -435,6 +439,12 @@ def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
         # FIXME: This will fail on Windows. What should we do instead?
         if oldstat:
             os.unlink(target_path)
+        if sys.platform == 'win32' and sys.version_info[0] == 3:
+            # os.readlink on Python3 on Windows requires a unicode string.
+            # TODO(jelmer): Don't assume tree_encoding == fs_encoding
+            tree_encoding = sys.getfilesystemencoding()
+            contents = contents.decode(tree_encoding)
+            target_path = target_path.decode(tree_encoding)
         os.symlink(contents, target_path)
     else:
         if oldstat is not None and oldstat.st_size == len(contents):
@@ -489,8 +499,8 @@ def build_index_from_tree(root_path, index_path, object_store, tree_id,
     :param object_store: Non-empty object store holding tree contents
     :param honor_filemode: An optional flag to honor core.filemode setting in
         config file, default is core.filemode=True, change executable bit
-    :param validate_path_element: Function to validate path elements to check out;
-        default just refuses .git and .. directories.
+    :param validate_path_element: Function to validate path elements to check
+        out; default just refuses .git and .. directories.
 
     :note:: existing index is wiped and contents are not merged
         in a working dir. Suitable only for fresh clones.
@@ -516,8 +526,8 @@ def build_index_from_tree(root_path, index_path, object_store, tree_id,
             # TODO(jelmer): record and return submodule paths
         else:
             obj = object_store[entry.sha]
-            st = build_file_from_blob(obj, entry.mode, full_path,
-                honor_filemode=honor_filemode)
+            st = build_file_from_blob(
+                obj, entry.mode, full_path, honor_filemode=honor_filemode)
         # Add file to index
         if not honor_filemode or S_ISGITLINK(entry.mode):
             # we can not use tuple slicing to build a new tuple,
@@ -545,7 +555,14 @@ def blob_from_path_and_stat(fs_path, st):
         with open(fs_path, 'rb') as f:
             blob.data = f.read()
     else:
-        blob.data = os.readlink(fs_path)
+        if sys.platform == 'win32' and sys.version_info[0] == 3:
+            # os.readlink on Python3 on Windows requires a unicode string.
+            # TODO(jelmer): Don't assume tree_encoding == fs_encoding
+            tree_encoding = sys.getfilesystemencoding()
+            fs_path = fs_path.decode(tree_encoding)
+            blob.data = os.readlink(fs_path).encode(tree_encoding)
+        else:
+            blob.data = os.readlink(fs_path)
     return blob
 
 
@@ -562,7 +579,6 @@ def get_unstaged_changes(index, root_path):
 
     for tree_path, entry in index.iteritems():
         full_path = _tree_to_fs_path(root_path, tree_path)
-        # TODO(jelmer): handle S_ISGITLINK(entry.mode) here
         try:
             blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
         except OSError as e:
@@ -574,8 +590,19 @@ def get_unstaged_changes(index, root_path):
         except IOError as e:
             if e.errno != errno.EISDIR:
                 raise
-            # The file was changed to a directory, so consider it removed.
-            yield tree_path
+            # This is actually a directory
+            if os.path.exists(os.path.join(tree_path, '.git')):
+                # Submodule
+                from dulwich.errors import NotGitRepository
+                from dulwich.repo import Repo
+                try:
+                    if entry.sha != Repo(tree_path).head():
+                        yield tree_path
+                except NotGitRepository:
+                    yield tree_path
+            else:
+                # The file was changed to a directory, so consider it removed.
+                yield tree_path
         else:
             if blob.id != entry.sha:
                 yield tree_path

+ 2 - 1
dulwich/log_utils.py

@@ -31,7 +31,8 @@ http://docs.python.org/library/logging.html#configuring-logging-for-a-library
 
 For many modules, the only function from the logging module they need is
 getLogger; this module exports that function for convenience. If a calling
-module needs something else, it can import the standard logging module directly.
+module needs something else, it can import the standard logging module
+directly.
 """
 
 import logging

+ 44 - 32
dulwich/object_store.py

@@ -22,7 +22,6 @@
 
 """Git object store interfaces and implementation."""
 
-
 from io import BytesIO
 import errno
 from itertools import chain
@@ -30,6 +29,7 @@ import os
 import stat
 import sys
 import tempfile
+import time
 
 from dulwich.diff_tree import (
     tree_changes,
@@ -74,8 +74,8 @@ class BaseObjectStore(object):
 
     def determine_wants_all(self, refs):
         return [sha for (ref, sha) in refs.items()
-                if not sha in self and not ref.endswith(b"^{}") and
-                   not sha == ZERO_SHA]
+                if sha not in self and not ref.endswith(b"^{}") and
+                not sha == ZERO_SHA]
 
     def iter_shas(self, shas):
         """Iterate over the objects for the specified shas.
@@ -173,12 +173,14 @@ class BaseObjectStore(object):
         :param wants: Iterable over SHAs of objects to fetch.
         :param progress: Simple progress function that will be called with
             updated progress strings.
-        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
-            sha for including tags.
-        :param get_parents: Optional function for getting the parents of a commit.
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
+        :param get_parents: Optional function for getting the parents of a
+            commit.
         :return: Iterator over (sha, path) pairs.
         """
-        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged, get_parents=get_parents)
+        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
+                                     get_parents=get_parents)
         return iter(finder.next, None)
 
     def find_common_revisions(self, graphwalker):
@@ -210,8 +212,8 @@ class BaseObjectStore(object):
 
         :param sha: The object SHA to peel.
         :return: The fully-peeled SHA1 of a tag object, after peeling all
-            intermediate tags; if the original ref does not point to a tag, this
-            will equal the original SHA1.
+            intermediate tags; if the original ref does not point to a tag,
+            this will equal the original SHA1.
         """
         obj = self[sha]
         obj_class = object_class(obj.type_name)
@@ -227,7 +229,8 @@ class BaseObjectStore(object):
         :param heads: commits to start from
         :param common: commits to end at, or empty set to walk repository
             completely
-        :param get_parents: Optional function for getting the parents of a commit.
+        :param get_parents: Optional function for getting the parents of a
+            commit.
         :return: a tuple (A, B) where A - all commits reachable
             from heads but not present in common, B - common (shared) elements
             that are directly reachable from heads
@@ -338,7 +341,8 @@ class PackBasedObjectStore(BaseObjectStore):
 
     def __iter__(self):
         """Iterate over the SHAs that are present in this store."""
-        iterables = list(self.packs) + [self._iter_loose_objects()] + [self._iter_alternate_objects()]
+        iterables = (list(self.packs) + [self._iter_loose_objects()] +
+                     [self._iter_alternate_objects()])
         return chain(*iterables)
 
     def contains_loose(self, sha):
@@ -428,8 +432,7 @@ class DiskObjectStore(PackBasedObjectStore):
 
     def _read_alternate_paths(self):
         try:
-            f = GitFile(os.path.join(self.path, INFODIR, "alternates"),
-                    'rb')
+            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
         except (OSError, IOError) as e:
             if e.errno == errno.ENOENT:
                 return
@@ -442,7 +445,8 @@ class DiskObjectStore(PackBasedObjectStore):
                 if os.path.isabs(l):
                     yield l.decode(sys.getfilesystemencoding())
                 else:
-                    yield os.path.join(self.path, l).decode(sys.getfilesystemencoding())
+                    yield os.path.join(self.path, l).decode(
+                        sys.getfilesystemencoding())
 
     def add_alternate_path(self, path):
         """Add an alternate path to this object store.
@@ -477,12 +481,13 @@ class DiskObjectStore(PackBasedObjectStore):
                 self.close()
                 return
             raise
-        self._pack_cache_time = os.stat(self.pack_dir).st_mtime
+        self._pack_cache_time = max(
+                os.stat(self.pack_dir).st_mtime, time.time())
         pack_files = set()
         for name in pack_dir_contents:
-            assert isinstance(name, basestring if sys.version_info[0] == 2 else str)
             if name.startswith("pack-") and name.endswith(".pack"):
-                # verify that idx exists first (otherwise the pack was not yet fully written)
+                # verify that idx exists first (otherwise the pack was not yet
+                # fully written)
                 idx_name = os.path.splitext(name)[0] + ".idx"
                 if idx_name in pack_dir_contents:
                     pack_name = name[:-len(".pack")]
@@ -498,7 +503,7 @@ class DiskObjectStore(PackBasedObjectStore):
 
     def _pack_cache_stale(self):
         try:
-            return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
+            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
         except OSError as e:
             if e.errno == errno.ENOENT:
                 return True
@@ -599,12 +604,12 @@ class DiskObjectStore(PackBasedObjectStore):
     def add_thin_pack(self, read_all, read_some):
         """Add a new thin pack to this object store.
 
-        Thin packs are packs that contain deltas with parents that exist outside
-        the pack. They should never be placed in the object store directly, and
-        always indexed and completed as they are copied.
+        Thin packs are packs that contain deltas with parents that exist
+        outside the pack. They should never be placed in the object store
+        directly, and always indexed and completed as they are copied.
 
-        :param read_all: Read function that blocks until the number of requested
-            bytes are read.
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
         :param read_some: Read function that returns at least one byte, but may
             not return the number of bytes requested.
         :return: A Pack object pointing at the now-completed thin pack in the
@@ -645,6 +650,7 @@ class DiskObjectStore(PackBasedObjectStore):
         """
         fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
         f = os.fdopen(fd, 'wb')
+
         def commit():
             os.fsync(fd)
             f.close()
@@ -653,6 +659,7 @@ class DiskObjectStore(PackBasedObjectStore):
             else:
                 os.remove(path)
                 return None
+
         def abort():
             f.close()
             os.remove(path)
@@ -671,7 +678,7 @@ class DiskObjectStore(PackBasedObjectStore):
             if e.errno != errno.EEXIST:
                 raise
         if os.path.exists(path):
-            return # Already there, no need to write again
+            return  # Already there, no need to write again
         with GitFile(path, 'wb') as f:
             f.write(obj.as_legacy_object())
 
@@ -759,11 +766,13 @@ class MemoryObjectStore(BaseObjectStore):
             call when the pack is finished.
         """
         f = BytesIO()
+
         def commit():
             p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
             f.close()
             for obj in PackInflater.for_pack_data(p, self.get_raw):
                 self.add_object(obj)
+
         def abort():
             pass
         return f, commit, abort
@@ -794,19 +803,20 @@ class MemoryObjectStore(BaseObjectStore):
     def add_thin_pack(self, read_all, read_some):
         """Add a new thin pack to this object store.
 
-        Thin packs are packs that contain deltas with parents that exist outside
-        the pack. Because this object store doesn't support packs, we extract
-        and add the individual objects.
+        Thin packs are packs that contain deltas with parents that exist
+        outside the pack. Because this object store doesn't support packs, we
+        extract and add the individual objects.
 
-        :param read_all: Read function that blocks until the number of requested
-            bytes are read.
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
         :param read_some: Read function that returns at least one byte, but may
             not return the number of bytes requested.
         """
         f, commit, abort = self.add_pack()
         try:
             indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
-            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
             copier.verify()
             self._complete_thin_pack(f, indexer)
         except:
@@ -1054,7 +1064,8 @@ class MissingObjectFinder(object):
         if sha in self._tagged:
             self.add_todo([(self._tagged[sha], None, True)])
         self.sha_done.add(sha)
-        self.progress(("counting objects: %d\r" % len(self.sha_done)).encode('ascii'))
+        self.progress(("counting objects: %d\r" %
+                       len(self.sha_done)).encode('ascii'))
         return (sha, name)
 
     __next__ = next
@@ -1109,7 +1120,8 @@ class ObjectStoreGraphWalker(object):
             ret = self.heads.pop()
             ps = self.get_parents(ret)
             self.parents[ret] = ps
-            self.heads.update([p for p in ps if not p in self.parents])
+            self.heads.update(
+                [p for p in ps if p not in self.parents])
             return ret
         return None
 

+ 64 - 41
dulwich/objects.py

@@ -136,7 +136,8 @@ def filename_to_hex(filename):
 
 def object_header(num_type, length):
     """Return an object header for the given numeric type and text length."""
-    return object_class(num_type).type_name + b' ' + str(length).encode('ascii') + b'\0'
+    return (object_class(num_type).type_name +
+            b' ' + str(length).encode('ascii') + b'\0')
 
 
 def serializable_property(name, docstring=None):
@@ -145,6 +146,7 @@ def serializable_property(name, docstring=None):
     def set(obj, value):
         setattr(obj, "_"+name, value)
         obj._needs_serialization = True
+
     def get(obj):
         return getattr(obj, "_"+name)
     return property(get, set, doc=docstring)
@@ -182,9 +184,9 @@ def check_identity(identity, error_msg):
     email_start = identity.find(b'<')
     email_end = identity.find(b'>')
     if (email_start < 0 or email_end < 0 or email_end <= email_start
-        or identity.find(b'<', email_start + 1) >= 0
-        or identity.find(b'>', email_end + 1) >= 0
-        or not identity.endswith(b'>')):
+            or identity.find(b'<', email_start + 1) >= 0
+            or identity.find(b'>', email_end + 1) >= 0
+            or not identity.endswith(b'>')):
         raise ObjectFormatException(error_msg)
 
 
@@ -514,7 +516,7 @@ class ShaFile(object):
     def __cmp__(self, other):
         if not isinstance(other, ShaFile):
             raise TypeError
-        return cmp(self.id, other.id)
+        return cmp(self.id, other.id)  # noqa: F821
 
 
 class Blob(ShaFile):
@@ -551,7 +553,8 @@ class Blob(ShaFile):
     def _deserialize(self, chunks):
         self._chunked_text = chunks
 
-    chunked = property(_get_chunked, _set_chunked,
+    chunked = property(
+        _get_chunked, _set_chunked,
         "The text within the blob object, as chunks (not necessarily lines).")
 
     @classmethod
@@ -713,10 +716,12 @@ class Tag(ShaFile):
                 chunks.append(git_line(_TAGGER_HEADER, self._tagger))
             else:
                 chunks.append(git_line(
-                    _TAGGER_HEADER, self._tagger, str(self._tag_time).encode('ascii'),
-                    format_timezone(self._tag_timezone, self._tag_timezone_neg_utc)))
+                    _TAGGER_HEADER, self._tagger,
+                    str(self._tag_time).encode('ascii'),
+                    format_timezone(
+                        self._tag_timezone, self._tag_timezone_neg_utc)))
         if self._message is not None:
-            chunks.append(b'\n') # To close headers
+            chunks.append(b'\n')  # To close headers
             chunks.append(self._message)
         return chunks
 
@@ -747,10 +752,11 @@ class Tag(ShaFile):
                 else:
                     self._tagger = value[0:sep+1]
                     try:
-                        (timetext, timezonetext) = value[sep+2:].rsplit(b' ', 1)
+                        (timetext, timezonetext) = (
+                                value[sep+2:].rsplit(b' ', 1))
                         self._tag_time = int(timetext)
-                        self._tag_timezone, self._tag_timezone_neg_utc = \
-                                parse_timezone(timezonetext)
+                        self._tag_timezone, self._tag_timezone_neg_utc = (
+                                parse_timezone(timezonetext))
                     except ValueError as e:
                         raise ObjectFormatException(e)
             elif field is None:
@@ -772,15 +778,18 @@ class Tag(ShaFile):
     object = property(_get_object, _set_object)
 
     name = serializable_property("name", "The name of this tag")
-    tagger = serializable_property("tagger",
-        "Returns the name of the person who created this tag")
-    tag_time = serializable_property("tag_time",
-        "The creation timestamp of the tag.  As the number of seconds "
-        "since the epoch")
-    tag_timezone = serializable_property("tag_timezone",
-        "The timezone that tag_time is in.")
+    tagger = serializable_property(
+            "tagger",
+            "Returns the name of the person who created this tag")
+    tag_time = serializable_property(
+            "tag_time",
+            "The creation timestamp of the tag.  As the number of seconds "
+            "since the epoch")
+    tag_timezone = serializable_property(
+            "tag_timezone",
+            "The timezone that tag_time is in.")
     message = serializable_property(
-        "message", "The message attached to this tag")
+            "message", "The message attached to this tag")
 
 
 class TreeEntry(namedtuple('TreeEntry', ['path', 'mode', 'sha'])):
@@ -828,7 +837,8 @@ def serialize_tree(items):
     :return: Serialized tree text as chunks
     """
     for name, mode, hexsha in items:
-        yield ("%04o" % mode).encode('ascii') + b' ' + name + b'\0' + hex_to_sha(hexsha)
+        yield (("%04o" % mode).encode('ascii') + b' ' + name +
+               b'\0' + hex_to_sha(hexsha))
 
 
 def sorted_tree_items(entries, name_order):
@@ -969,7 +979,8 @@ class Tree(ShaFile):
         except ValueError as e:
             raise ObjectFormatException(e)
         # TODO: list comprehension is for efficiency in the common (small)
-        # case; if memory efficiency in the large case is a concern, use a genexp.
+        # case; if memory efficiency in the large case is a concern, use a
+        # genexp.
         self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
 
     def check(self):
@@ -1069,7 +1080,8 @@ def format_timezone(offset, unnecessary_negative_timezone=False):
         offset = -offset
     else:
         sign = '+'
-    return ('%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
+    return ('%c%02d%02d' %
+            (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
 
 
 def parse_commit(chunks):
@@ -1102,7 +1114,8 @@ def parse_commit(chunks):
         elif field == _COMMITTER_HEADER:
             committer, timetext, timezonetext = value.rsplit(b' ', 2)
             commit_time = int(timetext)
-            commit_info = (committer, commit_time, parse_timezone(timezonetext))
+            commit_info = (
+                    committer, commit_time, parse_timezone(timezonetext))
         elif field == _ENCODING_HEADER:
             encoding = value
         elif field == _MERGETAG_HEADER:
@@ -1148,12 +1161,12 @@ class Commit(ShaFile):
 
     def _deserialize(self, chunks):
         (self._tree, self._parents, author_info, commit_info, self._encoding,
-                self._mergetag, self._gpgsig, self._message, self._extra) = (
+         self._mergetag, self._gpgsig, self._message, self._extra) = (
                         parse_commit(chunks))
-        (self._author, self._author_time, (self._author_timezone,
-             self._author_timezone_neg_utc)) = author_info
-        (self._committer, self._commit_time, (self._commit_timezone,
-             self._commit_timezone_neg_utc)) = commit_info
+        (self._author, self._author_time,
+         (self._author_timezone, self._author_timezone_neg_utc)) = author_info
+        (self._committer, self._commit_time,
+         (self._commit_timezone, self._commit_timezone_neg_utc)) = commit_info
 
     def check(self):
         """Check this object for internal consistency.
@@ -1193,16 +1206,19 @@ class Commit(ShaFile):
 
     def _serialize(self):
         chunks = []
-        tree_bytes = self._tree.id if isinstance(self._tree, Tree) else self._tree
+        tree_bytes = (
+                self._tree.id if isinstance(self._tree, Tree) else self._tree)
         chunks.append(git_line(_TREE_HEADER, tree_bytes))
         for p in self._parents:
             chunks.append(git_line(_PARENT_HEADER, p))
         chunks.append(git_line(
-            _AUTHOR_HEADER, self._author, str(self._author_time).encode('ascii'),
-            format_timezone(self._author_timezone,
-                            self._author_timezone_neg_utc)))
+            _AUTHOR_HEADER, self._author,
+            str(self._author_time).encode('ascii'),
+            format_timezone(
+                    self._author_timezone, self._author_timezone_neg_utc)))
         chunks.append(git_line(
-            _COMMITTER_HEADER, self._committer, str(self._commit_time).encode('ascii'),
+            _COMMITTER_HEADER, self._committer,
+            str(self._commit_time).encode('ascii'),
             format_timezone(self._commit_timezone,
                             self._commit_timezone_neg_utc)))
         if self.encoding:
@@ -1251,28 +1267,35 @@ class Commit(ShaFile):
         """Return extra settings of this commit."""
         return self._extra
 
-    extra = property(_get_extra,
+    extra = property(
+        _get_extra,
         doc="Extra header fields not understood (presumably added in a "
             "newer version of git). Kept verbatim so the object can "
             "be correctly reserialized. For private commit metadata, use "
             "pseudo-headers in Commit.message, rather than this field.")
 
-    author = serializable_property("author",
+    author = serializable_property(
+        "author",
         "The name of the author of the commit")
 
-    committer = serializable_property("committer",
+    committer = serializable_property(
+        "committer",
         "The name of the committer of the commit")
 
     message = serializable_property(
         "message", "The commit message")
 
-    commit_time = serializable_property("commit_time",
-        "The timestamp of the commit. As the number of seconds since the epoch.")
+    commit_time = serializable_property(
+        "commit_time",
+        "The timestamp of the commit. As the number of seconds since the "
+        "epoch.")
 
-    commit_timezone = serializable_property("commit_timezone",
+    commit_timezone = serializable_property(
+        "commit_timezone",
         "The zone the commit time is in")
 
-    author_time = serializable_property("author_time",
+    author_time = serializable_property(
+        "author_time",
         "The timestamp the commit was written. As the number of "
         "seconds since the epoch.")
 

+ 17 - 2
dulwich/objectspec.py

@@ -39,6 +39,21 @@ def parse_object(repo, objectish):
     return repo[objectish]
 
 
+def parse_tree(repo, treeish):
+    """Parse a string referring to a tree.
+
+    :param repo: A `Repo` object
+    :param treeish: A string referring to a tree
+    :return: A git object
+    :raise KeyError: If the object can not be found
+    """
+    treeish = to_bytes(treeish)
+    o = repo[treeish]
+    if o.type_name == b"commit":
+        return repo[o.tree]
+    return o
+
+
 def parse_ref(container, refspec):
     """Parse a string referring to a reference.
 
@@ -93,7 +108,7 @@ def parse_reftuple(lh_container, rh_container, refspec):
             rh = parse_ref(rh_container, rh)
         except KeyError:
             # TODO: check force?
-            if not b"/" in rh:
+            if b"/" not in rh:
                 rh = b"refs/heads/" + rh
     return (lh, rh, force)
 
@@ -157,7 +172,7 @@ def parse_commit(repo, committish):
     :raise ValueError: If the range can not be parsed
     """
     committish = to_bytes(committish)
-    return repo[committish] # For now..
+    return repo[committish]  # For now..
 
 
 # TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation

+ 57 - 53
dulwich/pack.py

@@ -53,6 +53,14 @@ except ImportError:
 import os
 import sys
 
+from hashlib import sha1
+from os import (
+    SEEK_CUR,
+    SEEK_END,
+    )
+from struct import unpack_from
+import zlib
+
 try:
     import mmap
 except ImportError:
@@ -64,23 +72,15 @@ else:
 if sys.platform == 'Plan9':
     has_mmap = False
 
-from hashlib import sha1
-from os import (
-    SEEK_CUR,
-    SEEK_END,
-    )
-from struct import unpack_from
-import zlib
-
-from dulwich.errors import (
+from dulwich.errors import (  # noqa: E402
     ApplyDeltaError,
     ChecksumMismatch,
     )
-from dulwich.file import GitFile
-from dulwich.lru_cache import (
+from dulwich.file import GitFile  # noqa: E402
+from dulwich.lru_cache import (  # noqa: E402
     LRUSizeCache,
     )
-from dulwich.objects import (
+from dulwich.objects import (  # noqa: E402
     ShaFile,
     hex_to_sha,
     sha_to_hex,
@@ -309,8 +309,8 @@ def load_pack_index_file(path, f):
     if contents[:4] == b'\377tOc':
         version = struct.unpack(b'>L', contents[4:8])[0]
         if version == 2:
-            return PackIndex2(path, file=f, contents=contents,
-                size=size)
+            return PackIndex2(
+                path, file=f, contents=contents, size=size)
         else:
             raise KeyError('Unknown pack index format %d' % version)
     else:
@@ -451,7 +451,8 @@ class FilePackIndex(PackIndex):
     is the end of the group that shares the same starting byte. Subtract one
     from the starting byte and index again to find the start of the group.
     The values are sorted by sha id within the group, so do the math to find
-    the start and end offset and then bisect in to find if the value is present.
+    the start and end offset and then bisect in to find if the value is
+    present.
     """
 
     def __init__(self, filename, file=None, contents=None, size=None):
@@ -475,7 +476,7 @@ class FilePackIndex(PackIndex):
     def __eq__(self, other):
         # Quick optimization:
         if (isinstance(other, FilePackIndex) and
-            self._fan_out_table != other._fan_out_table):
+                self._fan_out_table != other._fan_out_table):
             return False
 
         return super(FilePackIndex, self).__eq__(other)
@@ -506,7 +507,8 @@ class FilePackIndex(PackIndex):
         raise NotImplementedError(self._unpack_offset)
 
     def _unpack_crc32_checksum(self, i):
-        """Unpack the crc32 checksum for the i-th object from the index file."""
+        """Unpack the crc32 checksum for the ith object from the index file.
+        """
         raise NotImplementedError(self._unpack_crc32_checksum)
 
     def _itersha(self):
@@ -525,7 +527,8 @@ class FilePackIndex(PackIndex):
     def _read_fan_out_table(self, start_offset):
         ret = []
         for i in range(0x100):
-            fanout_entry = self._contents[start_offset+i*4:start_offset+(i+1)*4]
+            fanout_entry = self._contents[
+                start_offset+i*4:start_offset+(i+1)*4]
             ret.append(struct.unpack('>L', fanout_entry)[0])
         return ret
 
@@ -616,8 +619,8 @@ class PackIndex2(FilePackIndex):
         self._crc32_table_offset = self._name_table_offset + 20 * len(self)
         self._pack_offset_table_offset = (self._crc32_table_offset +
                                           4 * len(self))
-        self._pack_offset_largetable_offset = (self._pack_offset_table_offset +
-                                          4 * len(self))
+        self._pack_offset_largetable_offset = (
+            self._pack_offset_table_offset + 4 * len(self))
 
     def _unpack_entry(self, i):
         return (self._unpack_name(i), self._unpack_offset(i),
@@ -631,21 +634,23 @@ class PackIndex2(FilePackIndex):
         offset = self._pack_offset_table_offset + i * 4
         offset = unpack_from('>L', self._contents, offset)[0]
         if offset & (2**31):
-            offset = self._pack_offset_largetable_offset + (offset&(2**31-1)) * 8
+            offset = (
+                self._pack_offset_largetable_offset +
+                (offset & (2 ** 31 - 1)) * 8)
             offset = unpack_from('>Q', self._contents, offset)[0]
         return offset
 
     def _unpack_crc32_checksum(self, i):
         return unpack_from('>L', self._contents,
-                          self._crc32_table_offset + i * 4)[0]
+                           self._crc32_table_offset + i * 4)[0]
 
 
 def read_pack_header(read):
     """Read the header of a pack file.
 
     :param read: Read function
-    :return: Tuple of (pack version, number of objects). If no data is available
-        to read, returns (None, None).
+    :return: Tuple of (pack version, number of objects). If no data is
+        available to read, returns (None, None).
     """
     header = read(12)
     if not header:
@@ -779,7 +784,8 @@ class PackStreamReader(object):
         else:
             to_pop = max(n + tn - 20, 0)
             to_add = n
-        self.sha.update(bytes(bytearray([self._trailer.popleft() for _ in range(to_pop)])))
+        self.sha.update(
+            bytes(bytearray([self._trailer.popleft() for _ in range(to_pop)])))
         self._trailer.extend(data[-to_add:])
 
         # hash everything but the trailer
@@ -880,8 +886,8 @@ class PackStreamCopier(PackStreamReader):
     def __init__(self, read_all, read_some, outfile, delta_iter=None):
         """Initialize the copier.
 
-        :param read_all: Read function that blocks until the number of requested
-            bytes are read.
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
         :param read_some: Read function that returns at least one byte, but may
             not return the number of bytes requested.
         :param outfile: File-like object to write output through.
@@ -924,7 +930,7 @@ def obj_sha(type, chunks):
     return sha.digest()
 
 
-def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1<<16):
+def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1 << 16):
     """Hash a portion of a file into a new SHA.
 
     :param f: A file-like object to read from that supports seek().
@@ -981,8 +987,8 @@ class PackData(object):
     def __init__(self, filename, file=None, size=None):
         """Create a PackData object representing the pack in the given filename.
 
-        The file must exist and stay readable until the object is disposed of. It
-        must also stay the same size. It will be mapped whenever needed.
+        The file must exist and stay readable until the object is disposed of.
+        It must also stay the same size. It will be mapped whenever needed.
 
         Currently there is a restriction on the size of the pack as the python
         mmap implementation is flawed.
@@ -995,8 +1001,8 @@ class PackData(object):
         else:
             self._file = file
         (version, self._num_objects) = read_pack_header(self._file.read)
-        self._offset_cache = LRUSizeCache(1024*1024*20,
-            compute_size=_compute_object_size)
+        self._offset_cache = LRUSizeCache(
+            1024*1024*20, compute_size=_compute_object_size)
         self.pack = None
 
     @property
@@ -1076,12 +1082,6 @@ class PackData(object):
             if base_type == OFS_DELTA:
                 (delta_offset, delta) = base_obj
                 # TODO: clean up asserts and replace with nicer error messages
-                assert (
-                    isinstance(base_offset, int)
-                    or isinstance(base_offset, long))
-                assert (
-                    isinstance(delta_offset, int)
-                    or isinstance(base_offset, long))
                 base_offset = base_offset - delta_offset
                 base_type, base_obj = self.get_object_at(base_offset)
                 assert isinstance(base_type, int)
@@ -1116,7 +1116,8 @@ class PackData(object):
                 progress(i, self._num_objects)
             yield (offset, unpacked.pack_type_num, unpacked._obj(),
                    unpacked.crc32)
-            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
+            # Back up over unused data.
+            self._file.seek(-len(unused), SEEK_CUR)
 
     def _iter_unpacked(self):
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
@@ -1132,7 +1133,8 @@ class PackData(object):
               self._file.read, compute_crc32=False)
             unpacked.offset = offset
             yield unpacked
-            self._file.seek(-len(unused), SEEK_CUR)  # Back up over unused data.
+            # Back up over unused data.
+            self._file.seek(-len(unused), SEEK_CUR)
 
     def iterentries(self, progress=None):
         """Yield entries summarizing the contents of this pack.
@@ -1305,9 +1307,9 @@ class DeltaChainIterator(object):
             try:
                 type_num, chunks = self._resolve_ext_ref(base_sha)
             except KeyError:
-                # Not an external ref, but may depend on one. Either it will get
-                # popped via a _follow_chain call, or we will raise an error
-                # below.
+                # Not an external ref, but may depend on one. Either it will
+                # get popped via a _follow_chain call, or we will raise an
+                # error below.
                 continue
             self._ext_refs.append(base_sha)
             self._pending_ref.pop(base_sha)
@@ -1373,7 +1375,7 @@ class PackInflater(DeltaChainIterator):
 
 
 class SHA1Reader(object):
-    """Wrapper around a file-like object that remembers the SHA1 of its data."""
+    """Wrapper for file-like object that remembers the SHA1 of its data."""
 
     def __init__(self, f):
         self.f = f
@@ -1397,7 +1399,7 @@ class SHA1Reader(object):
 
 
 class SHA1Writer(object):
-    """Wrapper around a file-like object that remembers the SHA1 of its data."""
+    """Wrapper for file-like object that remembers the SHA1 of its data."""
 
     def __init__(self, f):
         self.f = f
@@ -1492,8 +1494,8 @@ def write_pack(filename, objects, deltify=None, delta_window_size=None):
     :return: Tuple with checksum of pack file and index file
     """
     with GitFile(filename + '.pack', 'wb') as f:
-        entries, data_sum = write_pack_objects(f, objects,
-            delta_window_size=delta_window_size, deltify=deltify)
+        entries, data_sum = write_pack_objects(
+            f, objects, delta_window_size=delta_window_size, deltify=deltify)
     entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
     with GitFile(filename + '.idx', 'wb') as f:
         return data_sum, write_pack_index_v2(f, entries, data_sum)
@@ -1634,6 +1636,7 @@ def _delta_encode_size(size):
 # 24-bit lengths in copy operations, but we always make version 2 packs.
 _MAX_COPY_LEN = 0xffff
 
+
 def _encode_copy_operation(start, length):
     scratch = []
     op = 0x80
@@ -1664,7 +1667,7 @@ def create_delta(base_buf, target_buf):
     seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
     for opcode, i1, i2, j1, j2 in seq.get_opcodes():
         # Git patch opcodes don't care about deletes!
-        #if opcode == 'replace' or opcode == 'delete':
+        # if opcode == 'replace' or opcode == 'delete':
         #    pass
         if opcode == 'equal':
             # If they are equal, unpacker will use data from base_buf
@@ -1704,6 +1707,7 @@ def apply_delta(src_buf, delta):
     out = []
     index = 0
     delta_length = len(delta)
+
     def get_delta_header_size(delta, index):
         size = 0
         i = 0
@@ -1738,8 +1742,8 @@ def apply_delta(src_buf, delta):
             if cp_size == 0:
                 cp_size = 0x10000
             if (cp_off + cp_size < cp_size or
-                cp_off + cp_size > src_size or
-                cp_size > dest_size):
+                    cp_off + cp_size > src_size or
+                    cp_size > dest_size):
                 break
             out.append(src_buf[cp_off:cp_off+cp_size])
         elif cmd != 0:
@@ -1945,8 +1949,8 @@ class Pack(object):
     def keep(self, msg=None):
         """Add a .keep file for the pack, preventing git from garbage collecting it.
 
-        :param msg: A message written inside the .keep file; can be used later to
-                    determine whether or not a .keep file is obsolete.
+        :param msg: A message written inside the .keep file; can be used later
+            to determine whether or not a .keep file is obsolete.
         :return: The path of the .keep file, as a string.
         """
         keepfile_name = '%s.keep' % self._basename
@@ -1958,6 +1962,6 @@ class Pack(object):
 
 
 try:
-    from dulwich._pack import apply_delta, bisect_find_sha
+    from dulwich._pack import apply_delta, bisect_find_sha  # noqa: F811
 except ImportError:
     pass

+ 18 - 12
dulwich/patch.py

@@ -37,7 +37,8 @@ from dulwich.objects import (
 FIRST_FEW_BYTES = 8000
 
 
-def write_commit_patch(f, commit, contents, progress, version=None, encoding=None):
+def write_commit_patch(f, commit, contents, progress, version=None,
+                       encoding=None):
     """Write a individual file patch.
 
     :param commit: Commit object
@@ -48,10 +49,13 @@ def write_commit_patch(f, commit, contents, progress, version=None, encoding=Non
     if isinstance(contents, str):
         contents = contents.encode(encoding)
     (num, total) = progress
-    f.write(b"From " + commit.id + b" " + time.ctime(commit.commit_time).encode(encoding) + b"\n")
+    f.write(b"From " + commit.id + b" " +
+            time.ctime(commit.commit_time).encode(encoding) + b"\n")
     f.write(b"From: " + commit.author + b"\n")
-    f.write(b"Date: " + time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n")
-    f.write(("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding) + commit.message + b"\n")
+    f.write(b"Date: " +
+            time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n")
+    f.write(("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding) +
+            commit.message + b"\n")
     f.write(b"\n")
     f.write(b"---\n")
     try:
@@ -59,7 +63,7 @@ def write_commit_patch(f, commit, contents, progress, version=None, encoding=Non
         p = subprocess.Popen(["diffstat"], stdout=subprocess.PIPE,
                              stdin=subprocess.PIPE)
     except (ImportError, OSError):
-        pass # diffstat not available?
+        pass  # diffstat not available?
     else:
         (diffstat, _) = p.communicate(contents)
         f.write(diffstat)
@@ -151,6 +155,7 @@ def write_object_diff(f, store, old_file, new_file, diff_binary=False):
     (new_path, new_mode, new_id) = new_file
     old_path = patch_filename(old_path, b"a")
     new_path = patch_filename(new_path, b"b")
+
     def content(mode, hexsha):
         if hexsha is None:
             return Blob.from_string(b'')
@@ -170,10 +175,11 @@ def write_object_diff(f, store, old_file, new_file, diff_binary=False):
     new_content = content(new_mode, new_id)
     if not diff_binary and (
             is_binary(old_content.data) or is_binary(new_content.data)):
-        f.write(b"Binary files " + old_path + b" and " + new_path + b" differ\n")
+        f.write(b"Binary files " + old_path + b" and " + new_path +
+                b" differ\n")
     else:
         f.writelines(unified_diff(lines(old_content), lines(new_content),
-            old_path, new_path))
+                     old_path, new_path))
 
 
 # TODO(jelmer): Support writing unicode, rather than bytes.
@@ -215,6 +221,7 @@ def write_blob_diff(f, old_file, new_file):
     (new_path, new_mode, new_blob) = new_file
     old_path = patch_filename(old_path, b"a")
     new_path = patch_filename(new_path, b"b")
+
     def lines(blob):
         if blob is not None:
             return blob.splitlines()
@@ -226,10 +233,9 @@ def write_blob_diff(f, old_file, new_file):
     old_contents = lines(old_blob)
     new_contents = lines(new_blob)
     f.writelines(unified_diff(old_contents, new_contents,
-        old_path, new_path))
+                 old_path, new_path))
 
 
-# TODO(jelmer): Support writing unicode, rather than bytes.
 def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
     """Write tree diff.
 
@@ -242,8 +248,7 @@ def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
     changes = store.tree_changes(old_tree, new_tree)
     for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
         write_object_diff(f, store, (oldpath, oldmode, oldsha),
-                                    (newpath, newmode, newsha),
-                                    diff_binary=diff_binary)
+                          (newpath, newmode, newsha), diff_binary=diff_binary)
 
 
 def git_am_patch_split(f, encoding=None):
@@ -255,7 +260,8 @@ def git_am_patch_split(f, encoding=None):
     """
     encoding = encoding or getattr(f, "encoding", "ascii")
     contents = f.read()
-    if isinstance(contents, bytes) and getattr(email.parser, "BytesParser", None):
+    if (isinstance(contents, bytes) and
+            getattr(email.parser, "BytesParser", None)):
         parser = email.parser.BytesParser()
         msg = parser.parsebytes(contents)
     else:

+ 199 - 71
dulwich/porcelain.py

@@ -24,6 +24,7 @@ Currently implemented:
  * archive
  * add
  * branch{_create,_delete,_list}
+ * check-ignore
  * clone
  * commit
  * commit-tree
@@ -55,6 +56,7 @@ from contextlib import (
     closing,
     contextmanager,
 )
+from io import BytesIO
 import os
 import posixpath
 import stat
@@ -79,7 +81,14 @@ from dulwich.errors import (
     SendPackError,
     UpdateRefsError,
     )
-from dulwich.index import get_unstaged_changes
+from dulwich.ignore import IgnoreFilterManager
+from dulwich.index import (
+    blob_from_path_and_stat,
+    get_unstaged_changes,
+    )
+from dulwich.object_store import (
+    tree_lookup_path,
+    )
 from dulwich.objects import (
     Commit,
     Tag,
@@ -90,6 +99,7 @@ from dulwich.objects import (
 from dulwich.objectspec import (
     parse_object,
     parse_reftuples,
+    parse_tree,
     )
 from dulwich.pack import (
     write_pack_index,
@@ -149,6 +159,19 @@ def open_repo_closing(path_or_repo):
     return closing(Repo(path_or_repo))
 
 
+def path_to_tree_path(repopath, path):
+    """Convert a path to a path usable in e.g. an index.
+
+    :param repo: Repository
+    :param path: A path
+    :return: A path formatted for use in e.g. an index
+    """
+    os.path.relpath(path, repopath)
+    if os.path.sep != '/':
+        path = path.replace(os.path.sep, '/')
+    return path.encode(sys.getfilesystemencoding())
+
+
 def archive(repo, committish=None, outstream=default_bytes_out_stream,
             errstream=default_bytes_err_stream):
     """Create an archive.
@@ -163,9 +186,9 @@ def archive(repo, committish=None, outstream=default_bytes_out_stream,
         committish = "HEAD"
     with open_repo_closing(repo) as repo_obj:
         c = repo_obj[committish]
-        tree = c.tree
-        for chunk in tar_stream(repo_obj.object_store,
-                repo_obj.object_store[c.tree], c.commit_time):
+        for chunk in tar_stream(
+                repo_obj.object_store, repo_obj.object_store[c.tree],
+                c.commit_time):
             outstream.write(chunk)
 
 
@@ -204,8 +227,7 @@ def commit(repo=".", message=None, author=None, committer=None):
     # FIXME: Support --all argument
     # FIXME: Support --signoff argument
     with open_repo_closing(repo) as r:
-        return r.do_commit(message=message, author=author,
-            committer=committer)
+        return r.do_commit(message=message, author=author, committer=committer)
 
 
 def commit_tree(repo, tree, message=None, author=None, committer=None):
@@ -217,8 +239,8 @@ def commit_tree(repo, tree, message=None, author=None, committer=None):
     :param committer: Optional committer name and email
     """
     with open_repo_closing(repo) as r:
-        return r.do_commit(message=message, tree=tree, committer=committer,
-                author=author)
+        return r.do_commit(
+            message=message, tree=tree, committer=committer, author=author)
 
 
 def init(path=".", bare=False):
@@ -252,8 +274,9 @@ def clone(source, target=None, bare=False, checkout=None,
     """
     if outstream is not None:
         import warnings
-        warnings.warn("outstream= has been deprecated in favour of errstream=.", DeprecationWarning,
-                stacklevel=3)
+        warnings.warn(
+            "outstream= has been deprecated in favour of errstream=.",
+            DeprecationWarning, stacklevel=3)
         errstream = outstream
 
     if checkout is None:
@@ -273,8 +296,8 @@ def clone(source, target=None, bare=False, checkout=None,
     else:
         r = Repo.init(target)
     try:
-        remote_refs = client.fetch(host_path, r,
-            determine_wants=r.object_store.determine_wants_all,
+        remote_refs = client.fetch(
+            host_path, r, determine_wants=r.object_store.determine_wants_all,
             progress=errstream.write)
         r.refs.import_refs(
             b'refs/remotes/' + origin,
@@ -293,7 +316,8 @@ def clone(source, target=None, bare=False, checkout=None,
         if not isinstance(source, bytes):
             source = source.encode(DEFAULT_ENCODING)
         target_config.set((b'remote', b'origin'), b'url', source)
-        target_config.set((b'remote', b'origin'), b'fetch',
+        target_config.set(
+            (b'remote', b'origin'), b'fetch',
             b'+refs/heads/*:refs/remotes/origin/*')
         target_config.write_to_path()
         if checkout and b"HEAD" in r.refs:
@@ -311,32 +335,29 @@ def add(repo=".", paths=None):
 
     :param repo: Repository for the files
     :param paths: Paths to add.  No value passed stages all modified files.
+    :return: Tuple with set of added files and ignored files
     """
+    ignored = set()
     with open_repo_closing(repo) as r:
+        ignore_manager = IgnoreFilterManager.from_repo(r)
         if not paths:
-            # If nothing is specified, add all non-ignored files.
-            paths = []
-            for dirpath, dirnames, filenames in os.walk(r.path):
-                # Skip .git and below.
-                if '.git' in dirnames:
-                    dirnames.remove('.git')
-                for filename in filenames:
-                    paths.append(os.path.join(dirpath[len(r.path)+1:], filename))
-        # TODO(jelmer): Possibly allow passing in absolute paths?
+            paths = list(
+                get_untracked_paths(os.getcwd(), r.path, r.open_index()))
         relpaths = []
         if not isinstance(paths, list):
             paths = [paths]
         for p in paths:
+            relpath = os.path.relpath(p, r.path)
             # FIXME: Support patterns, directories.
-            if os.path.isabs(p) and p.startswith(repo.path):
-                relpath = os.path.relpath(p, repo.path)
-            else:
-                relpath = p
+            if ignore_manager.is_ignored(relpath):
+                ignored.add(relpath)
+                continue
             relpaths.append(relpath)
         r.stage(relpaths)
+    return (relpaths, ignored)
 
 
-def rm(repo=".", paths=None):
+def remove(repo=".", paths=None, cached=False):
     """Remove files from the staging area.
 
     :param repo: Repository for the files
@@ -345,10 +366,46 @@ def rm(repo=".", paths=None):
     with open_repo_closing(repo) as r:
         index = r.open_index()
         for p in paths:
-            del index[p.encode(sys.getfilesystemencoding())]
+            full_path = os.path.abspath(p).encode(sys.getfilesystemencoding())
+            tree_path = path_to_tree_path(r.path, p)
+            try:
+                index_sha = index[tree_path].sha
+            except KeyError:
+                raise Exception('%s did not match any files' % p)
+
+            if not cached:
+                try:
+                    st = os.lstat(full_path)
+                except OSError:
+                    pass
+                else:
+                    try:
+                        blob = blob_from_path_and_stat(full_path, st)
+                    except IOError:
+                        pass
+                    else:
+                        try:
+                            committed_sha = tree_lookup_path(
+                                r.__getitem__, r[r.head()].tree, tree_path)[1]
+                        except KeyError:
+                            committed_sha = None
+
+                        if blob.id != index_sha and index_sha != committed_sha:
+                            raise Exception(
+                                'file has staged content differing '
+                                'from both the file and head: %s' % p)
+
+                        if index_sha != committed_sha:
+                            raise Exception(
+                                'file has staged changes: %s' % p)
+                        os.remove(full_path)
+            del index[tree_path]
         index.write()
 
 
+rm = remove
+
+
 def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING):
     if commit.encoding is not None:
         return contents.decode(commit.encoding, "replace")
@@ -364,7 +421,8 @@ def print_commit(commit, decode, outstream=sys.stdout):
     outstream.write("-" * 50 + "\n")
     outstream.write("commit: " + commit.id.decode('ascii') + "\n")
     if len(commit.parents) > 1:
-        outstream.write("merge: " +
+        outstream.write(
+            "merge: " +
             "...".join([c.decode('ascii') for c in commit.parents[1:]]) + "\n")
     outstream.write("Author: " + decode(commit.author) + "\n")
     if commit.author != commit.committer:
@@ -413,8 +471,19 @@ def show_commit(repo, commit, decode, outstream=sys.stdout):
     :param outstream: Stream to write to
     """
     print_commit(commit, decode=decode, outstream=outstream)
-    parent_commit = repo[commit.parents[0]]
-    write_tree_diff(outstream, repo.object_store, parent_commit.tree, commit.tree)
+    if commit.parents:
+        parent_commit = repo[commit.parents[0]]
+        base_tree = parent_commit.tree
+    else:
+        base_tree = None
+    diffstream = BytesIO()
+    write_tree_diff(
+        diffstream,
+        repo.object_store, base_tree, commit.tree)
+    diffstream.seek(0)
+    outstream.write(
+        diffstream.getvalue().decode(
+                commit.encoding or DEFAULT_ENCODING, 'replace'))
 
 
 def show_tree(repo, tree, decode, outstream=sys.stdout):
@@ -495,7 +564,8 @@ def log(repo=".", paths=None, outstream=sys.stdout, max_entries=None,
         walker = r.get_walker(
             max_entries=max_entries, paths=paths, reverse=reverse)
         for entry in walker:
-            decode = lambda x: commit_decode(entry.commit, x)
+            def decode(x):
+                return commit_decode(entry.commit, x)
             print_commit(entry.commit, decode, outstream)
             if name_status:
                 outstream.writelines(
@@ -510,7 +580,8 @@ def show(repo=".", objects=None, outstream=sys.stdout,
     :param repo: Path to repository
     :param objects: Objects to show (defaults to [HEAD])
     :param outstream: Stream to write to
-    :param default_encoding: Default encoding to use if none is set in the commit
+    :param default_encoding: Default encoding to use if none is set in the
+        commit
     """
     if objects is None:
         objects = ["HEAD"]
@@ -520,9 +591,11 @@ def show(repo=".", objects=None, outstream=sys.stdout,
         for objectish in objects:
             o = parse_object(r, objectish)
             if isinstance(o, Commit):
-                decode = lambda x: commit_decode(o, x, default_encoding)
+                def decode(x):
+                    return commit_decode(o, x, default_encoding)
             else:
-                decode = lambda x: x.decode(default_encoding)
+                def decode(x):
+                    return x.decode(default_encoding)
             show_object(r, o, decode, outstream)
 
 
@@ -552,11 +625,13 @@ def rev_list(repo, commits, outstream=sys.stdout):
 
 def tag(*args, **kwargs):
     import warnings
-    warnings.warn("tag has been deprecated in favour of tag_create.", DeprecationWarning)
+    warnings.warn("tag has been deprecated in favour of tag_create.",
+                  DeprecationWarning)
     return tag_create(*args, **kwargs)
 
 
-def tag_create(repo, tag, author=None, message=None, annotated=False,
+def tag_create(
+        repo, tag, author=None, message=None, annotated=False,
         objectish="HEAD", tag_time=None, tag_timezone=None):
     """Creates a tag in git via dulwich calls:
 
@@ -602,7 +677,8 @@ def tag_create(repo, tag, author=None, message=None, annotated=False,
 
 def list_tags(*args, **kwargs):
     import warnings
-    warnings.warn("list_tags has been deprecated in favour of tag_list.", DeprecationWarning)
+    warnings.warn("list_tags has been deprecated in favour of tag_list.",
+                  DeprecationWarning)
     return tag_list(*args, **kwargs)
 
 
@@ -634,28 +710,30 @@ def tag_delete(repo, name):
             del r.refs[b"refs/tags/" + name]
 
 
-def reset(repo, mode, committish="HEAD"):
+def reset(repo, mode, treeish="HEAD"):
     """Reset current HEAD to the specified state.
 
     :param repo: Path to repository
     :param mode: Mode ("hard", "soft", "mixed")
+    :param treeish: Treeish to reset to
     """
 
     if mode != "hard":
         raise ValueError("hard is the only mode currently supported")
 
     with open_repo_closing(repo) as r:
-        tree = r[committish].tree
-        r.reset_index(tree)
+        tree = parse_tree(r, treeish)
+        r.reset_index(tree.id)
 
 
-def push(repo, remote_location, refspecs=None,
-         outstream=default_bytes_out_stream, errstream=default_bytes_err_stream):
+def push(repo, remote_location, refspecs,
+         outstream=default_bytes_out_stream,
+         errstream=default_bytes_err_stream):
     """Remote push with dulwich via dulwich.client
 
     :param repo: Path to repository
     :param remote_location: Location of the remote
-    :param refspecs: relative path to the refs to push to remote
+    :param refspecs: Refs to push to remote
     :param outstream: A stream file to write output
     :param errstream: A stream file to write errors
     """
@@ -682,10 +760,11 @@ def push(repo, remote_location, refspecs=None,
         err_encoding = getattr(errstream, 'encoding', None) or DEFAULT_ENCODING
         remote_location_bytes = client.get_url(path).encode(err_encoding)
         try:
-            client.send_pack(path, update_refs,
-                r.object_store.generate_pack_contents, progress=errstream.write)
-            errstream.write(b"Push to " + remote_location_bytes +
-                            b" successful.\n")
+            client.send_pack(
+                path, update_refs, r.object_store.generate_pack_contents,
+                progress=errstream.write)
+            errstream.write(
+                b"Push to " + remote_location_bytes + b" successful.\n")
         except (UpdateRefsError, SendPackError) as e:
             errstream.write(b"Push to " + remote_location_bytes +
                             b" failed -> " + e.message.encode(err_encoding) +
@@ -693,7 +772,8 @@ def push(repo, remote_location, refspecs=None,
 
 
 def pull(repo, remote_location=None, refspecs=None,
-         outstream=default_bytes_out_stream, errstream=default_bytes_err_stream):
+         outstream=default_bytes_out_stream,
+         errstream=default_bytes_err_stream):
     """Pull from remote via dulwich.client
 
     :param repo: Path to repository
@@ -711,12 +791,14 @@ def pull(repo, remote_location=None, refspecs=None,
         if refspecs is None:
             refspecs = [b"HEAD"]
         selected_refs = []
+
         def determine_wants(remote_refs):
-            selected_refs.extend(parse_reftuples(remote_refs, r.refs, refspecs))
+            selected_refs.extend(
+                parse_reftuples(remote_refs, r.refs, refspecs))
             return [remote_refs[lh] for (lh, rh, force) in selected_refs]
         client, path = get_transport_and_path(remote_location)
-        remote_refs = client.fetch(path, r, progress=errstream.write,
-                determine_wants=determine_wants)
+        remote_refs = client.fetch(
+            path, r, progress=errstream.write, determine_wants=determine_wants)
         for (lh, rh, force) in selected_refs:
             r.refs[rh] = remote_refs[lh]
         if selected_refs:
@@ -724,13 +806,14 @@ def pull(repo, remote_location=None, refspecs=None,
 
         # Perform 'git checkout .' - syncs staged changes
         tree = r[b"HEAD"].tree
-        r.reset_index()
+        r.reset_index(tree=tree)
 
 
-def status(repo="."):
+def status(repo=".", ignored=False):
     """Returns staged, unstaged, and untracked changes relative to the HEAD.
 
     :param repo: Path to repository or repository object
+    :param ignored: Whether to include ignored files in `untracked`
     :return: GitStatus tuple,
         staged -    list of staged paths (diff index/HEAD)
         unstaged -  list of unstaged paths (diff index/working-tree)
@@ -740,12 +823,44 @@ def status(repo="."):
         # 1. Get status of staged
         tracked_changes = get_tree_changes(r)
         # 2. Get status of unstaged
-        unstaged_changes = list(get_unstaged_changes(r.open_index(), r.path))
-        # TODO - Status of untracked - add untracked changes, need gitignore.
-        untracked_changes = []
+        index = r.open_index()
+        unstaged_changes = list(get_unstaged_changes(index, r.path))
+        ignore_manager = IgnoreFilterManager.from_repo(r)
+        untracked_paths = get_untracked_paths(r.path, r.path, index)
+        if ignored:
+            untracked_changes = list(untracked_paths)
+        else:
+            untracked_changes = [
+                    p for p in untracked_paths
+                    if not ignore_manager.is_ignored(p)]
         return GitStatus(tracked_changes, unstaged_changes, untracked_changes)
 
 
+def get_untracked_paths(frompath, basepath, index):
+    """Get untracked paths.
+
+    :param frompath: Path to walk
+    :param basepath: Path to compare to
+    :param index: Index to check against
+    """
+    # If nothing is specified, add all non-ignored files.
+    for dirpath, dirnames, filenames in os.walk(frompath):
+        # Skip .git and below.
+        if '.git' in dirnames:
+            dirnames.remove('.git')
+            if dirpath != basepath:
+                continue
+        if '.git' in filenames:
+            filenames.remove('.git')
+            if dirpath != basepath:
+                continue
+        for filename in filenames:
+            ap = os.path.join(dirpath, filename)
+            ip = path_to_tree_path(basepath, ap)
+            if ip not in index:
+                yield os.path.relpath(ap, frompath)
+
+
 def get_tree_changes(repo):
     """Return add/delete/modify changes to tree by comparing index to HEAD.
 
@@ -827,6 +942,7 @@ def upload_pack(path=".", inf=None, outf=None):
         inf = getattr(sys.stdin, 'buffer', sys.stdin)
     path = os.path.expanduser(path)
     backend = FileSystemBackend(path)
+
     def send_fn(data):
         outf.write(data)
         outf.flush()
@@ -850,6 +966,7 @@ def receive_pack(path=".", inf=None, outf=None):
         inf = getattr(sys.stdin, 'buffer', sys.stdin)
     path = os.path.expanduser(path)
     backend = FileSystemBackend(path)
+
     def send_fn(data):
         outf.write(data)
         outf.flush()
@@ -886,12 +1003,6 @@ def branch_create(repo, name, objectish=None, force=False):
     :param force: Force creation of branch, even if it already exists
     """
     with open_repo_closing(repo) as r:
-        if isinstance(name, bytes):
-            names = [name]
-        elif isinstance(name, list):
-            names = name
-        else:
-            raise TypeError("Unexpected branch name type %r" % name)
         if objectish is None:
             objectish = "HEAD"
         object = parse_object(r, objectish)
@@ -911,7 +1022,7 @@ def branch_list(repo):
 
 
 def fetch(repo, remote_location, outstream=sys.stdout,
-        errstream=default_bytes_err_stream):
+          errstream=default_bytes_err_stream):
     """Fetch objects from a remote server.
 
     :param repo: Path to the repository
@@ -965,8 +1076,8 @@ def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
         write_pack_index(idxf, entries, data_sum)
 
 
-def ls_tree(repo, tree_ish=None, outstream=sys.stdout, recursive=False,
-        name_only=False):
+def ls_tree(repo, treeish=b"HEAD", outstream=sys.stdout, recursive=False,
+            name_only=False):
     """List contents of a tree.
 
     :param repo: Path to the repository
@@ -985,12 +1096,9 @@ def ls_tree(repo, tree_ish=None, outstream=sys.stdout, recursive=False,
                 outstream.write(pretty_format_tree_entry(name, mode, sha))
             if stat.S_ISDIR(mode):
                 list_tree(store, sha, name)
-    if tree_ish is None:
-        tree_ish = "HEAD"
     with open_repo_closing(repo) as r:
-        c = r[tree_ish]
-        treeid = c.tree
-        list_tree(r.object_store, treeid, "")
+        tree = parse_tree(r, treeish)
+        list_tree(r.object_store, tree.id, "")
 
 
 def remote_add(repo, name, url):
@@ -1011,3 +1119,23 @@ def remote_add(repo, name, url):
             raise RemoteExists(section)
         c.set(section, b"url", url)
         c.write_to_path()
+
+
+def check_ignore(repo, paths, no_index=False):
+    """Debug gitignore files.
+
+    :param repo: Path to the repository
+    :param paths: List of paths to check for
+    :param no_index: Don't check index
+    :return: List of ignored files
+    """
+    with open_repo_closing(repo) as r:
+        index = r.open_index()
+        ignore_manager = IgnoreFilterManager.from_repo(r)
+        for path in paths:
+            if os.path.isabs(path):
+                path = os.path.relpath(path, r.path)
+            if not no_index and path_to_tree_path(r.path, path) in index:
+                continue
+            if ignore_manager.is_ignored(path):
+                yield path

+ 18 - 14
dulwich/protocol.py

@@ -114,8 +114,8 @@ class Protocol(object):
 
     Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
     consists of the length of the line as a 4-byte hex string, followed by the
-    payload data. The length includes the 4-byte header. The special line '0000'
-    indicates the end of a section of input and is called a 'flush-pkt'.
+    payload data. The length includes the 4-byte header. The special line
+    '0000' indicates the end of a section of input and is called a 'flush-pkt'.
 
     For details on the pkt-line format, see the cgit distribution:
         Documentation/technical/protocol-common.txt
@@ -169,13 +169,15 @@ class Protocol(object):
         else:
             if len(pkt_contents) + 4 != size:
                 raise GitProtocolError(
-                    'Length of pkt read %04x does not match length prefix %04x' % (len(pkt_contents) + 4, size))
+                    'Length of pkt read %04x does not match length prefix %04x'
+                    % (len(pkt_contents) + 4, size))
             return pkt_contents
 
     def eof(self):
         """Test whether the protocol stream has reached EOF.
 
-        Note that this refers to the actual stream EOF and not just a flush-pkt.
+        Note that this refers to the actual stream EOF and not just a
+        flush-pkt.
 
         :return: True if the stream is at EOF, False otherwise.
         """
@@ -202,7 +204,8 @@ class Protocol(object):
     def read_pkt_seq(self):
         """Read a sequence of pkt-lines from the remote git process.
 
-        :return: Yields each line of data up to but not including the next flush-pkt.
+        :return: Yields each line of data up to but not including the next
+            flush-pkt.
         """
         pkt = self.read_pkt_line()
         while pkt:
@@ -291,9 +294,9 @@ class ReceivableProtocol(Protocol):
     to a read() method.
 
     If you want to read n bytes from the wire and block until exactly n bytes
-    (or EOF) are read, use read(n). If you want to read at most n bytes from the
-    wire but don't care if you get less, use recv(n). Note that recv(n) will
-    still block until at least one byte is read.
+    (or EOF) are read, use read(n). If you want to read at most n bytes from
+    the wire but don't care if you get less, use recv(n). Note that recv(n)
+    will still block until at least one byte is read.
     """
 
     def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
@@ -310,7 +313,8 @@ class ReceivableProtocol(Protocol):
         #  - seek back to start rather than 0 in case some buffer has been
         #    consumed.
         #  - use SEEK_END instead of the magic number.
-        # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
+        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
+        # Reserved
         # Licensed under the Python Software Foundation License.
         # TODO: see if buffer is more efficient than cBytesIO.
         assert size > 0
@@ -359,7 +363,7 @@ class ReceivableProtocol(Protocol):
             buf.write(data)
             buf_len += n
             del data  # explicit free
-            #assert buf_len == buf.tell()
+            # assert buf_len == buf.tell()
         buf.seek(start)
         return buf.read()
 
@@ -393,7 +397,7 @@ def extract_capabilities(text):
     :param text: String to extract from
     :return: Tuple with text with capabilities removed and list of capabilities
     """
-    if not b"\0" in text:
+    if b"\0" not in text:
         return text, []
     text, capabilities = text.rstrip().split(b"\0")
     return (text, capabilities.strip().split(b" "))
@@ -428,9 +432,9 @@ def ack_type(capabilities):
 class BufferedPktLineWriter(object):
     """Writer that wraps its data in pkt-lines and has an independent buffer.
 
-    Consecutive calls to write() wrap the data in a pkt-line and then buffers it
-    until enough lines have been written such that their total length (including
-    length prefix) reach the buffer size.
+    Consecutive calls to write() wrap the data in a pkt-line and then buffer
+    it until enough lines have been written such that their total length
+    (including length prefix) reaches the buffer size.
     """
 
     def __init__(self, write, bufsize=65515):

+ 5 - 3
dulwich/reflog.py

@@ -29,11 +29,13 @@ from dulwich.objects import (
     ZERO_SHA,
     )
 
-Entry = collections.namedtuple('Entry', ['old_sha', 'new_sha', 'committer',
-    'timestamp', 'timezone', 'message'])
+Entry = collections.namedtuple(
+    'Entry', ['old_sha', 'new_sha', 'committer', 'timestamp', 'timezone',
+              'message'])
 
 
-def format_reflog_line(old_sha, new_sha, committer, timestamp, timezone, message):
+def format_reflog_line(old_sha, new_sha, committer, timestamp, timezone,
+                       message):
     """Generate a single reflog line.
 
     :param old_sha: Old Commit SHA

+ 25 - 19
dulwich/refs.py

@@ -52,13 +52,14 @@ def check_ref_format(refname):
 
     Implements all the same rules as git-check-ref-format[1].
 
-    [1] http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
+    [1]
+    http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
 
     :param refname: The refname to check
     :return: True if refname is valid, False otherwise
     """
-    # These could be combined into one big expression, but are listed separately
-    # to parallel [1].
+    # These could be combined into one big expression, but are listed
+    # separately to parallel [1].
     if b'/.' in refname or refname.startswith(b'.'):
         return False
     if b'/' not in refname:
@@ -104,9 +105,9 @@ class RefsContainer(object):
         """Return the cached peeled value of a ref, if available.
 
         :param name: Name of the ref to peel
-        :return: The peeled value of the ref. If the ref is known not point to a
-            tag, this will be the SHA the ref refers to. If the ref may point to
-            a tag, but no cached information is available, None is returned.
+        :return: The peeled value of the ref. If the ref is known not to point
+            to a tag, this will be the SHA the ref refers to. If the ref may
+            point to a tag, but no cached information is available, None is returned.
         """
         return None
 
@@ -222,8 +223,8 @@ class RefsContainer(object):
     def _follow(self, name):
         import warnings
         warnings.warn(
-            "RefsContainer._follow is deprecated. Use RefsContainer.follow instead.",
-            DeprecationWarning)
+            "RefsContainer._follow is deprecated. Use RefsContainer.follow "
+            "instead.", DeprecationWarning)
         refnames, contents = self.follow(name)
         if not refnames:
             return (None, contents)
@@ -285,8 +286,8 @@ class RefsContainer(object):
         operation.
 
         :param name: The refname to delete.
-        :param old_ref: The old sha the refname must refer to, or None to delete
-            unconditionally.
+        :param old_ref: The old sha the refname must refer to, or None to
+            delete unconditionally.
         :return: True if the delete was successful, False otherwise.
         """
         raise NotImplementedError(self.remove_if_equals)
@@ -438,7 +439,9 @@ class DiskRefsContainer(RefsContainer):
         for root, dirs, files in os.walk(self.refpath(b'refs')):
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             for filename in files:
-                refname = ("%s/%s" % (dir, filename)).encode(sys.getfilesystemencoding())
+                refname = (
+                    "%s/%s" % (dir, filename)).encode(
+                            sys.getfilesystemencoding())
                 if check_ref_format(refname):
                     allkeys.add(refname)
         allkeys.update(self.get_packed_refs())
@@ -448,7 +451,8 @@ class DiskRefsContainer(RefsContainer):
         """Return the disk path of a ref.
 
         """
-        if getattr(self.path, "encode", None) and getattr(name, "decode", None):
+        if (getattr(self.path, "encode", None) and
+                getattr(name, "decode", None)):
             name = name.decode(sys.getfilesystemencoding())
         if os.path.sep != "/":
             name = name.replace("/", os.path.sep)
@@ -498,9 +502,9 @@ class DiskRefsContainer(RefsContainer):
         """Return the cached peeled value of a ref, if available.
 
         :param name: Name of the ref to peel
-        :return: The peeled value of the ref. If the ref is known not point to a
-            tag, this will be the SHA the ref refers to. If the ref may point to
-            a tag, but no cached information is available, None is returned.
+        :return: The peeled value of the ref. If the ref is known not to point
+            to a tag, this will be the SHA the ref refers to. If the ref may
+            point to a tag, but no cached information is available, None is returned.
         """
         self.get_packed_refs()
         if self._peeled_refs is None or name not in self._packed_refs:
@@ -604,7 +608,8 @@ class DiskRefsContainer(RefsContainer):
                     # read again while holding the lock
                     orig_ref = self.read_loose_ref(realname)
                     if orig_ref is None:
-                        orig_ref = self.get_packed_refs().get(realname, ZERO_SHA)
+                        orig_ref = self.get_packed_refs().get(
+                                realname, ZERO_SHA)
                     if orig_ref != old_ref:
                         f.abort()
                         return False
@@ -656,8 +661,8 @@ class DiskRefsContainer(RefsContainer):
         perform an atomic compare-and-delete operation.
 
         :param name: The refname to delete.
-        :param old_ref: The old sha the refname must refer to, or None to delete
-            unconditionally.
+        :param old_ref: The old sha the refname must refer to, or None to
+            delete unconditionally.
         :return: True if the delete was successful, False otherwise.
         """
         self._check_refname(name)
@@ -786,4 +791,5 @@ def write_info_refs(refs, store):
             yield peeled.id + b'\t' + name + ANNOTATED_TAG_SUFFIX + b'\n'
 
 
-is_local_branch = lambda x: x.startswith(b'refs/heads/')
+def is_local_branch(x):
+    return x.startswith(b'refs/heads/')

+ 35 - 26
dulwich/repo.py

@@ -68,7 +68,7 @@ from dulwich.hooks import (
     CommitMsgShellHook,
     )
 
-from dulwich.refs import (
+from dulwich.refs import (  # noqa: F401
     check_ref_format,
     RefsContainer,
     DictRefsContainer,
@@ -261,8 +261,8 @@ class BaseRepo(object):
             that a revision is present.
         :param progress: Simple progress function that will be called with
             updated progress strings.
-        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
-            sha for including tags.
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
         :return: iterator over objects, with __len__ implemented
         """
         wants = determine_wants(self.get_refs())
@@ -290,8 +290,9 @@ class BaseRepo(object):
         # Deal with shallow requests separately because the haves do
         # not reflect what objects are missing
         if shallows or unshallows:
-            haves = []  # TODO: filter the haves commits from iter_shas.
-                        # the specific commits aren't missing.
+            # TODO: filter the haves commits from iter_shas. the specific
+            # commits aren't missing.
+            haves = []
 
         def get_parents(commit):
             if commit.id in shallows:
@@ -412,8 +413,8 @@ class BaseRepo(object):
 
         :param ref: The refname to peel.
         :return: The fully-peeled SHA1 of a tag object, after peeling all
-            intermediate tags; if the original ref does not point to a tag, this
-            will equal the original SHA1.
+            intermediate tags; if the original ref does not point to a tag,
+            this will equal the original SHA1.
         """
         cached = self.refs.get_peeled(ref)
         if cached is not None:
@@ -427,8 +428,8 @@ class BaseRepo(object):
             ancestors. Defaults to [HEAD]
         :param exclude: Iterable of SHAs of commits to exclude along with their
             ancestors, overriding includes.
-        :param order: ORDER_* constant specifying the order of results. Anything
-            other than ORDER_DATE may result in O(n) memory usage.
+        :param order: ORDER_* constant specifying the order of results.
+            Anything other than ORDER_DATE may result in O(n) memory usage.
         :param reverse: If True, reverse the order of output, requiring O(n)
             memory.
         :param max_entries: The maximum number of entries to yield, or None for
@@ -451,7 +452,8 @@ class BaseRepo(object):
         if isinstance(include, str):
             include = [include]
 
-        kwargs['get_parents'] = lambda commit: self.get_parents(commit.id, commit)
+        kwargs['get_parents'] = lambda commit: self.get_parents(
+            commit.id, commit)
 
         return Walker(self.object_store, include, *args, **kwargs)
 
@@ -464,7 +466,7 @@ class BaseRepo(object):
         """
         if not isinstance(name, bytes):
             raise TypeError("'name' must be bytestring, not %.80s" %
-                    type(name).__name__)
+                            type(name).__name__)
         if len(name) in (20, 40):
             try:
                 return self.object_store[name]
@@ -551,7 +553,8 @@ class BaseRepo(object):
         :param author: Author fullname (defaults to committer)
         :param commit_timestamp: Commit timestamp (defaults to now)
         :param commit_timezone: Commit timestamp timezone (defaults to GMT)
-        :param author_timestamp: Author timestamp (defaults to commit timestamp)
+        :param author_timestamp: Author timestamp (defaults to commit
+            timestamp)
         :param author_timezone: Author timestamp timezone
             (defaults to commit timestamp timezone)
         :param tree: SHA1 of the tree root to use (if not specified the
@@ -636,8 +639,8 @@ class BaseRepo(object):
                 self.object_store.add_object(c)
                 ok = self.refs.add_if_new(ref, c.id)
             if not ok:
-                # Fail if the atomic compare-and-swap failed, leaving the commit and
-                # all its objects as garbage.
+                # Fail if the atomic compare-and-swap failed, leaving the
+                # commit and all its objects as garbage.
                 raise CommitError("%s changed during commit" % (ref,))
 
         try:
@@ -650,7 +653,6 @@ class BaseRepo(object):
         return c.id
 
 
-
 def read_gitfile(f):
     """Read a ``.git`` file.
 
@@ -698,7 +700,8 @@ class Repo(BaseRepo):
             with commondir:
                 self._commondir = os.path.join(
                     self.controldir(),
-                    commondir.read().rstrip(b"\r\n").decode(sys.getfilesystemencoding()))
+                    commondir.read().rstrip(b"\r\n").decode(
+                        sys.getfilesystemencoding()))
         else:
             self._commondir = self._controldir
         self.path = root
@@ -795,7 +798,8 @@ class Repo(BaseRepo):
         pointing to a file in that location.
 
         :param path: The path to the file, relative to the control dir.
-        :param basedir: Optional argument that specifies an alternative to the control dir.
+        :param basedir: Optional argument that specifies an alternative to the
+            control dir.
         :return: An open file object, or None if the file does not exist.
         """
         # TODO(dborowitz): sanitize filenames, since this is used directly by
@@ -871,7 +875,7 @@ class Repo(BaseRepo):
         index.write()
 
     def clone(self, target_path, mkdir=True, bare=False,
-            origin=b"origin"):
+              origin=b"origin"):
         """Clone this repository.
 
         :param target_path: Target path
@@ -884,7 +888,7 @@ class Repo(BaseRepo):
         if not bare:
             target = self.init(target_path, mkdir=mkdir)
         else:
-            target = self.init_bare(target_path)
+            target = self.init_bare(target_path, mkdir=mkdir)
         self.fetch(target)
         target.refs.import_refs(
             b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'))
@@ -900,7 +904,7 @@ class Repo(BaseRepo):
             encoded_path = encoded_path.encode(sys.getfilesystemencoding())
         target_config.set((b'remote', b'origin'), b'url', encoded_path)
         target_config.set((b'remote', b'origin'), b'fetch',
-            b'+refs/heads/*:refs/remotes/origin/*')
+                          b'+refs/heads/*:refs/remotes/origin/*')
         target_config.write_to_path()
 
         # Update target head
@@ -928,14 +932,16 @@ class Repo(BaseRepo):
         if tree is None:
             tree = self[b'HEAD'].tree
         config = self.get_config()
-        honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
+        honor_filemode = config.get_boolean(
+            'core', 'filemode', os.name != "nt")
         if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
             validate_path_element = validate_path_element_ntfs
         else:
             validate_path_element = validate_path_element_default
-        return build_index_from_tree(self.path, self.index_path(),
-                self.object_store, tree, honor_filemode=honor_filemode,
-                validate_path_element=validate_path_element)
+        return build_index_from_tree(
+            self.path, self.index_path(), self.object_store, tree,
+            honor_filemode=honor_filemode,
+            validate_path_element=validate_path_element)
 
     def get_config(self):
         """Retrieve the config object.
@@ -1004,7 +1010,8 @@ class Repo(BaseRepo):
         return cls(path)
 
     @classmethod
-    def _init_new_working_directory(cls, path, main_repo, identifier=None, mkdir=False):
+    def _init_new_working_directory(cls, path, main_repo, identifier=None,
+                                    mkdir=False):
         """Create a new working directory linked to a repository.
 
         :param path: Path in which to create the working tree.
@@ -1045,7 +1052,7 @@ class Repo(BaseRepo):
         return r
 
     @classmethod
-    def init_bare(cls, path):
+    def init_bare(cls, path, mkdir=False):
         """Create a new bare repository.
 
          ``path`` should already exist and be an empty directory, unless
          ``mkdir`` is true, in which case it is created first.
@@ -1053,6 +1060,8 @@ class Repo(BaseRepo):
         :param path: Path to create bare repository in
         :return: a `Repo` instance
         """
+        if mkdir:
+            os.mkdir(path)
         return cls._init_maybe_bare(path, True)
 
     create = init_bare

+ 73 - 46
dulwich/server.py

@@ -68,7 +68,7 @@ from dulwich.objects import (
 from dulwich.pack import (
     write_pack_objects,
     )
-from dulwich.protocol import (
+from dulwich.protocol import (  # noqa: F401
     BufferedPktLineWriter,
     capability_agent,
     CAPABILITIES_REF,
@@ -165,8 +165,8 @@ class BackendRepo(object):
         Yield the objects required for a list of commits.
 
         :param progress: is a callback to send progress messages to the client
-        :param get_tagged: Function that returns a dict of pointed-to sha -> tag
-            sha for including tags.
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
         """
         raise NotImplementedError
 
@@ -188,11 +188,12 @@ class DictBackend(Backend):
 
 
 class FileSystemBackend(Backend):
-    """Simple backend that looks up Git repositories in the local file system."""
+    """Simple backend looking up Git repositories in the local file system."""
 
     def __init__(self, root=os.sep):
         super(FileSystemBackend, self).__init__()
-        self.root = (os.path.abspath(root) + os.sep).replace(os.sep * 2, os.sep)
+        self.root = (os.path.abspath(root) + os.sep).replace(
+                os.sep * 2, os.sep)
 
     def open_repository(self, path):
         logger.debug('opening repository at %s', path)
@@ -200,7 +201,9 @@ class FileSystemBackend(Backend):
         normcase_abspath = os.path.normcase(abspath)
         normcase_root = os.path.normcase(self.root)
         if not normcase_abspath.startswith(normcase_root):
-            raise NotGitRepository("Path %r not inside root %r" % (path, self.root))
+            raise NotGitRepository(
+                    "Path %r not inside root %r" %
+                    (path, self.root))
         return Repo(abspath)
 
 
@@ -268,14 +271,13 @@ class PackHandler(Handler):
         self._done_received = True
 
 
-
 class UploadPackHandler(PackHandler):
     """Protocol handler for uploading a pack to the client."""
 
     def __init__(self, backend, args, proto, http_req=None,
                  advertise_refs=False):
-        super(UploadPackHandler, self).__init__(backend, proto,
-            http_req=http_req)
+        super(UploadPackHandler, self).__init__(
+                backend, proto, http_req=http_req)
         self.repo = backend.open_repository(args[0])
         self._graph_walker = None
         self.advertise_refs = advertise_refs
@@ -293,20 +295,22 @@ class UploadPackHandler(PackHandler):
 
     @classmethod
     def required_capabilities(cls):
-        return (CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK, CAPABILITY_OFS_DELTA)
+        return (CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK,
+                CAPABILITY_OFS_DELTA)
 
     def progress(self, message):
-        if self.has_capability(CAPABILITY_NO_PROGRESS) or self._processing_have_lines:
+        if (self.has_capability(CAPABILITY_NO_PROGRESS) or
+                self._processing_have_lines):
             return
         self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, message)
 
     def get_tagged(self, refs=None, repo=None):
         """Get a dict of peeled values of tags to their original tag shas.
 
-        :param refs: dict of refname -> sha of possible tags; defaults to all of
-            the backend's refs.
-        :param repo: optional Repo instance for getting peeled refs; defaults to
-            the backend's repo, if available
+        :param refs: dict of refname -> sha of possible tags; defaults to all
+            of the backend's refs.
+        :param repo: optional Repo instance for getting peeled refs; defaults
+            to the backend's repo, if available
         :return: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
             tag whose peeled value is peeled_sha.
         """
@@ -330,10 +334,11 @@ class UploadPackHandler(PackHandler):
         return tagged
 
     def handle(self):
-        write = lambda x: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
+        def write(x):
+            return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
 
-        graph_walker = ProtocolGraphWalker(self, self.repo.object_store,
-            self.repo.get_peeled)
+        graph_walker = ProtocolGraphWalker(
+                self, self.repo.object_store, self.repo.get_peeled)
         objects_iter = self.repo.fetch_objects(
             graph_walker.determine_wants, graph_walker, self.progress,
             get_tagged=self.get_tagged)
@@ -357,11 +362,14 @@ class UploadPackHandler(PackHandler):
         self._processing_have_lines = False
 
         if not graph_walker.handle_done(
-                not self.has_capability(CAPABILITY_NO_DONE), self._done_received):
+                not self.has_capability(CAPABILITY_NO_DONE),
+                self._done_received):
             return
 
         self.progress(b"dul-daemon says what\n")
-        self.progress(("counting objects: %d, done.\n" % len(objects_iter)).encode('ascii'))
+        self.progress(
+                ("counting objects: %d, done.\n" % len(objects_iter)).encode(
+                    'ascii'))
         write_pack_objects(ProtocolFile(None, write), objects_iter)
         self.progress(b"how was that, then?\n")
         # we are done
@@ -417,6 +425,7 @@ def _find_shallow(store, heads, depth):
         these sets may overlap if a commit is reachable along multiple paths.
     """
     parents = {}
+
     def get_parents(sha):
         result = parents.get(sha, None)
         if not result:
@@ -447,6 +456,7 @@ def _find_shallow(store, heads, depth):
 def _want_satisfied(store, haves, want, earliest):
     o = store[want]
     pending = collections.deque([o])
+    known = set([want])
     while pending:
         commit = pending.popleft()
         if commit.id in haves:
@@ -455,6 +465,9 @@ def _want_satisfied(store, haves, want, earliest):
             # non-commit wants are assumed to be satisfied
             continue
         for parent in commit.parents:
+            if parent in known:
+                continue
+            known.add(parent)
             parent_obj = store[parent]
             # TODO: handle parents with later commit times than children
             if parent_obj.commit_time >= earliest:
@@ -493,8 +506,8 @@ class ProtocolGraphWalker(object):
     The work of determining which acks to send is passed on to the
     implementation instance stored in _impl. The reason for this is that we do
     not know at object creation time what ack level the protocol requires. A
-    call to set_ack_level() is required to set up the implementation, before any
-    calls to next() or ack() are made.
+    call to set_ack_type() is required to set up the implementation, before
+    any calls to next() or ack() are made.
     """
     def __init__(self, handler, object_store, get_peeled):
         self.handler = handler
@@ -572,8 +585,9 @@ class ProtocolGraphWalker(object):
 
         if self.http_req and self.proto.eof():
             # The client may close the socket at this point, expecting a
-            # flush-pkt from the server. We might be ready to send a packfile at
-            # this point, so we need to explicitly short-circuit in this case.
+            # flush-pkt from the server. We might be ready to send a packfile
+            # at this point, so we need to explicitly short-circuit in this
+            # case.
             return []
 
         return want_revs
@@ -615,7 +629,8 @@ class ProtocolGraphWalker(object):
 
     def _handle_shallow_request(self, wants):
         while True:
-            command, val = self.read_proto_line((COMMAND_DEEPEN, COMMAND_SHALLOW))
+            command, val = self.read_proto_line(
+                    (COMMAND_DEEPEN, COMMAND_SHALLOW))
             if command == COMMAND_DEEPEN:
                 depth = val
                 break
@@ -850,15 +865,16 @@ class ReceivePackHandler(PackHandler):
 
     def __init__(self, backend, args, proto, http_req=None,
                  advertise_refs=False):
-        super(ReceivePackHandler, self).__init__(backend, proto,
-            http_req=http_req)
+        super(ReceivePackHandler, self).__init__(
+                backend, proto, http_req=http_req)
         self.repo = backend.open_repository(args[0])
         self.advertise_refs = advertise_refs
 
     @classmethod
     def capabilities(cls):
-        return (CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS, CAPABILITY_QUIET,
-                CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K, CAPABILITY_NO_DONE)
+        return (CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS,
+                CAPABILITY_QUIET, CAPABILITY_OFS_DELTA,
+                CAPABILITY_SIDE_BAND_64K, CAPABILITY_NO_DONE)
 
     def _apply_pack(self, refs):
         all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
@@ -872,25 +888,26 @@ class ReceivePackHandler(PackHandler):
                 will_send_pack = True
 
         if will_send_pack:
-            # TODO: more informative error messages than just the exception string
+            # TODO: more informative error messages than just the exception
+            # string
             try:
                 recv = getattr(self.proto, "recv", None)
                 self.repo.object_store.add_thin_pack(self.proto.read, recv)
                 status.append((b'unpack', b'ok'))
             except all_exceptions as e:
                 status.append((b'unpack', str(e).replace('\n', '')))
-                # The pack may still have been moved in, but it may contain broken
-                # objects. We trust a later GC to clean it up.
+                # The pack may still have been moved in, but it may contain
+                # broken objects. We trust a later GC to clean it up.
         else:
-            # The git protocol want to find a status entry related to unpack process
-            # even if no pack data has been sent.
+            # The git protocol wants to find a status entry related to unpack
+            # process even if no pack data has been sent.
             status.append((b'unpack', b'ok'))
 
         for oldsha, sha, ref in refs:
             ref_status = b'ok'
             try:
                 if sha == ZERO_SHA:
-                    if not CAPABILITY_DELETE_REFS in self.capabilities():
+                    if CAPABILITY_DELETE_REFS not in self.capabilities():
                         raise GitProtocolError(
                           'Attempted to delete refs without delete-refs '
                           'capability.')
@@ -920,7 +937,9 @@ class ReceivePackHandler(PackHandler):
                 self.proto.write_pkt_line(None)
         else:
             write = self.proto.write_pkt_line
-            flush = lambda: None
+
+            def flush():
+                pass
 
         for name, msg in status:
             if name == b'unpack':
@@ -987,8 +1006,8 @@ class UploadArchiveHandler(Handler):
 DEFAULT_HANDLERS = {
   b'git-upload-pack': UploadPackHandler,
   b'git-receive-pack': ReceivePackHandler,
-#  b'git-upload-archive': UploadArchiveHandler,
-  }
+  # b'git-upload-archive': UploadArchiveHandler,
+}
 
 
 class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
@@ -1022,7 +1041,8 @@ class TCPGitServer(SocketServer.TCPServer):
         if handlers is not None:
             self.handlers.update(handlers)
         self.backend = backend
-        logger.info('Listening for TCP connections on %s:%d', listen_addr, port)
+        logger.info('Listening for TCP connections on %s:%d',
+                    listen_addr, port)
         SocketServer.TCPServer.__init__(self, (listen_addr, port),
                                         self._make_handler)
 
@@ -1052,16 +1072,18 @@ def main(argv=sys.argv):
         gitdir = args[1]
     else:
         gitdir = '.'
-    from dulwich import porcelain
-    porcelain.daemon(gitdir, address=options.listen_address,
-                     port=options.port)
+    # TODO(jelmer): Support git-daemon-export-ok and --export-all.
+    backend = FileSystemBackend(gitdir)
+    server = TCPGitServer(backend, options.listen_address, options.port)
+    server.serve_forever()
 
 
 def serve_command(handler_cls, argv=sys.argv, backend=None, inf=sys.stdin,
                   outf=sys.stdout):
     """Serve a single command.
 
-    This is mostly useful for the implementation of commands used by e.g. git+ssh.
+    This is mostly useful for the implementation of commands used by e.g.
+    git+ssh.
 
     :param handler_cls: `Handler` class to use for the request
     :param argv: execv-style command-line arguments. Defaults to sys.argv.
@@ -1072,6 +1094,7 @@ def serve_command(handler_cls, argv=sys.argv, backend=None, inf=sys.stdin,
     """
     if backend is None:
         backend = FileSystemBackend()
+
     def send_fn(data):
         outf.write(data)
         outf.flush()
@@ -1091,7 +1114,9 @@ def generate_info_refs(repo):
 def generate_objects_info_packs(repo):
     """Generate an index for for packs."""
     for pack in repo.object_store.packs:
-        yield b'P ' + pack.data.filename.encode(sys.getfilesystemencoding()) + b'\n'
+        yield (
+            b'P ' + pack.data.filename.encode(sys.getfilesystemencoding()) +
+            b'\n')
 
 
 def update_server_info(repo):
@@ -1100,10 +1125,12 @@ def update_server_info(repo):
     This generates info/refs and objects/info/packs,
     similar to "git update-server-info".
     """
-    repo._put_named_file(os.path.join('info', 'refs'),
+    repo._put_named_file(
+        os.path.join('info', 'refs'),
         b"".join(generate_info_refs(repo)))
 
-    repo._put_named_file(os.path.join('objects', 'info', 'packs'),
+    repo._put_named_file(
+        os.path.join('objects', 'info', 'packs'),
         b"".join(generate_objects_info_packs(repo)))
 
 

+ 19 - 9
dulwich/tests/__init__.py

@@ -30,7 +30,12 @@ import tempfile
 
 # If Python itself provides an exception, use that
 import unittest
-from unittest import SkipTest, TestCase as _TestCase, skipIf, expectedFailure
+from unittest import (  # noqa: F401
+    SkipTest,
+    TestCase as _TestCase,
+    skipIf,
+    expectedFailure,
+    )
 
 
 class TestCase(_TestCase):
@@ -52,8 +57,9 @@ class BlackboxTestCase(TestCase):
     """Blackbox testing."""
 
     # TODO(jelmer): Include more possible binary paths.
-    bin_directories = [os.path.abspath(os.path.join(os.path.dirname(__file__),
-        "..", "..", "bin")), '/usr/bin', '/usr/local/bin']
+    bin_directories = [os.path.abspath(os.path.join(
+            os.path.dirname(__file__), "..", "..", "bin")), '/usr/bin',
+            '/usr/local/bin']
 
     def bin_path(self, name):
         """Determine the full path of a binary.
@@ -83,10 +89,11 @@ class BlackboxTestCase(TestCase):
         #
         # Save us from all that headache and call python with the bin script.
         argv = [sys.executable, self.bin_path(name)] + args
-        return subprocess.Popen(argv,
-            stdout=subprocess.PIPE,
-            stdin=subprocess.PIPE, stderr=subprocess.PIPE,
-            env=env)
+        return subprocess.Popen(
+                argv,
+                stdout=subprocess.PIPE,
+                stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+                env=env)
 
 
 def self_test_suite():
@@ -101,6 +108,7 @@ def self_test_suite():
         'grafts',
         'greenthreads',
         'hooks',
+        'ignore',
         'index',
         'lru_cache',
         'objects',
@@ -134,15 +142,17 @@ def tutorial_test_suite():
         'conclusion',
         ]
     tutorial_files = ["../../docs/tutorial/%s.txt" % name for name in tutorial]
+
     def setup(test):
         test.__old_cwd = os.getcwd()
         test.__dulwich_tempdir = tempfile.mkdtemp()
         os.chdir(test.__dulwich_tempdir)
+
     def teardown(test):
         os.chdir(test.__old_cwd)
         shutil.rmtree(test.__dulwich_tempdir)
-    return doctest.DocFileSuite(setUp=setup, tearDown=teardown,
-        *tutorial_files)
+    return doctest.DocFileSuite(
+            setUp=setup, tearDown=teardown, *tutorial_files)
 
 
 def nocompat_test_suite():

+ 1 - 0
dulwich/tests/compat/__init__.py

@@ -22,6 +22,7 @@
 
 import unittest
 
+
 def test_suite():
     names = [
         'client',

+ 32 - 22
dulwich/tests/compat/server_utils.py

@@ -169,8 +169,9 @@ class ServerTests(object):
         port = self._start_server(self._source_repo)
 
         # Fetch at depth 1
-        run_git_or_fail(['clone', '--mirror', '--depth=1', '--no-single-branch',
-                        self.url(port), self._stub_repo.path])
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
         clone = self._stub_repo = Repo(self._stub_repo.path)
         expected_shallow = [b'35e0b59e187dd72a0af294aedffc213eaa4d03ff',
                             b'514dc6d3fbfe77361bcaef320c4d21b72bc10be9']
@@ -186,13 +187,14 @@ class ServerTests(object):
         self.addCleanup(tear_down_repo, self._stub_repo_dw)
 
         # shallow clone using stock git, then using dulwich
-        run_git_or_fail(['clone', '--mirror', '--depth=1', '--no-single-branch',
-                         'file://' + self._source_repo.path,
-                         self._stub_repo_git.path])
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             'file://' + self._source_repo.path, self._stub_repo_git.path])
 
         port = self._start_server(self._source_repo)
-        run_git_or_fail(['clone', '--mirror', '--depth=1', '--no-single-branch',
-                        self.url(port), self._stub_repo_dw.path])
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             self.url(port), self._stub_repo_dw.path])
 
         # compare the two clones; they should be equal
         self.assertReposEqual(Repo(self._stub_repo_git.path),
@@ -206,8 +208,9 @@ class ServerTests(object):
         port = self._start_server(self._source_repo)
 
         # Fetch at depth 2
-        run_git_or_fail(['clone', '--mirror', '--depth=2', '--no-single-branch',
-                        self.url(port), self._stub_repo.path])
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=2', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
         clone = self._stub_repo = Repo(self._stub_repo.path)
 
         # Fetching at the same depth is a no-op.
@@ -227,8 +230,9 @@ class ServerTests(object):
         port = self._start_server(self._source_repo)
 
         # Fetch at depth 2
-        run_git_or_fail(['clone', '--mirror', '--depth=2', '--no-single-branch',
-                        self.url(port), self._stub_repo.path])
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=2', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
         clone = self._stub_repo = Repo(self._stub_repo.path)
 
         # Fetching at the same depth is a no-op.
@@ -246,11 +250,13 @@ class ServerTests(object):
     def test_fetch_from_dulwich_issue_88_standard(self):
         # Basically an integration test to see that the ACK/NAK
         # generation works on repos with common head.
-        self._source_repo = self.import_repo('issue88_expect_ack_nak_server.export')
-        self._client_repo = self.import_repo('issue88_expect_ack_nak_client.export')
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_server.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
         port = self._start_server(self._source_repo)
 
-        run_git_or_fail(['fetch', self.url(port), 'master',],
+        run_git_or_fail(['fetch', self.url(port), 'master'],
                         cwd=self._client_repo.path)
         self.assertObjectStoreEqual(
             self._source_repo.object_store,
@@ -258,13 +264,16 @@ class ServerTests(object):
 
     def test_fetch_from_dulwich_issue_88_alternative(self):
         # likewise, but the case where the two repos have no common parent
-        self._source_repo = self.import_repo('issue88_expect_ack_nak_other.export')
-        self._client_repo = self.import_repo('issue88_expect_ack_nak_client.export')
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_other.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
         port = self._start_server(self._source_repo)
 
-        self.assertRaises(KeyError, self._client_repo.get_object,
+        self.assertRaises(
+            KeyError, self._client_repo.get_object,
             b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323')
-        run_git_or_fail(['fetch', self.url(port), 'master',],
+        run_git_or_fail(['fetch', self.url(port), 'master'],
                         cwd=self._client_repo.path)
         self.assertEqual(b'commit', self._client_repo.get_object(
             b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323').type_name)
@@ -272,11 +281,13 @@ class ServerTests(object):
     def test_push_to_dulwich_issue_88_standard(self):
         # Same thing, but we reverse the role of the server/client
         # and do a push instead.
-        self._source_repo = self.import_repo('issue88_expect_ack_nak_client.export')
-        self._client_repo = self.import_repo('issue88_expect_ack_nak_server.export')
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_server.export')
         port = self._start_server(self._source_repo)
 
-        run_git_or_fail(['push', self.url(port), 'master',],
+        run_git_or_fail(['push', self.url(port), 'master'],
                         cwd=self._client_repo.path)
         self.assertReposEqual(self._source_repo, self._client_repo)
 
@@ -298,4 +309,3 @@ def ignore_error(error):
     (e_type, e_value, e_tb) = error
     return (issubclass(e_type, socket.error) and
             e_value[0] in (errno.ECONNRESET, errno.EPIPE))
-

+ 45 - 35
dulwich/tests/compat/test_client.py

@@ -45,9 +45,6 @@ except ImportError:
     BaseHTTPServer = http.server
     SimpleHTTPServer = http.server
 
-if sys.platform == 'win32':
-    import ctypes
-
 from dulwich import (
     client,
     errors,
@@ -71,11 +68,16 @@ from dulwich.tests.compat.utils import (
     )
 
 
+if sys.platform == 'win32':
+    import ctypes
+
+
 class DulwichClientTestBase(object):
     """Tests for client/server compatibility."""
 
     def setUp(self):
-        self.gitroot = os.path.dirname(import_repo_to_dir('server_new.export').rstrip(os.sep))
+        self.gitroot = os.path.dirname(
+                import_repo_to_dir('server_new.export').rstrip(os.sep))
         self.dest = os.path.join(self.gitroot, 'dest')
         file.ensure_dir_exists(self.dest)
         run_git_or_fail(['init', '--quiet', '--bare'], cwd=self.dest)
@@ -102,7 +104,7 @@ class DulwichClientTestBase(object):
         with repo.Repo(srcpath) as src:
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
-            c.send_pack(self._build_path(b'/dest'), lambda _: sendrefs,
+            c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
                         src.object_store.generate_pack_contents)
 
     def test_send_pack(self):
@@ -122,7 +124,7 @@ class DulwichClientTestBase(object):
         with repo.Repo(srcpath) as src:
             sendrefs = dict(src.get_refs())
             del sendrefs[b'HEAD']
-            c.send_pack(self._build_path(b'/dest'), lambda _: sendrefs,
+            c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
                         src.object_store.generate_pack_contents)
             self.assertDestEqualsSrc()
 
@@ -160,7 +162,8 @@ class DulwichClientTestBase(object):
             sendrefs, gen_pack = self.compute_send(src)
             c = self._client()
             try:
-                c.send_pack(self._build_path(b'/dest'), lambda _: sendrefs, gen_pack)
+                c.send_pack(self._build_path('/dest'),
+                            lambda _: sendrefs, gen_pack)
             except errors.UpdateRefsError as e:
                 self.assertEqual('refs/heads/master failed to update',
                                  e.args[0])
@@ -178,13 +181,15 @@ class DulwichClientTestBase(object):
             sendrefs, gen_pack = self.compute_send(src)
             c = self._client()
             try:
-                c.send_pack(self._build_path(b'/dest'), lambda _: sendrefs, gen_pack)
+                c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
+                            gen_pack)
             except errors.UpdateRefsError as e:
-                self.assertIn(str(e),
-                              ['{0}, {1} failed to update'.format(
-                                  branch.decode('ascii'), master.decode('ascii')),
-                               '{1}, {0} failed to update'.format(
-                                   branch.decode('ascii'), master.decode('ascii'))])
+                self.assertIn(
+                        str(e),
+                        ['{0}, {1} failed to update'.format(
+                            branch.decode('ascii'), master.decode('ascii')),
+                         '{1}, {0} failed to update'.format(
+                             branch.decode('ascii'), master.decode('ascii'))])
                 self.assertEqual({branch: b'non-fast-forward',
                                   master: b'non-fast-forward'},
                                  e.ref_status)
@@ -192,7 +197,7 @@ class DulwichClientTestBase(object):
     def test_archive(self):
         c = self._client()
         f = BytesIO()
-        c.archive(self._build_path(b'/server_new.export'), b'HEAD', f.write)
+        c.archive(self._build_path('/server_new.export'), b'HEAD', f.write)
         f.seek(0)
         tf = tarfile.open(fileobj=f)
         self.assertEqual(['baz', 'foo'], tf.getnames())
@@ -200,7 +205,7 @@ class DulwichClientTestBase(object):
     def test_fetch_pack(self):
         c = self._client()
         with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
-            refs = c.fetch(self._build_path(b'/server_new.export'), dest)
+            refs = c.fetch(self._build_path('/server_new.export'), dest)
             for r in refs.items():
                 dest.refs.set_if_equals(r[0], None, r[1])
             self.assertDestEqualsSrc()
@@ -212,7 +217,7 @@ class DulwichClientTestBase(object):
         c = self._client()
         repo_dir = os.path.join(self.gitroot, 'server_new.export')
         with repo.Repo(repo_dir) as dest:
-            refs = c.fetch(self._build_path(b'/dest'), dest)
+            refs = c.fetch(self._build_path('/dest'), dest)
             for r in refs.items():
                 dest.refs.set_if_equals(r[0], None, r[1])
             self.assertDestEqualsSrc()
@@ -221,7 +226,7 @@ class DulwichClientTestBase(object):
         c = self._client()
         c._fetch_capabilities.remove(b'side-band-64k')
         with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
-            refs = c.fetch(self._build_path(b'/server_new.export'), dest)
+            refs = c.fetch(self._build_path('/server_new.export'), dest)
             for r in refs.items():
                 dest.refs.set_if_equals(r[0], None, r[1])
             self.assertDestEqualsSrc()
@@ -231,7 +236,8 @@ class DulwichClientTestBase(object):
         # be ignored
         c = self._client()
         with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
-            refs = c.fetch(self._build_path(b'/server_new.export'), dest,
+            refs = c.fetch(
+                self._build_path('/server_new.export'), dest,
                 lambda refs: [protocol.ZERO_SHA])
             for r in refs.items():
                 dest.refs.set_if_equals(r[0], None, r[1])
@@ -244,15 +250,18 @@ class DulwichClientTestBase(object):
             sendrefs = dict(dest.refs)
             sendrefs[b'refs/heads/abranch'] = b"00" * 20
             del sendrefs[b'HEAD']
-            gen_pack = lambda have, want: []
+
+            def gen_pack(have, want):
+                return []
             c = self._client()
             self.assertEqual(dest.refs[b"refs/heads/abranch"], dummy_commit)
-            c.send_pack(self._build_path(b'/dest'), lambda _: sendrefs, gen_pack)
+            c.send_pack(
+                self._build_path('/dest'), lambda _: sendrefs, gen_pack)
             self.assertFalse(b"refs/heads/abranch" in dest.refs)
 
     def test_get_refs(self):
         c = self._client()
-        refs = c.get_refs(self._build_path(b'/server_new.export'))
+        refs = c.get_refs(self._build_path('/server_new.export'))
 
         repo_dir = os.path.join(self.gitroot, 'server_new.export')
         with repo.Repo(repo_dir) as dest:
@@ -266,7 +275,7 @@ class DulwichTCPClientTest(CompatTestCase, DulwichClientTestBase):
         DulwichClientTestBase.setUp(self)
         if check_for_daemon(limit=1):
             raise SkipTest('git-daemon was already running on port %s' %
-                              protocol.TCP_GIT_PORT)
+                           protocol.TCP_GIT_PORT)
         fd, self.pidfile = tempfile.mkstemp(prefix='dulwich-test-git-client',
                                             suffix=".pid")
         os.fdopen(fd).close()
@@ -319,9 +328,9 @@ class TestSSHVendor(object):
 
     @staticmethod
     def run_command(host, command, username=None, port=None):
-        cmd, path = command.split(b' ')
-        cmd = cmd.split(b'-', 1)
-        path = path.replace(b"'", b"")
+        cmd, path = command.split(' ')
+        cmd = cmd.split('-', 1)
+        path = path.replace("'", "")
         p = subprocess.Popen(cmd + [path], bufsize=0, stdin=subprocess.PIPE,
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
         return client.SubprocessWrapper(p)
@@ -344,7 +353,7 @@ class DulwichMockSSHClientTest(CompatTestCase, DulwichClientTestBase):
         return client.SSHGitClient('localhost')
 
     def _build_path(self, path):
-        return self.gitroot.encode(sys.getfilesystemencoding()) + path
+        return self.gitroot + path
 
 
 class DulwichSubprocessClientTest(CompatTestCase, DulwichClientTestBase):
@@ -361,7 +370,7 @@ class DulwichSubprocessClientTest(CompatTestCase, DulwichClientTestBase):
         return client.SubprocessGitClient(stderr=subprocess.PIPE)
 
     def _build_path(self, path):
-        return self.gitroot.encode(sys.getfilesystemencoding()) + path
+        return self.gitroot + path
 
 
 class GitHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
@@ -387,7 +396,8 @@ class GitHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
     def run_backend(self):
         """Call out to git http-backend."""
         # Based on CGIHTTPServer.CGIHTTPRequestHandler.run_cgi:
-        # Copyright (c) 2001-2010 Python Software Foundation; All Rights Reserved
+        # Copyright (c) 2001-2010 Python Software Foundation;
+        # All Rights Reserved
         # Licensed under the Python Software Foundation License.
         rest = self.path
         # find an explicit query string, if present.
@@ -419,7 +429,8 @@ class GitHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
         if authorization:
             authorization = authorization.split()
             if len(authorization) == 2:
-                import base64, binascii
+                import base64
+                import binascii
                 env['AUTH_TYPE'] = authorization[0]
                 if authorization[0].lower() == "basic":
                     try:
@@ -481,7 +492,8 @@ class GitHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
         args = ['http-backend']
         if '=' not in decoded_query:
             args.append(decoded_query)
-        stdout = run_git_or_fail(args, input=data, env=env, stderr=subprocess.PIPE)
+        stdout = run_git_or_fail(
+            args, input=data, env=env, stderr=subprocess.PIPE)
         self.wfile.write(stdout)
 
 
@@ -490,7 +502,8 @@ class HTTPGitServer(BaseHTTPServer.HTTPServer):
     allow_reuse_address = True
 
     def __init__(self, server_address, root_path):
-        BaseHTTPServer.HTTPServer.__init__(self, server_address, GitHTTPRequestHandler)
+        BaseHTTPServer.HTTPServer.__init__(
+            self, server_address, GitHTTPRequestHandler)
         self.root_path = root_path
         self.server_name = "localhost"
 
@@ -523,10 +536,7 @@ class DulwichHttpClientTest(CompatTestCase, DulwichClientTestBase):
         return client.HttpGitClient(self._httpd.get_url())
 
     def _build_path(self, path):
-        if sys.version_info[0] == 3:
-            return path.decode('ascii')
-        else:
-            return path
+        return path
 
     def test_archive(self):
         raise SkipTest("exporting archives not supported over http")

+ 1 - 0
dulwich/tests/compat/test_pack.py

@@ -48,6 +48,7 @@ from dulwich.tests.compat.utils import (
 
 _NON_DELTA_RE = re.compile(b'non delta: (?P<non_delta>\d+) objects')
 
+
 def _git_verify_pack_object_list(output):
     pack_shas = set()
     for line in output.splitlines():

+ 8 - 8
dulwich/tests/compat/test_repository.py

@@ -26,8 +26,6 @@ from itertools import chain
 import os
 import tempfile
 
-import sys
-
 from dulwich.objects import (
     hex_to_sha,
     )
@@ -94,7 +92,8 @@ class ObjectStoreTestCase(CompatTestCase):
     # TODO(dborowitz): peeled ref tests
 
     def _get_loose_shas(self):
-        output = self._run_git(['rev-list', '--all', '--objects', '--unpacked'])
+        output = self._run_git(
+            ['rev-list', '--all', '--objects', '--unpacked'])
         return self._parse_objects(output)
 
     def _get_all_shas(self):
@@ -110,8 +109,8 @@ class ObjectStoreTestCase(CompatTestCase):
         self.assertEqual(expected_shas, actual_shas)
 
     def test_loose_objects(self):
-        # TODO(dborowitz): This is currently not very useful since fast-imported
-        # repos only contained packed objects.
+        # TODO(dborowitz): This is currently not very useful since
+        # fast-imported repos only contained packed objects.
         expected_shas = self._get_loose_shas()
         self.assertShasMatch(expected_shas,
                              self._repo.object_store._iter_loose_objects())
@@ -147,7 +146,8 @@ class WorkingTreeTestCase(ObjectStoreTestCase):
 
     def setUp(self):
         super(WorkingTreeTestCase, self).setUp()
-        self._worktree_path = self.create_new_worktree(self._repo.path, 'branch')
+        self._worktree_path = self.create_new_worktree(
+            self._repo.path, 'branch')
         self._worktree_repo = Repo(self._worktree_path)
         self.addCleanup(self._worktree_repo.close)
         self._mainworktree_repo = self._repo
@@ -184,8 +184,8 @@ class WorkingTreeTestCase(ObjectStoreTestCase):
         self.assertEqual(os.path.normcase(worktrees[0][0]),
                          os.path.normcase(self._mainworktree_repo.path))
 
-        output = run_git_or_fail(['worktree', 'list'],
-            cwd=self._mainworktree_repo.path)
+        output = run_git_or_fail(
+            ['worktree', 'list'], cwd=self._mainworktree_repo.path)
         worktrees = self._parse_worktree_list(output)
         self.assertEqual(len(worktrees), self._number_of_working_tree)
         self.assertEqual(worktrees[0][1], '(bare)')

+ 5 - 3
dulwich/tests/compat/test_server.py

@@ -43,7 +43,9 @@ from dulwich.tests.compat.utils import (
     require_git_version,
     )
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class GitServerTestCase(ServerTests, CompatTestCase):
     """Tests for client/server compatibility.
 
@@ -73,7 +75,8 @@ class GitServerTestCase(ServerTests, CompatTestCase):
         return port
 
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class GitServerSideBand64kTestCase(GitServerTestCase):
     """Tests for client/server compatibility with side-band-64k support."""
 
@@ -88,7 +91,6 @@ class GitServerSideBand64kTestCase(GitServerTestCase):
         if os.name == 'nt':
             require_git_version((1, 9, 3))
 
-
     def _handlers(self):
         return None  # default handlers include side-band-64k
 

+ 9 - 5
dulwich/tests/compat/test_web.py

@@ -54,7 +54,8 @@ from dulwich.tests.compat.utils import (
     )
 
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class WebTests(ServerTests):
     """Base tests for web server tests.
 
@@ -78,7 +79,8 @@ class WebTests(ServerTests):
         return port
 
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class SmartWebTestCase(WebTests, CompatTestCase):
     """Test cases for smart HTTP server.
 
@@ -111,13 +113,15 @@ def patch_capabilities(handler, caps_removed):
     original_capabilities = handler.capabilities
     filtered_capabilities = tuple(
         i for i in original_capabilities() if i not in caps_removed)
+
     def capabilities(cls):
         return filtered_capabilities
     handler.capabilities = classmethod(capabilities)
     return original_capabilities
 
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class SmartWebSideBand64kTestCase(SmartWebTestCase):
     """Test cases for smart HTTP server with side-band-64k support."""
 
@@ -162,7 +166,8 @@ class SmartWebSideBand64kNoDoneTestCase(SmartWebTestCase):
         self.assertIn(b'no-done', caps)
 
 
-@skipIf(sys.platform == 'win32', 'Broken on windows, with very long fail time.')
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
 class DumbWebTestCase(WebTests, CompatTestCase):
     """Test cases for dumb HTTP server."""
 
@@ -199,4 +204,3 @@ class DumbWebTestCase(WebTests, CompatTestCase):
 
     def test_push_to_dulwich_issue_88_standard(self):
         raise SkipTest('Dumb web pushing not supported.')
-

+ 3 - 1
dulwich/tests/compat/utils.py

@@ -233,10 +233,12 @@ class CompatTestCase(TestCase):
 
         :param name: The name of the repository export file, relative to
             dulwich/tests/data/repos.
-        :returns: An initialized Repo object that lives in a temporary directory.
+        :returns: An initialized Repo object that lives in a temporary
+            directory.
         """
         path = import_repo_to_dir(name)
         repo = Repo(path)
+
         def cleanup():
             repo.close()
             rmtree_ro(os.path.dirname(path.rstrip(os.sep)))

+ 0 - 1
dulwich/tests/test_archive.py

@@ -21,7 +21,6 @@
 """Tests for archive support."""
 
 from io import BytesIO
-import sys
 import tarfile
 
 from dulwich.archive import tar_stream

+ 71 - 41
dulwich/tests/test_client.py

@@ -97,14 +97,15 @@ class GitClientTests(TestCase):
                                   self.rout.write)
 
     def test_caps(self):
-        agent_cap = ('agent=dulwich/%d.%d.%d' % dulwich.__version__).encode('ascii')
+        agent_cap = (
+            'agent=dulwich/%d.%d.%d' % dulwich.__version__).encode('ascii')
         self.assertEqual(set([b'multi_ack', b'side-band-64k', b'ofs-delta',
-                               b'thin-pack', b'multi_ack_detailed',
-                               agent_cap]),
-                          set(self.client._fetch_capabilities))
+                              b'thin-pack', b'multi_ack_detailed',
+                              agent_cap]),
+                         set(self.client._fetch_capabilities))
         self.assertEqual(set([b'ofs-delta', b'report-status', b'side-band-64k',
                               agent_cap]),
-                          set(self.client._send_capabilities))
+                         set(self.client._send_capabilities))
 
     def test_archive_ack(self):
         self.rin.write(
@@ -117,6 +118,7 @@ class GitClientTests(TestCase):
     def test_fetch_empty(self):
         self.rin.write(b'0000')
         self.rin.seek(0)
+
         def check_heads(heads):
             self.assertIs(heads, None)
             return []
@@ -125,11 +127,13 @@ class GitClientTests(TestCase):
 
     def test_fetch_pack_ignores_magic_ref(self):
         self.rin.write(
-            b'00000000000000000000000000000000000000000000 capabilities^{}\x00 multi_ack '
+            b'00000000000000000000000000000000000000000000 capabilities^{}'
+            b'\x00 multi_ack '
             b'thin-pack side-band side-band-64k ofs-delta shallow no-progress '
             b'include-tag\n'
             b'0000')
         self.rin.seek(0)
+
         def check_heads(heads):
             self.assertEquals({}, heads)
             return []
@@ -157,7 +161,7 @@ class GitClientTests(TestCase):
                 b"ng refs/foo/bar pre-receive hook declined",
                 b'']
         for pkt in pkts:
-            if pkt ==  b'':
+            if pkt == b'':
                 self.rin.write(b"0000")
             else:
                 self.rin.write(("%04x" % (len(pkt)+4)).encode('ascii') + pkt)
@@ -193,7 +197,8 @@ class GitClientTests(TestCase):
 
         def determine_wants(refs):
             return {
-                b'refs/heads/master': b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
+                b'refs/heads/master':
+                    b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
         def generate_pack_contents(have, want):
@@ -266,7 +271,8 @@ class GitClientTests(TestCase):
             return {
                 b'refs/heads/blah12':
                 b'310ca9477129b8586fa2afc779c1f57cf64bba6c',
-                b'refs/heads/master': b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
+                b'refs/heads/master':
+                    b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
         def generate_pack_contents(have, want):
@@ -308,7 +314,8 @@ class GitClientTests(TestCase):
         def determine_wants(refs):
             return {
                 b'refs/heads/blah12': commit.id,
-                b'refs/heads/master': b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
+                b'refs/heads/master':
+                    b'310ca9477129b8586fa2afc779c1f57cf64bba6c'
             }
 
         def generate_pack_contents(have, want):
@@ -320,9 +327,11 @@ class GitClientTests(TestCase):
         self.assertIn(
             self.rout.getvalue(),
             [b'007f0000000000000000000000000000000000000000 ' + commit.id +
-             b' refs/heads/blah12\x00report-status ofs-delta0000' + f.getvalue(),
+             b' refs/heads/blah12\x00report-status ofs-delta0000' +
+             f.getvalue(),
              b'007f0000000000000000000000000000000000000000 ' + commit.id +
-             b' refs/heads/blah12\x00ofs-delta report-status0000' + f.getvalue()])
+             b' refs/heads/blah12\x00ofs-delta report-status0000' +
+             f.getvalue()])
 
     def test_send_pack_no_deleteref_delete_only(self):
         pkts = [b'310ca9477129b8586fa2afc779c1f57cf64bba6c refs/heads/master'
@@ -545,7 +554,8 @@ class TestGetTransportAndPathFromUrl(TestCase):
         self.assertEqual('/bar/baz', path)
 
     def test_ssh_homepath(self):
-        c, path = get_transport_and_path_from_url('git+ssh://foo.com/~/bar/baz')
+        c, path = get_transport_and_path_from_url(
+            'git+ssh://foo.com/~/bar/baz')
         self.assertTrue(isinstance(c, SSHGitClient))
         self.assertEqual('foo.com', c.host)
         self.assertEqual(None, c.port)
@@ -561,21 +571,25 @@ class TestGetTransportAndPathFromUrl(TestCase):
         self.assertEqual('/~/bar/baz', path)
 
     def test_ssh_host_relpath(self):
-        self.assertRaises(ValueError, get_transport_and_path_from_url,
+        self.assertRaises(
+            ValueError, get_transport_and_path_from_url,
             'foo.com:bar/baz')
 
     def test_ssh_user_host_relpath(self):
-        self.assertRaises(ValueError, get_transport_and_path_from_url,
+        self.assertRaises(
+            ValueError, get_transport_and_path_from_url,
             'user@foo.com:bar/baz')
 
     def test_local_path(self):
-        self.assertRaises(ValueError, get_transport_and_path_from_url,
+        self.assertRaises(
+            ValueError, get_transport_and_path_from_url,
             'foo.bar/baz')
 
     def test_error(self):
         # Need to use a known urlparse.uses_netloc URL scheme to get the
         # expected parsing of the URL on Python versions less than 2.6.5
-        self.assertRaises(ValueError, get_transport_and_path_from_url,
+        self.assertRaises(
+            ValueError, get_transport_and_path_from_url,
             'prospero://bar/baz')
 
     def test_http(self):
@@ -599,15 +613,13 @@ class TestSSHVendor(object):
         self.port = None
 
     def run_command(self, host, command, username=None, port=None):
-        if not isinstance(command, bytes):
-            raise TypeError(command)
-
         self.host = host
         self.command = command
         self.username = username
         self.port = port
 
-        class Subprocess: pass
+        class Subprocess:
+            pass
         setattr(Subprocess, 'read', lambda: None)
         setattr(Subprocess, 'write', lambda: None)
         setattr(Subprocess, 'close', lambda: None)
@@ -645,20 +657,22 @@ class SSHGitClientTests(TestCase):
         self.assertEqual('ssh://user@git.samba.org:2222/tmp/repo.git', url)
 
     def test_default_command(self):
-        self.assertEqual(b'git-upload-pack',
-                self.client._get_cmd_path(b'upload-pack'))
+        self.assertEqual(
+            b'git-upload-pack',
+            self.client._get_cmd_path(b'upload-pack'))
 
     def test_alternative_command_path(self):
         self.client.alternative_paths[b'upload-pack'] = (
             b'/usr/lib/git/git-upload-pack')
-        self.assertEqual(b'/usr/lib/git/git-upload-pack',
+        self.assertEqual(
+            b'/usr/lib/git/git-upload-pack',
             self.client._get_cmd_path(b'upload-pack'))
 
     def test_alternative_command_path_spaces(self):
         self.client.alternative_paths[b'upload-pack'] = (
             b'/usr/lib/git/git-upload-pack -ibla')
         self.assertEqual(b"/usr/lib/git/git-upload-pack -ibla",
-            self.client._get_cmd_path(b'upload-pack'))
+                         self.client._get_cmd_path(b'upload-pack'))
 
     def test_connect(self):
         server = self.server
@@ -670,11 +684,11 @@ class SSHGitClientTests(TestCase):
         client._connect(b"command", b"/path/to/repo")
         self.assertEqual(b"username", server.username)
         self.assertEqual(1337, server.port)
-        self.assertEqual(b"git-command '/path/to/repo'", server.command)
+        self.assertEqual("git-command '/path/to/repo'", server.command)
 
         client._connect(b"relative-command", b"/~/path/to/repo")
-        self.assertEqual(b"git-relative-command '~/path/to/repo'",
-                          server.command)
+        self.assertEqual("git-relative-command '~/path/to/repo'",
+                         server.command)
 
 
 class ReportStatusParserTests(TestCase):
@@ -723,16 +737,19 @@ class LocalGitClientTests(TestCase):
         self.addCleanup(tear_down_repo, s)
         out = BytesIO()
         walker = {}
-        ret = c.fetch_pack(s.path, lambda heads: [], graph_walker=walker,
-            pack_data=out.write)
+        ret = c.fetch_pack(
+            s.path, lambda heads: [], graph_walker=walker, pack_data=out.write)
         self.assertEqual({
             b'HEAD': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
             b'refs/heads/master': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
             b'refs/tags/mytag': b'28237f4dc30d0d462658d6b937b08a0f0b6ef55a',
-            b'refs/tags/mytag-packed': b'b0931cadc54336e78a1d980420e3268903b57a50'
+            b'refs/tags/mytag-packed':
+                b'b0931cadc54336e78a1d980420e3268903b57a50'
             }, ret)
-        self.assertEqual(b"PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08"
-            b"\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e", out.getvalue())
+        self.assertEqual(
+                b"PACK\x00\x00\x00\x02\x00\x00\x00\x00\x02\x9d\x08"
+                b"\x82;\xd8\xa8\xea\xb5\x10\xadj\xc7\\\x82<\xfd>\xd3\x1e",
+                out.getvalue())
 
     def test_fetch_pack_none(self):
         c = LocalGitClient()
@@ -740,11 +757,13 @@ class LocalGitClientTests(TestCase):
         self.addCleanup(tear_down_repo, s)
         out = BytesIO()
         walker = MemoryRepo().get_graph_walker()
-        c.fetch_pack(s.path,
+        c.fetch_pack(
+            s.path,
             lambda heads: [b"a90fa2d900a17e99b433217e988c4eb4a2e9a097"],
             graph_walker=walker, pack_data=out.write)
         # Hardcoding is not ideal, but we'll fix that some other day..
-        self.assertTrue(out.getvalue().startswith(b'PACK\x00\x00\x00\x02\x00\x00\x00\x07'))
+        self.assertTrue(out.getvalue().startswith(
+                b'PACK\x00\x00\x00\x02\x00\x00\x00\x07'))
 
     def test_send_pack_without_changes(self):
         local = open_repo('a.git')
@@ -773,11 +792,11 @@ class LocalGitClientTests(TestCase):
         self.assertDictEqual(local.refs.as_dict(), refs)
 
     def send_and_verify(self, branch, local, target):
-        """Send a branch from local to remote repository and verify it worked."""
+        """Send branch from local to remote repository and verify it worked."""
         client = LocalGitClient()
         ref_name = b"refs/heads/" + branch
         new_refs = client.send_pack(target.path,
-                                    lambda _: { ref_name: local.refs[ref_name] },
+                                    lambda _: {ref_name: local.refs[ref_name]},
                                     local.object_store.generate_pack_contents)
 
         self.assertEqual(local.refs[ref_name], new_refs[ref_name])
@@ -797,6 +816,14 @@ class HttpGitClientTests(TestCase):
         url = c.get_url(path)
         self.assertEqual('https://github.com/jelmer/dulwich', url)
 
+    def test_get_url_bytes_path(self):
+        base_url = 'https://github.com/jelmer/dulwich'
+        path_bytes = b'/jelmer/dulwich'
+        c = HttpGitClient(base_url)
+
+        url = c.get_url(path_bytes)
+        self.assertEqual('https://github.com/jelmer/dulwich', url)
+
     def test_get_url_with_username_and_passwd(self):
         base_url = 'https://github.com/jelmer/dulwich'
         path = '/jelmer/dulwich'
@@ -812,7 +839,8 @@ class HttpGitClientTests(TestCase):
         self.assertEqual('user', c._username)
         self.assertEqual('passwd', c._password)
         [pw_handler] = [
-            h for h in c.opener.handlers if getattr(h, 'passwd', None) is not None]
+            h for h in c.opener.handlers
+            if getattr(h, 'passwd', None) is not None]
         self.assertEqual(
             ('user', 'passwd'),
             pw_handler.passwd.find_user_password(
@@ -825,7 +853,8 @@ class HttpGitClientTests(TestCase):
         self.assertIs(None, c._username)
         self.assertIs(None, c._password)
         pw_handler = [
-            h for h in c.opener.handlers if getattr(h, 'passwd', None) is not None]
+            h for h in c.opener.handlers
+            if getattr(h, 'passwd', None) is not None]
         self.assertEqual(0, len(pw_handler))
 
     def test_from_parsedurl_on_url_with_quoted_credentials(self):
@@ -844,7 +873,8 @@ class HttpGitClientTests(TestCase):
         self.assertEqual(original_username, c._username)
         self.assertEqual(original_password, c._password)
         [pw_handler] = [
-            h for h in c.opener.handlers if getattr(h, 'passwd', None) is not None]
+            h for h in c.opener.handlers
+            if getattr(h, 'passwd', None) is not None]
         self.assertEqual(
             (original_username, original_password),
             pw_handler.passwd.find_user_password(
@@ -865,7 +895,7 @@ class TCPGitClientTests(TestCase):
         host = 'github.com'
         path = '/jelmer/dulwich'
         port = 9090
-        c = TCPGitClient(host, port=9090)
+        c = TCPGitClient(host, port=port)
 
         url = c.get_url(path)
         self.assertEqual('git://github.com:9090/jelmer/dulwich', url)

+ 19 - 20
dulwich/tests/test_config.py

@@ -21,7 +21,6 @@
 """Tests for reading and writing configuration files."""
 
 from io import BytesIO
-import os
 from dulwich.config import (
     ConfigDict,
     ConfigFile,
@@ -51,10 +50,10 @@ class ConfigFileTests(TestCase):
 
     def test_default_config(self):
         cf = self.from_file(b"""[core]
-	repositoryformatversion = 0
-	filemode = true
-	bare = false
-	logallrefupdates = true
+\trepositoryformatversion = 0
+\tfilemode = true
+\tbare = false
+\tlogallrefupdates = true
 """)
         self.assertEqual(ConfigFile({(b"core", ): {
             b"repositoryformatversion": b"0",
@@ -97,8 +96,7 @@ class ConfigFileTests(TestCase):
         self.assertEqual(b"barla", cf.get((b"core", ), b"foo"))
 
     def test_from_file_with_open_quoted(self):
-        self.assertRaises(ValueError,
-            self.from_file, b"[core]\nfoo = \"bar\n")
+        self.assertRaises(ValueError, self.from_file, b"[core]\nfoo = \"bar\n")
 
     def test_from_file_with_quotes(self):
         cf = self.from_file(
@@ -124,8 +122,8 @@ class ConfigFileTests(TestCase):
         self.assertEqual(b"bar", cf.get((b"branch", b"foo"), b"foo"))
 
     def test_from_file_subsection_invalid(self):
-        self.assertRaises(ValueError,
-            self.from_file, b"[branch \"foo]\nfoo = bar\n")
+        self.assertRaises(
+                ValueError, self.from_file, b"[branch \"foo]\nfoo = bar\n")
 
     def test_from_file_subsection_not_quoted(self):
         cf = self.from_file(b"[branch.foo]\nfoo = bar\n")
@@ -157,21 +155,23 @@ class ConfigFileTests(TestCase):
 
     def test_quoted(self):
         cf = self.from_file(b"""[gui]
-	fontdiff = -family \\\"Ubuntu Mono\\\" -size 11 -weight normal -slant roman -underline 0 -overstrike 0
+\tfontdiff = -family \\\"Ubuntu Mono\\\" -size 11 -overstrike 0
 """)
         self.assertEqual(ConfigFile({(b'gui', ): {
-            b'fontdiff': b'-family "Ubuntu Mono" -size 11 -weight normal -slant roman -underline 0 -overstrike 0',
+            b'fontdiff': b'-family "Ubuntu Mono" -size 11 -overstrike 0',
         }}), cf)
 
     def test_quoted_multiline(self):
         cf = self.from_file(b"""[alias]
 who = \"!who() {\\
-  git log --no-merges --pretty=format:'%an - %ae' $@ | sort | uniq -c | sort -rn;\\
+  git log --no-merges --pretty=format:'%an - %ae' $@ | uniq -c | sort -rn;\\
 };\\
 who\"
 """)
         self.assertEqual(ConfigFile({(b'alias', ): {
-            b'who': b"!who() {git log --no-merges --pretty=format:'%an - %ae' $@ | sort | uniq -c | sort -rn;};who"}}), cf)
+            b'who': (b"!who() {git log --no-merges --pretty=format:'%an - "
+                     b"%ae' $@ | uniq -c | sort -rn;};who")
+            }}), cf)
 
     def test_set_hash_gets_quoted(self):
         c = ConfigFile()
@@ -224,15 +224,13 @@ class ConfigDictTests(TestCase):
         cd = ConfigDict()
         cd.set((b"core2", ), b"foo", b"bloe")
 
-        self.assertEqual([],
-            list(cd.iteritems((b"core", ))))
+        self.assertEqual([], list(cd.iteritems((b"core", ))))
 
     def test_itersections(self):
         cd = ConfigDict()
         cd.set((b"core2", ), b"foo", b"bloe")
 
-        self.assertEqual([(b"core2", )],
-            list(cd.itersections()))
+        self.assertEqual([(b"core2", )], list(cd.itersections()))
 
 
 class StackedConfigTests(TestCase):
@@ -318,9 +316,10 @@ class SubmodulesTests(TestCase):
     def testSubmodules(self):
         cf = ConfigFile.from_file(BytesIO(b"""\
 [submodule "core/lib"]
-	path = core/lib
-	url = https://github.com/phhusson/QuasselC.git
+\tpath = core/lib
+\turl = https://github.com/phhusson/QuasselC.git
 """))
         got = list(parse_submodules(cf))
         self.assertEqual([
-            (b'core/lib', b'https://github.com/phhusson/QuasselC.git', b'core/lib')], got)
+            (b'core/lib', b'https://github.com/phhusson/QuasselC.git',
+             b'core/lib')], got)

+ 86 - 46
dulwich/tests/test_diff_tree.py

@@ -171,7 +171,8 @@ class TreeChangesTest(DiffTestCase):
         self.assertChangesEqual([], self.empty_tree, self.empty_tree)
         self.assertChangesEqual([], tree, tree)
         self.assertChangesEqual(
-            [TreeChange(CHANGE_UNCHANGED, (b'a', F, blob.id), (b'a', F, blob.id)),
+            [TreeChange(CHANGE_UNCHANGED, (b'a', F, blob.id),
+                        (b'a', F, blob.id)),
              TreeChange(CHANGE_UNCHANGED, (b'b/c', F, blob.id),
                         (b'b/c', F, blob.id))],
             tree, tree, want_unchanged=True)
@@ -269,7 +270,8 @@ class TreeChangesTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob), (b'a.', blob), (b'a..', blob)])
         # Tree order is the reverse of this, so if we used tree order, 'a..'
         # would not be merged.
-        tree2 = self.commit_tree([(b'a/x', blob), (b'a./x', blob), (b'a..', blob)])
+        tree2 = self.commit_tree(
+                [(b'a/x', blob), (b'a./x', blob), (b'a..', blob)])
 
         self.assertChangesEqual(
             [TreeChange.delete((b'a', F, blob.id)),
@@ -352,7 +354,8 @@ class TreeChangesTest(DiffTestCase):
         merge = self.commit_tree([(b'a', blob2)])
         self.assertChangesForMergeEqual(
             [[TreeChange.add((b'a', F, blob2.id)),
-              TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id))]],
+              TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
+                         (b'a', F, blob2.id))]],
             [parent1, parent2], merge)
 
     def test_tree_changes_for_merge_modify_modify_conflict(self):
@@ -363,8 +366,10 @@ class TreeChangesTest(DiffTestCase):
         parent2 = self.commit_tree([(b'a', blob2)])
         merge = self.commit_tree([(b'a', blob3)])
         self.assertChangesForMergeEqual(
-            [[TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob3.id)),
-              TreeChange(CHANGE_MODIFY, (b'a', F, blob2.id), (b'a', F, blob3.id))]],
+            [[TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
+                         (b'a', F, blob3.id)),
+              TreeChange(CHANGE_MODIFY, (b'a', F, blob2.id),
+                         (b'a', F, blob3.id))]],
             [parent1, parent2], merge)
 
     def test_tree_changes_for_merge_modify_no_conflict(self):
@@ -434,7 +439,8 @@ class TreeChangesTest(DiffTestCase):
         parent2 = self.commit_tree([])
         merge = self.commit_tree([(b'b', blob)])
         add = TreeChange.add((b'b', F, blob.id))
-        self.assertChangesForMergeEqual([[add, add]], [parent1, parent2], merge)
+        self.assertChangesForMergeEqual(
+                [[add, add]], [parent1, parent2], merge)
 
     def test_tree_changes_for_merge_add_exact_rename_conflict(self):
         blob = make_object(Blob, data=b'a\nb\nc\nd\n')
@@ -442,7 +448,8 @@ class TreeChangesTest(DiffTestCase):
         parent2 = self.commit_tree([])
         merge = self.commit_tree([(b'b', blob)])
         self.assertChangesForMergeEqual(
-            [[TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'b', F, blob.id)),
+            [[TreeChange(CHANGE_RENAME, (b'a', F, blob.id),
+                         (b'b', F, blob.id)),
               TreeChange.add((b'b', F, blob.id))]],
             [parent1, parent2], merge, rename_detector=self.detector)
 
@@ -453,7 +460,8 @@ class TreeChangesTest(DiffTestCase):
         parent2 = self.commit_tree([])
         merge = self.commit_tree([(b'b', blob2)])
         self.assertChangesForMergeEqual(
-            [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id)),
+            [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                         (b'b', F, blob2.id)),
               TreeChange.add((b'b', F, blob2.id))]],
             [parent1, parent2], merge, rename_detector=self.detector)
 
@@ -464,8 +472,10 @@ class TreeChangesTest(DiffTestCase):
         parent2 = self.commit_tree([(b'b', blob1)])
         merge = self.commit_tree([(b'b', blob2)])
         self.assertChangesForMergeEqual(
-            [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id)),
-              TreeChange(CHANGE_MODIFY, (b'b', F, blob1.id), (b'b', F, blob2.id))]],
+            [[TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                         (b'b', F, blob2.id)),
+              TreeChange(CHANGE_MODIFY, (b'b', F, blob1.id),
+                         (b'b', F, blob2.id))]],
             [parent1, parent2], merge, rename_detector=self.detector)
 
 
@@ -473,7 +483,8 @@ class RenameDetectionTest(DiffTestCase):
 
     def _do_test_count_blocks(self, count_blocks):
         blob = make_object(Blob, data=b'a\nb\na\n')
-        self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2}, count_blocks(blob))
+        self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2},
+                         count_blocks(blob))
 
     test_count_blocks = functest_builder(_do_test_count_blocks,
                                          _count_blocks_py)
@@ -491,7 +502,8 @@ class RenameDetectionTest(DiffTestCase):
 
     def _do_test_count_blocks_chunks(self, count_blocks):
         blob = ShaFile.from_raw_chunks(Blob.type_num, [b'a\nb', b'\na\n'])
-        self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2}, _count_blocks(blob))
+        self.assertEqual({hash(b'a\n'): 4, hash(b'b\n'): 2},
+                         _count_blocks(blob))
 
     test_count_blocks_chunks = functest_builder(_do_test_count_blocks_chunks,
                                                 _count_blocks_py)
@@ -502,8 +514,8 @@ class RenameDetectionTest(DiffTestCase):
         a = b'a' * 64
         data = a + b'xxx\ny\n' + a + b'zzz\n'
         blob = make_object(Blob, data=data)
-        self.assertEqual({hash(b'a' * 64): 128, hash(b'xxx\n'): 4, hash(b'y\n'): 2,
-                          hash(b'zzz\n'): 4},
+        self.assertEqual({hash(b'a' * 64): 128, hash(b'xxx\n'): 4,
+                          hash(b'y\n'): 2, hash(b'zzz\n'): 4},
                          _count_blocks(blob))
 
     test_count_blocks_long_lines = functest_builder(
@@ -577,7 +589,8 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'a', blob1), (b'b', blob3)])
         self.assertEqual(
-            [TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id), (b'b', F, blob3.id))],
+            [TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id),
+                        (b'b', F, blob3.id))],
             self.detect_renames(tree1, tree2))
 
     def test_exact_rename_one_to_one(self):
@@ -586,9 +599,11 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'c', blob1), (b'd', blob2)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob1.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'd', F, blob2.id))],
-            self.detect_renames(tree1, tree2))
+                [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                            (b'c', F, blob1.id)),
+                 TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
+                            (b'd', F, blob2.id))],
+                self.detect_renames(tree1, tree2))
 
     def test_exact_rename_split_different_type(self):
         blob = make_object(Blob, data=b'/foo')
@@ -605,9 +620,10 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1)])
         tree2 = self.commit_tree([(b'a', blob2, 0o120000), (b'b', blob1)])
         self.assertEqual(
-            [TreeChange.add((b'a', 0o120000, blob2.id)),
-             TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob1.id))],
-            self.detect_renames(tree1, tree2))
+                [TreeChange.add((b'a', 0o120000, blob2.id)),
+                 TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                            (b'b', F, blob1.id))],
+                self.detect_renames(tree1, tree2))
 
     def test_exact_rename_one_to_many(self):
         blob = make_object(Blob, data=b'1')
@@ -632,10 +648,13 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob), (b'b', blob)])
         tree2 = self.commit_tree([(b'c', blob), (b'd', blob), (b'e', blob)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob.id), (b'c', F, blob.id)),
-             TreeChange(CHANGE_COPY, (b'a', F, blob.id), (b'e', F, blob.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob.id), (b'd', F, blob.id))],
-            self.detect_renames(tree1, tree2))
+                [TreeChange(CHANGE_RENAME, (b'a', F, blob.id),
+                            (b'c', F, blob.id)),
+                 TreeChange(CHANGE_COPY, (b'a', F, blob.id),
+                            (b'e', F, blob.id)),
+                 TreeChange(CHANGE_RENAME, (b'b', F, blob.id),
+                            (b'd', F, blob.id))],
+                self.detect_renames(tree1, tree2))
 
     def test_exact_copy_modify(self):
         blob1 = make_object(Blob, data=b'a\nb\nc\nd\n')
@@ -643,8 +662,10 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1)])
         tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
         self.assertEqual(
-            [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id)),
-             TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob1.id))],
+            [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
+                        (b'a', F, blob2.id)),
+             TreeChange(CHANGE_COPY, (b'a', F, blob1.id),
+                        (b'b', F, blob1.id))],
             self.detect_renames(tree1, tree2))
 
     def test_exact_copy_change_mode(self):
@@ -663,7 +684,8 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1)])
         tree2 = self.commit_tree([(b'b', blob2)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id))],
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'b', F, blob2.id))],
             self.detect_renames(tree1, tree2, rename_threshold=50))
         self.assertEqual(
             [TreeChange.delete((b'a', F, blob1.id)),
@@ -678,8 +700,10 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'c', blob3), (b'd', blob4)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'd', F, blob4.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'c', F, blob3.id))],
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'd', F, blob4.id)),
+             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
+                        (b'c', F, blob3.id))],
             self.detect_renames(tree1, tree2))
         self.assertEqual(
             [TreeChange.delete((b'a', F, blob1.id)),
@@ -709,13 +733,15 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b'c', blob3)])
         self.assertEqual(
             [TreeChange.delete((b'a', F, blob1.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'c', F, blob3.id))],
+             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
+                        (b'c', F, blob3.id))],
             self.detect_renames(tree1, tree2))
 
         tree3 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
         tree4 = self.commit_tree([(b'c', blob3)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob2.id), (b'c', F, blob3.id)),
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob2.id),
+                        (b'c', F, blob3.id)),
              TreeChange.delete((b'b', F, blob1.id))],
             self.detect_renames(tree3, tree4))
 
@@ -727,7 +753,8 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b'b', blob2), (b'c', blob3)])
         self.assertEqual(
             [TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id)),
-             TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id))],
+             TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'c', F, blob3.id))],
             self.detect_renames(tree1, tree2))
 
     def test_content_rename_many_to_one(self):
@@ -737,7 +764,8 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'c', blob3)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id)),
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'c', F, blob3.id)),
              TreeChange.delete((b'b', F, blob2.id))],
             self.detect_renames(tree1, tree2))
 
@@ -751,7 +779,8 @@ class RenameDetectionTest(DiffTestCase):
         # TODO(dborowitz): Distribute renames rather than greedily choosing
         # copies.
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'c', F, blob3.id)),
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'c', F, blob3.id)),
              TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'd', F, blob4.id)),
              TreeChange.delete((b'b', F, blob2.id))],
             self.detect_renames(tree1, tree2))
@@ -789,12 +818,16 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'a', blob2), (b'b', blob1)])
         self.assertEqual(
-            [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob2.id)),
-             TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id), (b'b', F, blob1.id))],
+            [TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
+                        (b'a', F, blob2.id)),
+             TreeChange(CHANGE_MODIFY, (b'b', F, blob2.id),
+                        (b'b', F, blob1.id))],
             self.detect_renames(tree1, tree2))
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob1.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'a', F, blob2.id))],
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'b', F, blob1.id)),
+             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
+                        (b'a', F, blob2.id))],
             self.detect_renames(tree1, tree2, rewrite_threshold=50))
 
     def test_content_rename_swap(self):
@@ -805,8 +838,10 @@ class RenameDetectionTest(DiffTestCase):
         tree1 = self.commit_tree([(b'a', blob1), (b'b', blob2)])
         tree2 = self.commit_tree([(b'a', blob4), (b'b', blob3)])
         self.assertEqual(
-            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob3.id)),
-             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id), (b'a', F, blob4.id))],
+            [TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'b', F, blob3.id)),
+             TreeChange(CHANGE_RENAME, (b'b', F, blob2.id),
+                        (b'a', F, blob4.id))],
             self.detect_renames(tree1, tree2, rewrite_threshold=60))
 
     def test_rewrite_threshold(self):
@@ -818,15 +853,18 @@ class RenameDetectionTest(DiffTestCase):
         tree2 = self.commit_tree([(b'a', blob3), (b'b', blob2)])
 
         no_renames = [
-            TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id), (b'a', F, blob3.id)),
+            TreeChange(CHANGE_MODIFY, (b'a', F, blob1.id),
+                       (b'a', F, blob3.id)),
             TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id))]
         self.assertEqual(
             no_renames, self.detect_renames(tree1, tree2))
         self.assertEqual(
-            no_renames, self.detect_renames(tree1, tree2, rewrite_threshold=40))
+            no_renames, self.detect_renames(
+                tree1, tree2, rewrite_threshold=40))
         self.assertEqual(
             [TreeChange.add((b'a', F, blob3.id)),
-             TreeChange(CHANGE_RENAME, (b'a', F, blob1.id), (b'b', F, blob2.id))],
+             TreeChange(CHANGE_RENAME, (b'a', F, blob1.id),
+                        (b'b', F, blob2.id))],
             self.detect_renames(tree1, tree2, rewrite_threshold=80))
 
     def test_find_copies_harder_exact(self):
@@ -847,7 +885,8 @@ class RenameDetectionTest(DiffTestCase):
         self.assertEqual([TreeChange.add((b'b', F, blob2.id))],
                          self.detect_renames(tree1, tree2))
         self.assertEqual(
-            [TreeChange(CHANGE_COPY, (b'a', F, blob1.id), (b'b', F, blob2.id))],
+            [TreeChange(CHANGE_COPY, (b'a', F, blob1.id),
+                        (b'b', F, blob2.id))],
             self.detect_renames(tree1, tree2, find_copies_harder=True))
 
     def test_find_copies_harder_with_rewrites(self):
@@ -859,7 +898,8 @@ class RenameDetectionTest(DiffTestCase):
         self.assertEqual(
             [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
                         (b'a', F, blob_a2.id)),
-             TreeChange(CHANGE_COPY, (b'a', F, blob_a1.id), (b'b', F, blob_b2.id))],
+             TreeChange(CHANGE_COPY, (b'a', F, blob_a1.id),
+                        (b'b', F, blob_b2.id))],
             self.detect_renames(tree1, tree2, find_copies_harder=True))
         self.assertEqual(
             [TreeChange.add((b'a', F, blob_a2.id)),

+ 52 - 24
dulwich/tests/test_fastexport.py

@@ -29,6 +29,7 @@ from dulwich.objects import (
     Blob,
     Commit,
     Tree,
+    ZERO_SHA,
     )
 from dulwich.repo import (
     MemoryRepo,
@@ -60,7 +61,7 @@ class GitFastExporterTests(TestCase):
         b.data = b"fooBAR"
         self.fastexporter.emit_blob(b)
         self.assertEqual(b'blob\nmark :1\ndata 6\nfooBAR\n',
-            self.stream.getvalue())
+                         self.stream.getvalue())
 
     def test_emit_commit(self):
         b = Blob()
@@ -107,13 +108,30 @@ class GitImportProcessorTests(TestCase):
         cmd = commands.ResetCommand(b"refs/heads/foo", c1.id)
         self.processor.reset_handler(cmd)
         self.assertEqual(c1.id, self.repo.get_refs()[b"refs/heads/foo"])
+        self.assertEqual(c1.id, self.processor.last_commit)
+
+    def test_reset_handler_marker(self):
+        from fastimport import commands
+        [c1, c2] = build_commit_graph(self.repo.object_store, [[1], [2]])
+        self.processor.markers[b'10'] = c1.id
+        cmd = commands.ResetCommand(b"refs/heads/foo", b':10')
+        self.processor.reset_handler(cmd)
+        self.assertEqual(c1.id, self.repo.get_refs()[b"refs/heads/foo"])
+
+    def test_reset_handler_default(self):
+        from fastimport import commands
+        [c1, c2] = build_commit_graph(self.repo.object_store, [[1], [2]])
+        cmd = commands.ResetCommand(b"refs/heads/foo", None)
+        self.processor.reset_handler(cmd)
+        self.assertEqual(ZERO_SHA, self.repo.get_refs()[b"refs/heads/foo"])
 
     def test_commit_handler(self):
         from fastimport import commands
-        cmd = commands.CommitCommand(b"refs/heads/foo",  b"mrkr",
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            b"FOO", None, [], [])
+        cmd = commands.CommitCommand(
+                b"refs/heads/foo",  b"mrkr",
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                b"FOO", None, [], [])
         self.processor.commit_handler(cmd)
         commit = self.repo[self.processor.last_commit]
         self.assertEqual(b"Jelmer <jelmer@samba.org>", commit.author)
@@ -148,10 +166,12 @@ M 100644 :1 a
         from fastimport import commands
         cmd = commands.BlobCommand(b"23", b"data")
         self.processor.blob_handler(cmd)
-        cmd = commands.CommitCommand(b"refs/heads/foo", b"mrkr",
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            b"FOO", None, [], [commands.FileModifyCommand(b"path", 0o100644, b":23", None)])
+        cmd = commands.CommitCommand(
+                b"refs/heads/foo", b"mrkr",
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                b"FOO", None, [],
+                [commands.FileModifyCommand(b"path", 0o100644, b":23", None)])
         self.processor.commit_handler(cmd)
         commit = self.repo[self.processor.last_commit]
         self.assertEqual([
@@ -162,10 +182,12 @@ M 100644 :1 a
         from fastimport import commands
         cmd = commands.BlobCommand(b"23", b"data")
         self.processor.blob_handler(cmd)
-        cmd = commands.CommitCommand(b"refs/heads/foo", b"mrkr",
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            b"FOO", None, [], [commands.FileModifyCommand(b"path", 0o100644, b":23", None)])
+        cmd = commands.CommitCommand(
+                b"refs/heads/foo", b"mrkr",
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                b"FOO", None, [],
+                [commands.FileModifyCommand(b"path", 0o100644, b":23", None)])
         self.processor.commit_handler(cmd)
         commit = self.repo[self.processor.last_commit]
         return commit
@@ -177,29 +199,35 @@ M 100644 :1 a
         :return: The created commit object
         """
         from fastimport import commands
-        cmd = commands.CommitCommand(b"refs/heads/foo", b"mrkr",
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
-            b"FOO", None, [], file_cmds)
+        cmd = commands.CommitCommand(
+                b"refs/heads/foo", b"mrkr",
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                (b"Jelmer", b"jelmer@samba.org", 432432432.0, 3600),
+                b"FOO", None, [], file_cmds)
         self.processor.commit_handler(cmd)
         return self.repo[self.processor.last_commit]
 
     def test_file_copy(self):
         from fastimport import commands
         self.simple_commit()
-        commit = self.make_file_commit([commands.FileCopyCommand(b"path", b"new_path")])
+        commit = self.make_file_commit(
+                [commands.FileCopyCommand(b"path", b"new_path")])
         self.assertEqual([
-            (b'new_path', 0o100644, b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
-            (b'path', 0o100644, b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
-            ], self.repo[commit.tree].items())
+                (b'new_path', 0o100644,
+                 b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
+                (b'path', 0o100644,
+                 b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
+                ], self.repo[commit.tree].items())
 
     def test_file_move(self):
         from fastimport import commands
         self.simple_commit()
-        commit = self.make_file_commit([commands.FileRenameCommand(b"path", b"new_path")])
+        commit = self.make_file_commit(
+                [commands.FileRenameCommand(b"path", b"new_path")])
         self.assertEqual([
-            (b'new_path', 0o100644, b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
-            ], self.repo[commit.tree].items())
+                (b'new_path', 0o100644,
+                 b'6320cd248dd8aeaab759d5871f8781b5c0505172'),
+                ], self.repo[commit.tree].items())
 
     def test_file_delete(self):
         from fastimport import commands

+ 10 - 13
dulwich/tests/test_greenthreads.py

@@ -38,7 +38,7 @@ from dulwich.objects import (
     )
 
 try:
-    import gevent
+    import gevent  # noqa: F401
     gevent_support = True
 except ImportError:
     gevent_support = False
@@ -51,6 +51,7 @@ if gevent_support:
 
 skipmsg = "Gevent library is not installed"
 
+
 def create_commit(marker=None):
     blob = Blob.from_string(b'The blob content ' + marker)
     tree = Tree()
@@ -87,24 +88,21 @@ class TestGreenThreadsObjectStoreIterator(TestCase):
     def test_len(self):
         wants = [sha.id for sha in self.objs if isinstance(sha, Commit)]
         finder = MissingObjectFinder(self.store, (), wants)
-        iterator = GreenThreadsObjectStoreIterator(self.store,
-                                               iter(finder.next, None),
-                                               finder)
+        iterator = GreenThreadsObjectStoreIterator(
+                self.store, iter(finder.next, None), finder)
         # One commit refers one tree and one blob
         self.assertEqual(len(iterator), self.cmt_amount * 3)
         haves = wants[0:self.cmt_amount-1]
         finder = MissingObjectFinder(self.store, haves, wants)
-        iterator = GreenThreadsObjectStoreIterator(self.store,
-                                               iter(finder.next, None),
-                                               finder)
+        iterator = GreenThreadsObjectStoreIterator(
+            self.store, iter(finder.next, None), finder)
         self.assertEqual(len(iterator), 3)
 
     def test_iter(self):
         wants = [sha.id for sha in self.objs if isinstance(sha, Commit)]
         finder = MissingObjectFinder(self.store, (), wants)
-        iterator = GreenThreadsObjectStoreIterator(self.store,
-                                               iter(finder.next, None),
-                                               finder)
+        iterator = GreenThreadsObjectStoreIterator(
+            self.store, iter(finder.next, None), finder)
         objs = []
         for sha, path in iterator:
             self.assertIn(sha, self.objs)
@@ -127,9 +125,8 @@ class TestGreenThreadsMissingObjectFinder(TestCase):
         self.assertEqual(len(finder.sha_done), 0)
         self.assertEqual(len(finder.objects_to_send), self.cmt_amount)
 
-        finder = GreenThreadsMissingObjectFinder(self.store,
-                                             wants[0:int(self.cmt_amount/2)],
-                                             wants)
+        finder = GreenThreadsMissingObjectFinder(
+            self.store, wants[0:int(self.cmt_amount/2)], wants)
         # sha_done will contains commit id and sha of blob refered in tree
         self.assertEqual(len(finder.sha_done), (self.cmt_amount/2)*2)
         self.assertEqual(len(finder.objects_to_send), self.cmt_amount/2)

+ 260 - 0
dulwich/tests/test_ignore.py

@@ -0,0 +1,260 @@
+# test_ignore.py -- Tests for ignore files.
+# Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for ignore files."""
+
+from io import BytesIO
+import os
+import re
+import shutil
+import tempfile
+from dulwich.tests import TestCase
+
+from dulwich.ignore import (
+    IgnoreFilter,
+    IgnoreFilterManager,
+    IgnoreFilterStack,
+    Pattern,
+    match_pattern,
+    read_ignore_patterns,
+    translate,
+    )
+from dulwich.repo import Repo
+
+
+POSITIVE_MATCH_TESTS = [
+    (b"foo.c", b"*.c"),
+    (b".c", b"*.c"),
+    (b"foo/foo.c", b"*.c"),
+    (b"foo/foo.c", b"foo.c"),
+    (b"foo.c", b"/*.c"),
+    (b"foo.c", b"/foo.c"),
+    (b"foo.c", b"foo.c"),
+    (b"foo.c", b"foo.[ch]"),
+    (b"foo/bar/bla.c", b"foo/**"),
+    (b"foo/bar/bla/blie.c", b"foo/**/blie.c"),
+    (b"foo/bar/bla.c", b"**/bla.c"),
+    (b"bla.c", b"**/bla.c"),
+    (b"foo/bar", b"foo/**/bar"),
+    (b"foo/bla/bar", b"foo/**/bar"),
+    (b"foo/bar/", b"bar/"),
+    (b"foo/bar/", b"bar"),
+    (b"foo/bar/something", b"foo/bar/*"),
+]
+
+NEGATIVE_MATCH_TESTS = [
+    (b"foo.c", b"foo.[dh]"),
+    (b"foo/foo.c", b"/foo.c"),
+    (b"foo/foo.c", b"/*.c"),
+    (b"foo/bar/", b"/bar/"),
+    (b"foo/bar/", b"foo/bar/*"),
+]
+
+
+TRANSLATE_TESTS = [
+    (b"*.c", b'(?ms)(.*/)?[^/]*\\.c/?\\Z'),
+    (b"foo.c", b'(?ms)(.*/)?foo\\.c/?\\Z'),
+    (b"/*.c", b'(?ms)[^/]*\\.c/?\\Z'),
+    (b"/foo.c", b'(?ms)foo\\.c/?\\Z'),
+    (b"foo.c", b'(?ms)(.*/)?foo\\.c/?\\Z'),
+    (b"foo.[ch]", b'(?ms)(.*/)?foo\\.[ch]/?\\Z'),
+    (b"bar/", b'(?ms)(.*/)?bar\\/\\Z'),
+    (b"foo/**", b'(?ms)foo(/.*)?/?\\Z'),
+    (b"foo/**/blie.c", b'(?ms)foo(/.*)?\\/blie\\.c/?\\Z'),
+    (b"**/bla.c", b'(?ms)(.*/)?bla\\.c/?\\Z'),
+    (b"foo/**/bar", b'(?ms)foo(/.*)?\\/bar/?\\Z'),
+    (b"foo/bar/*", b'(?ms)foo\\/bar\\/[^/]+/?\\Z'),
+]
+
+
+class TranslateTests(TestCase):
+
+    def test_translate(self):
+        for (pattern, regex) in TRANSLATE_TESTS:
+            if re.escape(b'/') == b'/':
+                # Slash is no longer escaped in Python3.7, so undo the escaping
+                # in the expected return value..
+                regex = regex.replace(b'\\/', b'/')
+            self.assertEqual(
+                regex, translate(pattern),
+                "orig pattern: %r, regex: %r, expected: %r" %
+                (pattern, translate(pattern), regex))
+
+
+class ReadIgnorePatterns(TestCase):
+
+    def test_read_file(self):
+        f = BytesIO(b"""
+# a comment
+
+# and an empty line:
+
+\#not a comment
+!negative
+with trailing whitespace 
+with escaped trailing whitespace\ 
+""")  # noqa: W291
+        self.assertEqual(list(read_ignore_patterns(f)), [
+            b'\\#not a comment',
+            b'!negative',
+            b'with trailing whitespace',
+            b'with escaped trailing whitespace '
+        ])
+
+
+class MatchPatternTests(TestCase):
+
+    def test_matches(self):
+        for (path, pattern) in POSITIVE_MATCH_TESTS:
+            self.assertTrue(
+                match_pattern(path, pattern),
+                "path: %r, pattern: %r" % (path, pattern))
+
+    def test_no_matches(self):
+        for (path, pattern) in NEGATIVE_MATCH_TESTS:
+            self.assertFalse(
+                match_pattern(path, pattern),
+                "path: %r, pattern: %r" % (path, pattern))
+
+
+class IgnoreFilterTests(TestCase):
+
+    def test_included(self):
+        filter = IgnoreFilter([b'a.c', b'b.c'])
+        self.assertTrue(filter.is_ignored(b'a.c'))
+        self.assertIs(None, filter.is_ignored(b'c.c'))
+        self.assertEqual(
+            [Pattern(b'a.c')],
+            list(filter.find_matching(b'a.c')))
+        self.assertEqual(
+            [],
+            list(filter.find_matching(b'c.c')))
+
+    def test_included_ignorecase(self):
+        filter = IgnoreFilter([b'a.c', b'b.c'], ignorecase=False)
+        self.assertTrue(filter.is_ignored(b'a.c'))
+        self.assertFalse(filter.is_ignored(b'A.c'))
+        filter = IgnoreFilter([b'a.c', b'b.c'], ignorecase=True)
+        self.assertTrue(filter.is_ignored(b'a.c'))
+        self.assertTrue(filter.is_ignored(b'A.c'))
+        self.assertTrue(filter.is_ignored(b'A.C'))
+
+    def test_excluded(self):
+        filter = IgnoreFilter([b'a.c', b'b.c', b'!c.c'])
+        self.assertFalse(filter.is_ignored(b'c.c'))
+        self.assertIs(None, filter.is_ignored(b'd.c'))
+        self.assertEqual(
+            [Pattern(b'!c.c')],
+            list(filter.find_matching(b'c.c')))
+        self.assertEqual([], list(filter.find_matching(b'd.c')))
+
+    def test_include_exclude_include(self):
+        filter = IgnoreFilter([b'a.c', b'!a.c', b'a.c'])
+        self.assertTrue(filter.is_ignored(b'a.c'))
+        self.assertEqual(
+            [Pattern(b'a.c'), Pattern(b'!a.c'), Pattern(b'a.c')],
+            list(filter.find_matching(b'a.c')))
+
+    def test_manpage(self):
+        # A specific example from the gitignore manpage
+        filter = IgnoreFilter([
+            b'/*',
+            b'!/foo',
+            b'/foo/*',
+            b'!/foo/bar'])
+        self.assertTrue(filter.is_ignored(b'a.c'))
+        self.assertTrue(filter.is_ignored(b'foo/blie'))
+        self.assertFalse(filter.is_ignored(b'foo'))
+        self.assertFalse(filter.is_ignored(b'foo/bar'))
+        self.assertFalse(filter.is_ignored(b'foo/bar/'))
+        self.assertFalse(filter.is_ignored(b'foo/bar/bloe'))
+
+
+class IgnoreFilterStackTests(TestCase):
+
+    def test_stack_first(self):
+        filter1 = IgnoreFilter([b'[a].c', b'[b].c', b'![d].c'])
+        filter2 = IgnoreFilter([b'[a].c', b'![b],c', b'[c].c', b'[d].c'])
+        stack = IgnoreFilterStack([filter1, filter2])
+        self.assertIs(True, stack.is_ignored(b'a.c'))
+        self.assertIs(True, stack.is_ignored(b'b.c'))
+        self.assertIs(True, stack.is_ignored(b'c.c'))
+        self.assertIs(False, stack.is_ignored(b'd.c'))
+        self.assertIs(None, stack.is_ignored(b'e.c'))
+
+
+class IgnoreFilterManagerTests(TestCase):
+
+    def test_load_ignore(self):
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+        with open(os.path.join(repo.path, '.gitignore'), 'wb') as f:
+            f.write(b'/foo/bar\n')
+            f.write(b'/dir2\n')
+            f.write(b'/dir3/\n')
+        os.mkdir(os.path.join(repo.path, 'dir'))
+        with open(os.path.join(repo.path, 'dir', '.gitignore'), 'wb') as f:
+            f.write(b'/blie\n')
+        with open(os.path.join(repo.path, 'dir', 'blie'), 'wb') as f:
+            f.write(b'IGNORED')
+        p = os.path.join(repo.controldir(), 'info', 'exclude')
+        with open(p, 'wb') as f:
+            f.write(b'/excluded\n')
+        m = IgnoreFilterManager.from_repo(repo)
+        self.assertTrue(m.is_ignored('dir/blie'))
+        self.assertIs(None,
+                      m.is_ignored(os.path.join('dir', 'bloe')))
+        self.assertIs(None, m.is_ignored('dir'))
+        self.assertTrue(m.is_ignored(os.path.join('foo', 'bar')))
+        self.assertTrue(m.is_ignored(os.path.join('excluded')))
+        self.assertTrue(m.is_ignored(os.path.join(
+            'dir2', 'fileinignoreddir')))
+        self.assertFalse(m.is_ignored('dir3'))
+        self.assertTrue(m.is_ignored('dir3/'))
+        self.assertTrue(m.is_ignored('dir3/bla'))
+
+    def test_load_ignore_ignorecase(self):
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+        config = repo.get_config()
+        config.set(b'core', b'ignorecase', True)
+        config.write_to_path()
+        with open(os.path.join(repo.path, '.gitignore'), 'wb') as f:
+            f.write(b'/foo/bar\n')
+            f.write(b'/dir\n')
+        m = IgnoreFilterManager.from_repo(repo)
+        self.assertTrue(m.is_ignored(os.path.join('dir', 'blie')))
+        self.assertTrue(m.is_ignored(os.path.join('DIR', 'blie')))
+
+    def test_ignored_contents(self):
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+        with open(os.path.join(repo.path, '.gitignore'), 'wb') as f:
+            f.write(b'a/*\n')
+            f.write(b'!a/*.txt\n')
+        m = IgnoreFilterManager.from_repo(repo)
+        os.mkdir(os.path.join(repo.path, 'a'))
+        self.assertIs(None, m.is_ignored('a'))
+        self.assertIs(None, m.is_ignored('a/'))
+        self.assertFalse(m.is_ignored('a/b.txt'))
+        self.assertTrue(m.is_ignored('a/c.dat'))

+ 64 - 50
dulwich/tests/test_index.py

@@ -63,6 +63,7 @@ from dulwich.tests import (
     skipIf,
     )
 
+
 class IndexTestCase(TestCase):
 
     datadir = os.path.join(os.path.dirname(__file__), 'data/indexes')
@@ -80,10 +81,11 @@ class SimpleIndexTestCase(IndexTestCase):
         self.assertEqual([b'bla'], list(self.get_simple_index("index")))
 
     def test_getitem(self):
-        self.assertEqual(((1230680220, 0), (1230680220, 0), 2050, 3761020,
-                           33188, 1000, 1000, 0,
-                           b'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', 0),
-                          self.get_simple_index("index")[b"bla"])
+        self.assertEqual(
+                ((1230680220, 0), (1230680220, 0), 2050, 3761020,
+                 33188, 1000, 1000, 0,
+                 b'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', 0),
+                self.get_simple_index("index")[b"bla"])
 
     def test_empty(self):
         i = self.get_simple_index("notanindex")
@@ -98,6 +100,7 @@ class SimpleIndexTestCase(IndexTestCase):
         self.assertEqual(b'bla', newname)
         self.assertEqual(b'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', newsha)
 
+
 class SimpleIndexWriterTestCase(IndexTestCase):
 
     def setUp(self):
@@ -131,9 +134,11 @@ class ReadIndexDictTests(IndexTestCase):
         shutil.rmtree(self.tempdir)
 
     def test_simple_write(self):
-        entries = {b'barbla': ((1230680220, 0), (1230680220, 0), 2050, 3761020,
-                    33188, 1000, 1000, 0,
-                    b'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', 0)}
+        entries = {
+                b'barbla':
+                ((1230680220, 0), (1230680220, 0), 2050, 3761020, 33188,
+                 1000, 1000, 0,
+                 b'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', 0)}
         filename = os.path.join(self.tempdir, 'test-simple-write-index')
         with open(filename, 'wb+') as x:
             write_index_dict(x, entries)
@@ -170,7 +175,7 @@ class CommitTreeTests(TestCase):
         self.assertEqual((stat.S_IFDIR, dirid), self.store[rootid][b"bla"])
         self.assertEqual((stat.S_IFREG, blob.id), self.store[dirid][b"bar"])
         self.assertEqual(set([rootid, dirid, blob.id]),
-                          set(self.store._data.keys()))
+                         set(self.store._data.keys()))
 
 
 class CleanupModeTests(TestCase):
@@ -216,9 +221,9 @@ class WriteCacheTimeTests(TestCase):
 class IndexEntryFromStatTests(TestCase):
 
     def test_simple(self):
-        st = os.stat_result((16877, 131078, 64769,
-                154, 1000, 1000, 12288,
-                1323629595, 1324180496, 1324180496))
+        st = os.stat_result(
+                (16877, 131078, 64769, 154, 1000, 1000, 12288,
+                 1323629595, 1324180496, 1324180496))
         entry = index_entry_from_stat(st, "22" * 20, 0)
         self.assertEqual(entry, (
             1324180496,
@@ -233,11 +238,12 @@ class IndexEntryFromStatTests(TestCase):
             0))
 
     def test_override_mode(self):
-        st = os.stat_result((stat.S_IFREG + 0o644, 131078, 64769,
-                154, 1000, 1000, 12288,
-                1323629595, 1324180496, 1324180496))
-        entry = index_entry_from_stat(st, "22" * 20, 0,
-                mode=stat.S_IFREG + 0o755)
+        st = os.stat_result(
+                (stat.S_IFREG + 0o644, 131078, 64769,
+                 154, 1000, 1000, 12288,
+                 1323629595, 1324180496, 1324180496))
+        entry = index_entry_from_stat(
+            st, "22" * 20, 0, mode=stat.S_IFREG + 0o755)
         self.assertEqual(entry, (
             1324180496,
             1324180496,
@@ -272,7 +278,8 @@ class BuildIndexTests(TestCase):
             tree = Tree()
             repo.object_store.add_object(tree)
 
-            build_index_from_tree(repo.path, repo.index_path(),
+            build_index_from_tree(
+                    repo.path, repo.index_path(),
                     repo.object_store, tree.id)
 
             # Verify index entries
@@ -295,11 +302,11 @@ class BuildIndexTests(TestCase):
             tree[b'.git/a'] = (stat.S_IFREG | 0o644, filea.id)
             tree[b'c/e'] = (stat.S_IFREG | 0o644, filee.id)
 
-            repo.object_store.add_objects([(o, None)
-                for o in [filea, filee, tree]])
+            repo.object_store.add_objects(
+                    [(o, None) for o in [filea, filee, tree]])
 
-            build_index_from_tree(repo.path, repo.index_path(),
-                    repo.object_store, tree.id)
+            build_index_from_tree(
+                repo.path, repo.index_path(), repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()
@@ -312,8 +319,8 @@ class BuildIndexTests(TestCase):
             # filee
             epath = os.path.join(repo.path, 'c', 'e')
             self.assertTrue(os.path.exists(epath))
-            self.assertReasonableIndexEntry(index[b'c/e'],
-                stat.S_IFREG | 0o644, 1, filee.id)
+            self.assertReasonableIndexEntry(
+                index[b'c/e'], stat.S_IFREG | 0o644, 1, filee.id)
             self.assertFileContents(epath, b'd')
 
     def test_nonempty(self):
@@ -331,11 +338,11 @@ class BuildIndexTests(TestCase):
             tree[b'b'] = (stat.S_IFREG | 0o644, fileb.id)
             tree[b'c/d'] = (stat.S_IFREG | 0o644, filed.id)
 
-            repo.object_store.add_objects([(o, None)
-                for o in [filea, fileb, filed, tree]])
+            repo.object_store.add_objects(
+                [(o, None) for o in [filea, fileb, filed, tree]])
 
-            build_index_from_tree(repo.path, repo.index_path(),
-                    repo.object_store, tree.id)
+            build_index_from_tree(
+                repo.path, repo.index_path(), repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()
@@ -344,29 +351,29 @@ class BuildIndexTests(TestCase):
             # filea
             apath = os.path.join(repo.path, 'a')
             self.assertTrue(os.path.exists(apath))
-            self.assertReasonableIndexEntry(index[b'a'],
-                stat.S_IFREG | 0o644, 6, filea.id)
+            self.assertReasonableIndexEntry(
+                    index[b'a'], stat.S_IFREG | 0o644, 6, filea.id)
             self.assertFileContents(apath, b'file a')
 
             # fileb
             bpath = os.path.join(repo.path, 'b')
             self.assertTrue(os.path.exists(bpath))
-            self.assertReasonableIndexEntry(index[b'b'],
-                stat.S_IFREG | 0o644, 6, fileb.id)
+            self.assertReasonableIndexEntry(
+                    index[b'b'], stat.S_IFREG | 0o644, 6, fileb.id)
             self.assertFileContents(bpath, b'file b')
 
             # filed
             dpath = os.path.join(repo.path, 'c', 'd')
             self.assertTrue(os.path.exists(dpath))
-            self.assertReasonableIndexEntry(index[b'c/d'],
-                stat.S_IFREG | 0o644, 6, filed.id)
+            self.assertReasonableIndexEntry(
+                    index[b'c/d'], stat.S_IFREG | 0o644, 6, filed.id)
             self.assertFileContents(dpath, b'file d')
 
             # Verify no extra files
-            self.assertEqual(['.git', 'a', 'b', 'c'],
-                sorted(os.listdir(repo.path)))
-            self.assertEqual(['d'],
-                sorted(os.listdir(os.path.join(repo.path, 'c'))))
+            self.assertEqual(
+                    ['.git', 'a', 'b', 'c'], sorted(os.listdir(repo.path)))
+            self.assertEqual(
+                    ['d'], sorted(os.listdir(os.path.join(repo.path, 'c'))))
 
     @skipIf(not getattr(os, 'sync', None), 'Requires sync support')
     def test_norewrite(self):
@@ -379,8 +386,7 @@ class BuildIndexTests(TestCase):
             tree = Tree()
             tree[b'a'] = (stat.S_IFREG | 0o644, filea.id)
 
-            repo.object_store.add_objects([(o, None)
-                for o in [filea, tree]])
+            repo.object_store.add_objects([(o, None) for o in [filea, tree]])
 
             # First Write
             build_index_from_tree(repo.path, repo.index_path(),
@@ -408,7 +414,6 @@ class BuildIndexTests(TestCase):
             with open(filea_path, 'rb') as fh:
                 self.assertEqual(b'file a', fh.read())
 
-
     @skipIf(not getattr(os, 'symlink', None), 'Requires symlink support')
     def test_symlink(self):
         repo_dir = tempfile.mkdtemp()
@@ -423,11 +428,11 @@ class BuildIndexTests(TestCase):
             tree[b'c/d'] = (stat.S_IFREG | 0o644, filed.id)
             tree[b'c/e'] = (stat.S_IFLNK, filee.id)  # symlink
 
-            repo.object_store.add_objects([(o, None)
-                for o in [filed, filee, tree]])
+            repo.object_store.add_objects(
+                    [(o, None) for o in [filed, filee, tree]])
 
-            build_index_from_tree(repo.path, repo.index_path(),
-                    repo.object_store, tree.id)
+            build_index_from_tree(
+                    repo.path, repo.index_path(), repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()
@@ -452,24 +457,33 @@ class BuildIndexTests(TestCase):
 
             tree = Tree()
             latin1_name = u'À'.encode('latin1')
+            latin1_path = os.path.join(repo_dir_bytes, latin1_name)
             utf8_name = u'À'.encode('utf8')
+            utf8_path = os.path.join(repo_dir_bytes, utf8_name)
             tree[latin1_name] = (stat.S_IFREG | 0o644, file.id)
             tree[utf8_name] = (stat.S_IFREG | 0o644, file.id)
 
             repo.object_store.add_objects(
                 [(o, None) for o in [file, tree]])
 
+            try:
+                os.path.exists(latin1_path)
+            except UnicodeDecodeError:
+                # This happens e.g. with python3.6 on Windows.
+                # It implicitly decodes using utf8, which doesn't work.
+                self.skipTest('can not implicitly convert as utf8')
+
             build_index_from_tree(
                 repo.path, repo.index_path(),
                 repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()
+            self.assertIn(latin1_name, index)
+            self.assertIn(utf8_name, index)
 
-            latin1_path = os.path.join(repo_dir_bytes, latin1_name)
             self.assertTrue(os.path.exists(latin1_path))
 
-            utf8_path = os.path.join(repo_dir_bytes, utf8_name)
             self.assertTrue(os.path.exists(utf8_path))
 
     def test_git_submodule(self):
@@ -495,8 +509,8 @@ class BuildIndexTests(TestCase):
             repo.object_store.add_objects(
                 [(o, None) for o in [tree]])
 
-            build_index_from_tree(repo.path, repo.index_path(),
-                    repo.object_store, tree.id)
+            build_index_from_tree(
+                    repo.path, repo.index_path(), repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()
@@ -536,8 +550,8 @@ class BuildIndexTests(TestCase):
             repo.object_store.add_objects(
                 [(o, None) for o in [tree]])
 
-            build_index_from_tree(repo.path, repo.index_path(),
-                    repo.object_store, tree.id)
+            build_index_from_tree(
+                    repo.path, repo.index_path(), repo.object_store, tree.id)
 
             # Verify index entries
             index = repo.open_index()

+ 30 - 27
dulwich/tests/test_lru_cache.py

@@ -26,6 +26,7 @@ from dulwich.tests import (
     TestCase,
     )
 
+
 class TestLRUCache(TestCase):
     """Test that LRU cache properly keeps track of entries."""
 
@@ -101,6 +102,7 @@ class TestLRUCache(TestCase):
     def test_cleanup(self):
         """Test that we can use a cleanup function."""
         cleanup_called = []
+
         def cleanup_func(key, val):
             cleanup_called.append((key, val))
 
@@ -121,6 +123,7 @@ class TestLRUCache(TestCase):
     def test_cleanup_on_replace(self):
         """Replacing an object should cleanup the old value."""
         cleanup_called = []
+
         def cleanup_func(key, val):
             cleanup_called.append((key, val))
 
@@ -153,7 +156,7 @@ class TestLRUCache(TestCase):
 
         self.assertEqual(8, len(cache))
 
-        cache[1] = 15 # replacement
+        cache[1] = 15  # replacement
 
         self.assertEqual(8, len(cache))
 
@@ -285,7 +288,7 @@ class TestLRUCache(TestCase):
         cache[9] = 10
         cache[10] = 11
         self.assertEqual([3, 4, 5, 6, 7, 8, 9, 10], sorted(cache.keys()))
-        cache[11] = 12 # triggers cleanup back to new after_cleanup_count
+        cache[11] = 12  # triggers cleanup back to new after_cleanup_count
         self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
 
 
@@ -326,20 +329,21 @@ class TestLRUSizeCache(TestCase):
         self.assertEqual({'test': 'key'}, cache.items())
         cache.add('test2', 'key that is too big')
         self.assertEqual(3, cache._value_size)
-        self.assertEqual({'test':'key'}, cache.items())
+        self.assertEqual({'test': 'key'}, cache.items())
         # If we would add a key, only to cleanup and remove all cached entries,
         # then obviously that value should not be stored
         cache.add('test3', 'bigkey')
         self.assertEqual(3, cache._value_size)
-        self.assertEqual({'test':'key'}, cache.items())
+        self.assertEqual({'test': 'key'}, cache.items())
 
         cache.add('test4', 'bikey')
         self.assertEqual(3, cache._value_size)
-        self.assertEqual({'test':'key'}, cache.items())
+        self.assertEqual({'test': 'key'}, cache.items())
 
     def test_no_add_over_size_cleanup(self):
         """If a large value is not cached, we will call cleanup right away."""
         cleanup_calls = []
+
         def cleanup(key, value):
             cleanup_calls.append((key, value))
 
@@ -356,28 +360,28 @@ class TestLRUSizeCache(TestCase):
     def test_adding_clears_cache_based_on_size(self):
         """The cache is cleared in LRU order until small enough"""
         cache = lru_cache.LRUSizeCache(max_size=20)
-        cache.add('key1', 'value') # 5 chars
-        cache.add('key2', 'value2') # 6 chars
-        cache.add('key3', 'value23') # 7 chars
+        cache.add('key1', 'value')  # 5 chars
+        cache.add('key2', 'value2')  # 6 chars
+        cache.add('key3', 'value23')  # 7 chars
         self.assertEqual(5+6+7, cache._value_size)
-        cache['key2'] # reference key2 so it gets a newer reference time
-        cache.add('key4', 'value234') # 8 chars, over limit
+        cache['key2']  # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234')  # 8 chars, over limit
         # We have to remove 2 keys to get back under limit
         self.assertEqual(6+8, cache._value_size)
-        self.assertEqual({'key2':'value2', 'key4':'value234'},
+        self.assertEqual({'key2': 'value2', 'key4': 'value234'},
                          cache.items())
 
     def test_adding_clears_to_after_cleanup_size(self):
         cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
-        cache.add('key1', 'value') # 5 chars
-        cache.add('key2', 'value2') # 6 chars
-        cache.add('key3', 'value23') # 7 chars
+        cache.add('key1', 'value')  # 5 chars
+        cache.add('key2', 'value2')  # 6 chars
+        cache.add('key3', 'value23')  # 7 chars
         self.assertEqual(5+6+7, cache._value_size)
-        cache['key2'] # reference key2 so it gets a newer reference time
-        cache.add('key4', 'value234') # 8 chars, over limit
+        cache['key2']  # reference key2 so it gets a newer reference time
+        cache.add('key4', 'value234')  # 8 chars, over limit
         # We have to remove 3 keys to get back under limit
         self.assertEqual(8, cache._value_size)
-        self.assertEqual({'key4':'value234'}, cache.items())
+        self.assertEqual({'key4': 'value234'}, cache.items())
 
     def test_custom_sizes(self):
         def size_of_list(lst):
@@ -385,23 +389,23 @@ class TestLRUSizeCache(TestCase):
         cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10,
                                        compute_size=size_of_list)
 
-        cache.add('key1', ['val', 'ue']) # 5 chars
-        cache.add('key2', ['val', 'ue2']) # 6 chars
-        cache.add('key3', ['val', 'ue23']) # 7 chars
+        cache.add('key1', ['val', 'ue'])  # 5 chars
+        cache.add('key2', ['val', 'ue2'])  # 6 chars
+        cache.add('key3', ['val', 'ue23'])  # 7 chars
         self.assertEqual(5+6+7, cache._value_size)
-        cache['key2'] # reference key2 so it gets a newer reference time
-        cache.add('key4', ['value', '234']) # 8 chars, over limit
+        cache['key2']  # reference key2 so it gets a newer reference time
+        cache.add('key4', ['value', '234'])  # 8 chars, over limit
         # We have to remove 3 keys to get back under limit
         self.assertEqual(8, cache._value_size)
-        self.assertEqual({'key4':['value', '234']}, cache.items())
+        self.assertEqual({'key4': ['value', '234']}, cache.items())
 
     def test_cleanup(self):
         cache = lru_cache.LRUSizeCache(max_size=20, after_cleanup_size=10)
 
         # Add these in order
-        cache.add('key1', 'value') # 5 chars
-        cache.add('key2', 'value2') # 6 chars
-        cache.add('key3', 'value23') # 7 chars
+        cache.add('key1', 'value')  # 5 chars
+        cache.add('key2', 'value2')  # 6 chars
+        cache.add('key3', 'value23')  # 7 chars
         self.assertEqual(5+6+7, cache._value_size)
 
         cache.cleanup()
@@ -448,4 +452,3 @@ class TestLRUSizeCache(TestCase):
         self.assertEqual([2, 3, 4, 5, 6], sorted(cache.keys()))
         cache[7] = 'stu'
         self.assertEqual([4, 5, 6, 7], sorted(cache.keys()))
-

+ 37 - 27
dulwich/tests/test_missing_obj_finder.py

@@ -44,28 +44,33 @@ class MissingObjectFinderTest(TestCase):
 
     def assertMissingMatch(self, haves, wants, expected):
         for sha, path in self.store.find_missing_objects(haves, wants):
-            self.assertTrue(sha in expected,
-                "(%s,%s) erroneously reported as missing" % (sha, path))
+            self.assertTrue(
+                    sha in expected,
+                    "(%s,%s) erroneously reported as missing" % (sha, path))
             expected.remove(sha)
 
-        self.assertEqual(len(expected), 0,
-            "some objects are not reported as missing: %s" % (expected, ))
+        self.assertEqual(
+                len(expected), 0,
+                "some objects are not reported as missing: %s" % (expected, ))
 
 
 class MOFLinearRepoTest(MissingObjectFinderTest):
 
     def setUp(self):
         super(MOFLinearRepoTest, self).setUp()
-        f1_1 = make_object(Blob, data=b'f1') # present in 1, removed in 3
-        f2_1 = make_object(Blob, data=b'f2') # present in all revisions, changed in 2 and 3
+        # present in 1, removed in 3
+        f1_1 = make_object(Blob, data=b'f1')
+        # present in all revisions, changed in 2 and 3
+        f2_1 = make_object(Blob, data=b'f2')
         f2_2 = make_object(Blob, data=b'f2-changed')
         f2_3 = make_object(Blob, data=b'f2-changed-again')
-        f3_2 = make_object(Blob, data=b'f3') # added in 2, left unmodified in 3
+        # added in 2, left unmodified in 3
+        f3_2 = make_object(Blob, data=b'f3')
 
         commit_spec = [[1], [2, 1], [3, 2]]
         trees = {1: [(b'f1', f1_1), (b'f2', f2_1)],
                  2: [(b'f1', f1_1), (b'f2', f2_2), (b'f3', f3_2)],
-                 3: [(b'f2', f2_3), (b'f3', f3_2)] }
+                 3: [(b'f2', f2_3), (b'f3', f3_2)]}
         # commit 1: f1 and f2
         # commit 2: f3 added, f2 changed. Missing shall report commit id and a
         # tree referenced by commit
@@ -80,16 +85,19 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
             f2_2.id, f3_2.id, f2_3.id]
 
     def test_1_to_2(self):
-        self.assertMissingMatch([self.cmt(1).id], [self.cmt(2).id],
-            self.missing_1_2)
+        self.assertMissingMatch(
+                [self.cmt(1).id], [self.cmt(2).id],
+                self.missing_1_2)
 
     def test_2_to_3(self):
-        self.assertMissingMatch([self.cmt(2).id], [self.cmt(3).id],
-            self.missing_2_3)
+        self.assertMissingMatch(
+                [self.cmt(2).id], [self.cmt(3).id],
+                self.missing_2_3)
 
     def test_1_to_3(self):
-        self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id],
-            self.missing_1_3)
+        self.assertMissingMatch(
+                [self.cmt(1).id], [self.cmt(3).id],
+                self.missing_1_3)
 
     def test_bogus_haves(self):
         """Ensure non-existent SHA in haves are tolerated"""
@@ -103,8 +111,8 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
         bogus_sha = self.cmt(2).id[::-1]
         haves = [self.cmt(1).id]
         wants = [self.cmt(3).id, bogus_sha]
-        self.assertRaises(KeyError, self.store.find_missing_objects,
-            haves, wants)
+        self.assertRaises(
+                KeyError, self.store.find_missing_objects, haves, wants)
 
     def test_no_changes(self):
         self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
@@ -122,21 +130,22 @@ class MOFMergeForkRepoTest(MissingObjectFinderTest):
         f1_1 = make_object(Blob, data=b'f1')
         f1_2 = make_object(Blob, data=b'f1-2')
         f1_4 = make_object(Blob, data=b'f1-4')
-        f1_7 = make_object(Blob, data=b'f1-2') # same data as in rev 2
+        f1_7 = make_object(Blob, data=b'f1-2')  # same data as in rev 2
         f2_1 = make_object(Blob, data=b'f2')
         f2_3 = make_object(Blob, data=b'f2-3')
         f3_3 = make_object(Blob, data=b'f3')
         f3_5 = make_object(Blob, data=b'f3-5')
         commit_spec = [[1], [2, 1], [3, 2], [4, 2], [5, 3], [6, 3, 4], [7, 6]]
         trees = {1: [(b'f1', f1_1), (b'f2', f2_1)],
-                2: [(b'f1', f1_2), (b'f2', f2_1)], # f1 changed
-                # f3 added, f2 changed
-                3: [(b'f1', f1_2), (b'f2', f2_3), (b'f3', f3_3)],
-                4: [(b'f1', f1_4), (b'f2', f2_1)],  # f1 changed
-                5: [(b'f1', f1_2), (b'f3', f3_5)], # f2 removed, f3 changed
-                6: [(b'f1', f1_4), (b'f2', f2_3), (b'f3', f3_3)], # merged 3 and 4
-                # f1 changed to match rev2. f3 removed
-                7: [(b'f1', f1_7), (b'f2', f2_3)]}
+                 2: [(b'f1', f1_2), (b'f2', f2_1)],  # f1 changed
+                 # f3 added, f2 changed
+                 3: [(b'f1', f1_2), (b'f2', f2_3), (b'f3', f3_3)],
+                 4: [(b'f1', f1_4), (b'f2', f2_1)],  # f1 changed
+                 5: [(b'f1', f1_2), (b'f3', f3_5)],  # f2 removed, f3 changed
+                 # merged 3 and 4
+                 6: [(b'f1', f1_4), (b'f2', f2_3), (b'f3', f3_3)],
+                 # f1 changed to match rev2. f3 removed
+                 7: [(b'f1', f1_7), (b'f2', f2_3)]}
         self.commits = build_commit_graph(self.store, commit_spec, trees)
 
         self.f1_2_id = f1_2.id
@@ -154,8 +163,9 @@ class MOFMergeForkRepoTest(MissingObjectFinderTest):
         # which is an overkill (i.e. in sha_done it records f1_4 as known, and
         # doesn't record f1_2 was known prior to that, hence can't detect f1_7
         # is in fact f1_2 and shall not be reported)
-        self.assertMissingMatch([self.cmt(6).id], [self.cmt(7).id],
-            [self.cmt(7).id, self.cmt(7).tree, self.f1_7_id])
+        self.assertMissingMatch(
+                [self.cmt(6).id], [self.cmt(7).id],
+                [self.cmt(7).id, self.cmt(7).tree, self.f1_7_id])
 
     def test_have4_want7(self):
         # have 4, want 7. Shall not include rev5 as it is not in the tree

+ 30 - 21
dulwich/tests/test_object_store.py

@@ -65,12 +65,13 @@ testobject = make_object(Blob, data=b"yummy data")
 class ObjectStoreTests(object):
 
     def test_determine_wants_all(self):
-        self.assertEqual([b"1" * 40],
+        self.assertEqual(
+            [b"1" * 40],
             self.store.determine_wants_all({b"refs/heads/foo": b"1" * 40}))
 
     def test_determine_wants_all_zero(self):
-        self.assertEqual([],
-            self.store.determine_wants_all({b"refs/heads/foo": b"0" * 40}))
+        self.assertEqual(
+            [], self.store.determine_wants_all({b"refs/heads/foo": b"0" * 40}))
 
     def test_iter(self):
         self.assertEqual([], list(self.store))
@@ -129,13 +130,15 @@ class ObjectStoreTests(object):
         tree1_id = commit_tree(self.store, blobs_1)
         blobs_2 = [(b'a', blob_a2.id, 0o100644), (b'b', blob_b.id, 0o100644)]
         tree2_id = commit_tree(self.store, blobs_2)
-        change_a = ((b'a', b'a'), (0o100644, 0o100644), (blob_a1.id, blob_a2.id))
+        change_a = ((b'a', b'a'), (0o100644, 0o100644),
+                    (blob_a1.id, blob_a2.id))
         self.assertEqual([change_a],
-                          list(self.store.tree_changes(tree1_id, tree2_id)))
+                         list(self.store.tree_changes(tree1_id, tree2_id)))
         self.assertEqual(
-            [change_a, ((b'b', b'b'), (0o100644, 0o100644), (blob_b.id, blob_b.id))],
+            [change_a, ((b'b', b'b'), (0o100644, 0o100644),
+             (blob_b.id, blob_b.id))],
             list(self.store.tree_changes(tree1_id, tree2_id,
-                                         want_unchanged=True)))
+                 want_unchanged=True)))
 
     def test_iter_tree_contents(self):
         blob_a = make_object(Blob, data=b'a')
@@ -153,7 +156,7 @@ class ObjectStoreTests(object):
         ]
         tree_id = commit_tree(self.store, blobs)
         self.assertEqual([TreeEntry(p, m, h) for (p, h, m) in blobs],
-                          list(self.store.iter_tree_contents(tree_id)))
+                         list(self.store.iter_tree_contents(tree_id)))
 
     def test_iter_tree_contents_include_trees(self):
         blob_a = make_object(Blob, data=b'a')
@@ -244,7 +247,6 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         self.assertEqual((Blob.type_num, b'more yummy data'),
                          o.get_raw(packed_blob_sha))
 
-
     def test_add_thin_pack_empty(self):
         o = MemoryObjectStore()
 
@@ -316,7 +318,8 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         alternate_store.add_object(b2)
         store = DiskObjectStore(self.store_dir)
         self.assertRaises(KeyError, store.__getitem__, b2.id)
-        store.add_alternate_path(os.path.relpath(alternate_dir, self.store_dir))
+        store.add_alternate_path(
+            os.path.relpath(alternate_dir, self.store_dir))
         self.assertEqual(list(alternate_store), list(store.alternates[0]))
         self.assertIn(b2.id, store)
         self.assertEqual(b2, store[b2.id])
@@ -351,7 +354,8 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
             with o.add_thin_pack(f.read, None) as pack:
                 packed_blob_sha = sha_to_hex(entries[0][3])
                 pack.check_length_and_checksum()
-                self.assertEqual(sorted([blob.id, packed_blob_sha]), list(pack))
+                self.assertEqual(
+                    sorted([blob.id, packed_blob_sha]), list(pack))
                 self.assertTrue(o.contains_packed(packed_blob_sha))
                 self.assertTrue(o.contains_packed(blob.id))
                 self.assertEqual((Blob.type_num, b'more yummy data'),
@@ -403,19 +407,23 @@ class TreeLookupPathTests(TestCase):
         self.assertTrue(isinstance(self.store[o_id], Tree))
 
     def test_lookup_nonexistent(self):
-        self.assertRaises(KeyError, tree_lookup_path, self.get_object, self.tree_id, b'j')
+        self.assertRaises(
+            KeyError, tree_lookup_path, self.get_object, self.tree_id, b'j')
 
     def test_lookup_not_tree(self):
-        self.assertRaises(NotTreeError, tree_lookup_path, self.get_object, self.tree_id, b'ad/b/j')
+        self.assertRaises(
+            NotTreeError, tree_lookup_path, self.get_object, self.tree_id,
+            b'ad/b/j')
 
 
 class ObjectStoreGraphWalkerTests(TestCase):
 
     def get_walker(self, heads, parent_map):
-        new_parent_map = dict([
-            (k * 40, [(p * 40) for p in ps]) for (k, ps) in parent_map.items()])
+        new_parent_map = dict(
+                [(k * 40, [(p * 40) for p in ps])
+                 for (k, ps) in parent_map.items()])
         return ObjectStoreGraphWalker([x * 40 for x in heads],
-            new_parent_map.__getitem__)
+                                      new_parent_map.__getitem__)
 
     def test_ack_invalid_value(self):
         gw = self.get_walker([], {})
@@ -470,17 +478,18 @@ class ObjectStoreGraphWalkerTests(TestCase):
         # A branch (a, c) or (b, d) may be done after 2 steps or 3 depending on
         # the order walked: 3-step walks include (a, b, c) and (b, a, d), etc.
         if walk == [b"a" * 40, b"c" * 40] or walk == [b"b" * 40, b"d" * 40]:
-          gw.ack(walk[0])
-          acked = True
+            gw.ack(walk[0])
+            acked = True
 
         walk.append(next(gw))
         if not acked and walk[2] == b"c" * 40:
-          gw.ack(b"a" * 40)
+            gw.ack(b"a" * 40)
         elif not acked and walk[2] == b"d" * 40:
-          gw.ack(b"b" * 40)
+            gw.ack(b"b" * 40)
         walk.append(next(gw))
         self.assertIs(None, next(gw))
 
-        self.assertEqual([b"a" * 40, b"b" * 40, b"c" * 40, b"d" * 40], sorted(walk))
+        self.assertEqual([b"a" * 40, b"b" * 40, b"c" * 40, b"d" * 40],
+                         sorted(walk))
         self.assertLess(walk.index(b"a" * 40), walk.index(b"c" * 40))
         self.assertLess(walk.index(b"b" * 40), walk.index(b"d" * 40))

+ 74 - 49
dulwich/tests/test_objects.py

@@ -138,16 +138,16 @@ class BlobReadTests(TestCase):
 
     def test_splitlines(self):
         for case in [
-            [],
-            [b'foo\nbar\n'],
-            [b'bl\na', b'blie'],
-            [b'bl\na', b'blie', b'bloe\n'],
-            [b'', b'bl\na', b'blie', b'bloe\n'],
-            [b'', b'', b'', b'bla\n'],
-            [b'', b'', b'', b'bla\n', b''],
-            [b'bl', b'', b'a\naaa'],
-            [b'a\naaa', b'a'],
-            ]:
+                [],
+                [b'foo\nbar\n'],
+                [b'bl\na', b'blie'],
+                [b'bl\na', b'blie', b'bloe\n'],
+                [b'', b'bl\na', b'blie', b'bloe\n'],
+                [b'', b'', b'', b'bla\n'],
+                [b'', b'', b'', b'bla\n', b''],
+                [b'bl', b'', b'a\naaa'],
+                [b'a\naaa', b'a'],
+                ]:
             b = Blob()
             b.chunked = case
             self.assertEqual(b.data.splitlines(True), b.splitlines())
@@ -177,10 +177,12 @@ class BlobReadTests(TestCase):
 
     def test_read_tree_from_file_parse_count(self):
         old_deserialize = Tree._deserialize
+
         def reset_deserialize():
             Tree._deserialize = old_deserialize
         self.addCleanup(reset_deserialize)
         self.deserialize_count = 0
+
         def counting_deserialize(*args, **kwargs):
             self.deserialize_count += 1
             return old_deserialize(*args, **kwargs)
@@ -197,7 +199,17 @@ class BlobReadTests(TestCase):
         self.assertEqual(t.name, b'signed')
         self.assertEqual(t.tagger, b'Ali Sabil <ali.sabil@gmail.com>')
         self.assertEqual(t.tag_time, 1231203091)
-        self.assertEqual(t.message, b'This is a signed tag\n-----BEGIN PGP SIGNATURE-----\nVersion: GnuPG v1.4.9 (GNU/Linux)\n\niEYEABECAAYFAkliqx8ACgkQqSMmLy9u/kcx5ACfakZ9NnPl02tOyYP6pkBoEkU1\n5EcAn0UFgokaSvS371Ym/4W9iJj6vh3h\n=ql7y\n-----END PGP SIGNATURE-----\n')
+        self.assertEqual(
+                t.message,
+                b'This is a signed tag\n'
+                b'-----BEGIN PGP SIGNATURE-----\n'
+                b'Version: GnuPG v1.4.9 (GNU/Linux)\n'
+                b'\n'
+                b'iEYEABECAAYFAkliqx8ACgkQqSMmLy9u/'
+                b'kcx5ACfakZ9NnPl02tOyYP6pkBoEkU1\n'
+                b'5EcAn0UFgokaSvS371Ym/4W9iJj6vh3h\n'
+                b'=ql7y\n'
+                b'-----END PGP SIGNATURE-----\n')
 
     def test_read_commit_from_file(self):
         sha = b'60dacdc733de308bb77bb76ce0fb0f9b44c9769e'
@@ -256,6 +268,7 @@ class ShaFileCheckTests(TestCase):
 
     def assertCheckFails(self, cls, data):
         obj = cls()
+
         def do_check():
             obj.set_raw_string(data)
             obj.check()
@@ -404,7 +417,7 @@ gpgsig -----BEGIN PGP SIGNATURE-----
  -----END PGP SIGNATURE-----
 
 Merge ../b
-""", commit.as_raw_string())
+""", commit.as_raw_string())  # noqa: W291,W293
 
     def test_serialize_mergetag(self):
         tag = make_object(
@@ -437,7 +450,7 @@ mergetag object a38d6181ff27824c79fc7df825164a212eff6a3f
  -----END PGP SIGNATURE-----
 
 Merge ../b
-""", commit.as_raw_string())
+""", commit.as_raw_string())  # noqa: W291,W293
 
     def test_serialize_mergetags(self):
         tag = make_object(
@@ -483,7 +496,7 @@ mergetag object a38d6181ff27824c79fc7df825164a212eff6a3f
  -----END PGP SIGNATURE-----
 
 Merge ../b
-""", commit.as_raw_string())
+""", commit.as_raw_string())  # noqa: W291,W293
 
     def test_deserialize_mergetag(self):
         tag = make_object(
@@ -516,14 +529,17 @@ Merge ../b
         self.assertEqual(commit, d)
 
 
-default_committer = b'James Westby <jw+debian@jameswestby.net> 1174773719 +0000'
+default_committer = (
+        b'James Westby <jw+debian@jameswestby.net> 1174773719 +0000')
+
 
 class CommitParseTests(ShaFileCheckTests):
 
     def make_commit_lines(self,
                           tree=b'd80c186a03f423a81b39df39dc87fd269736ca86',
-                          parents=[b'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
-                                   b'4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'],
+                          parents=[
+                              b'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',
+                              b'4cffe90e0a41ad3f5190079d7c8f036bde29cbe6'],
                           author=default_committer,
                           committer=default_committer,
                           encoding=None,
@@ -563,10 +579,10 @@ class CommitParseTests(ShaFileCheckTests):
                          c.parents)
         expected_time = datetime.datetime(2007, 3, 24, 22, 1, 59)
         self.assertEqual(expected_time,
-                          datetime.datetime.utcfromtimestamp(c.commit_time))
+                         datetime.datetime.utcfromtimestamp(c.commit_time))
         self.assertEqual(0, c.commit_timezone)
         self.assertEqual(expected_time,
-                          datetime.datetime.utcfromtimestamp(c.author_time))
+                         datetime.datetime.utcfromtimestamp(c.author_time))
         self.assertEqual(0, c.author_timezone)
         self.assertEqual(None, c.encoding)
 
@@ -646,7 +662,7 @@ gpgsig -----BEGIN PGP SIGNATURE-----
  -----END PGP SIGNATURE-----
 
 foo
-""")
+""")  # noqa: W291,W293
         self.assertEqual(b'foo\n', c.message)
         self.assertEqual([], c.extra)
         self.assertEqual(b"""-----BEGIN PGP SIGNATURE-----
@@ -686,7 +702,7 @@ gpgsig -----BEGIN PGP SIGNATURE-----
  
 
 3.3.0 version bump and docs
-''')
+''')  # noqa: W291,W293
         self.assertEqual([], c.extra)
         self.assertEqual(b'''\
 -----BEGIN PGP SIGNATURE-----
@@ -710,7 +726,8 @@ _TREE_ITEMS = {
 _SORTED_TREE_ITEMS = [
     TreeEntry(b'a.c', 0o100755, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
     TreeEntry(b'a', stat.S_IFDIR, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
-    TreeEntry(b'a/c', stat.S_IFDIR, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
+    TreeEntry(b'a/c', stat.S_IFDIR,
+              b'd80c186a03f423a81b39df39dc87fd269736ca86'),
 ]
 
 
@@ -721,7 +738,8 @@ class TreeTests(ShaFileCheckTests):
         x = Tree()
         x.add(b'myname', 0o100755, myhexsha)
         self.assertEqual(x[b'myname'], (0o100755, myhexsha))
-        self.assertEqual(b'100755 myname\0' + hex_to_sha(myhexsha),
+        self.assertEqual(
+                b'100755 myname\0' + hex_to_sha(myhexsha),
                 x.as_raw_string())
 
     def test_add_old_order(self):
@@ -793,7 +811,8 @@ class TreeTests(ShaFileCheckTests):
         # C/Python implementations may differ in specific error types, but
         # should all error on invalid inputs.
         # For example, the C implementation has stricter type checks, so may
-        # raise TypeError where the Python implementation raises AttributeError.
+        # raise TypeError where the Python implementation raises
+        # AttributeError.
         errors = (TypeError, ValueError, AttributeError)
         self.assertRaises(errors, do_sort, b'foo')
         self.assertRaises(errors, do_sort, {b'foo': (1, 2, 3)})
@@ -846,12 +865,15 @@ class TreeTests(ShaFileCheckTests):
         # shas
         self.assertCheckFails(t, b'100644 a\0' + (b'x' * 5))
         self.assertCheckFails(t, b'100644 a\0' + (b'x' * 18) + b'\0')
-        self.assertCheckFails(t, b'100644 a\0' + (b'x' * 21) + b'\n100644 b\0' + sha)
+        self.assertCheckFails(
+                t, b'100644 a\0' + (b'x' * 21) + b'\n100644 b\0' + sha)
 
         # ordering
         sha2 = hex_to_sha(b_sha)
-        self.assertCheckSucceeds(t, b'100644 a\0' + sha + b'\n100644 b\0' + sha)
-        self.assertCheckSucceeds(t, b'100644 a\0' + sha + b'\n100644 b\0' + sha2)
+        self.assertCheckSucceeds(
+                t, b'100644 a\0' + sha + b'\n100644 b\0' + sha)
+        self.assertCheckSucceeds(
+                t, b'100644 a\0' + sha + b'\n100644 b\0' + sha2)
         self.assertCheckFails(t, b'100644 a\0' + sha + b'\n100755 a\0' + sha2)
         self.assertCheckFails(t, b'100644 b\0' + sha2 + b'\n100644 a\0' + sha)
 
@@ -864,13 +886,14 @@ class TreeTests(ShaFileCheckTests):
 class TagSerializeTests(TestCase):
 
     def test_serialize_simple(self):
-        x = make_object(Tag,
-                        tagger=b'Jelmer Vernooij <jelmer@samba.org>',
-                        name=b'0.1',
-                        message=b'Tag 0.1',
-                        object=(Blob, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
-                        tag_time=423423423,
-                        tag_timezone=0)
+        x = make_object(
+            Tag,
+            tagger=b'Jelmer Vernooij <jelmer@samba.org>',
+            name=b'0.1',
+            message=b'Tag 0.1',
+            object=(Blob, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
+            tag_time=423423423,
+            tag_timezone=0)
         self.assertEqual((b'object d80c186a03f423a81b39df39dc87fd269736ca86\n'
                           b'type blob\n'
                           b'tag 0.1\n'
@@ -880,13 +903,14 @@ class TagSerializeTests(TestCase):
                           b'Tag 0.1'), x.as_raw_string())
 
     def test_serialize_none_message(self):
-        x = make_object(Tag,
-                        tagger=b'Jelmer Vernooij <jelmer@samba.org>',
-                        name=b'0.1',
-                        message=None,
-                        object=(Blob, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
-                        tag_time=423423423,
-                        tag_timezone=0)
+        x = make_object(
+            Tag,
+            tagger=b'Jelmer Vernooij <jelmer@samba.org>',
+            name=b'0.1',
+            message=None,
+            object=(Blob, b'd80c186a03f423a81b39df39dc87fd269736ca86'),
+            tag_time=423423423,
+            tag_timezone=0)
         self.assertEqual((b'object d80c186a03f423a81b39df39dc87fd269736ca86\n'
                           b'type blob\n'
                           b'tag 0.1\n'
@@ -943,7 +967,7 @@ class TagParseTests(ShaFileCheckTests):
                          object_sha)
         self.assertEqual(Commit, object_type)
         self.assertEqual(datetime.datetime.utcfromtimestamp(x.tag_time),
-                          datetime.datetime(2007, 7, 1, 19, 54, 34))
+                         datetime.datetime(2007, 7, 1, 19, 54, 34))
         self.assertEqual(-25200, x.tag_timezone)
 
     def test_parse_no_tagger(self):
@@ -960,7 +984,7 @@ class TagParseTests(ShaFileCheckTests):
         self.assertEqual(
             b'Linus Torvalds <torvalds@woody.linux-foundation.org>', x.tagger)
         self.assertEqual(datetime.datetime.utcfromtimestamp(x.tag_time),
-                          datetime.datetime(2007, 7, 1, 19, 54, 34))
+                         datetime.datetime(2007, 7, 1, 19, 54, 34))
         self.assertEqual(-25200, x.tag_timezone)
         self.assertEqual(b'v2.6.22-rc7', x.name)
 
@@ -1134,12 +1158,12 @@ class ShaFileCopyTests(TestCase):
 
 
 class ShaFileSerializeTests(TestCase):
-    """
-    Test that `ShaFile` objects only gets serialized once if they haven't changed.
+    """`ShaFile` objects only gets serialized once if they haven't changed.
     """
 
     @contextmanager
-    def assert_serialization_on_change(self, obj, needs_serialization_after_change=True):
+    def assert_serialization_on_change(
+            self, obj, needs_serialization_after_change=True):
         old_id = obj.id
         self.assertFalse(obj._needs_serialization)
 
@@ -1172,7 +1196,8 @@ class ShaFileSerializeTests(TestCase):
     def test_blob_serialize(self):
         blob = make_object(Blob, data=b'i am a blob')
 
-        with self.assert_serialization_on_change(blob, needs_serialization_after_change=False):
+        with self.assert_serialization_on_change(
+                blob, needs_serialization_after_change=False):
             blob.data = b'i am another blob'
 
     def test_tree_serialize(self):
@@ -1199,6 +1224,6 @@ class PrettyFormatTreeEntryTests(TestCase):
     def test_format(self):
         self.assertEqual(
                 '40000 tree 40820c38cfb182ce6c8b261555410d8382a5918b\tfoo\n',
-                pretty_format_tree_entry(b"foo", 0o40000,
+                pretty_format_tree_entry(
+                    b"foo", 0o40000,
                     b"40820c38cfb182ce6c8b261555410d8382a5918b"))
-

+ 27 - 11
dulwich/tests/test_objectspec.py

@@ -33,6 +33,7 @@ from dulwich.objectspec import (
     parse_refs,
     parse_reftuple,
     parse_reftuples,
+    parse_tree,
     )
 from dulwich.repo import MemoryRepo
 from dulwich.tests import (
@@ -66,8 +67,8 @@ class ParseCommitRangeTests(TestCase):
 
     def test_commit_by_sha(self):
         r = MemoryRepo()
-        c1, c2, c3 = build_commit_graph(r.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                r.object_store, [[1], [2, 1], [3, 1, 2]])
         self.assertEqual([c1], list(parse_commit_range(r, c1.id)))
 
 
@@ -152,26 +153,26 @@ class ParseReftupleTests(TestCase):
     def test_head(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual((b"refs/heads/foo", b"refs/heads/foo", False),
-            parse_reftuple(r, r, b"foo"))
+                         parse_reftuple(r, r, b"foo"))
         self.assertEqual((b"refs/heads/foo", b"refs/heads/foo", True),
-            parse_reftuple(r, r, b"+foo"))
+                         parse_reftuple(r, r, b"+foo"))
         self.assertEqual((b"refs/heads/foo", b"refs/heads/foo", True),
-            parse_reftuple(r, {}, b"+foo"))
+                         parse_reftuple(r, {}, b"+foo"))
 
     def test_full(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual((b"refs/heads/foo", b"refs/heads/foo", False),
-            parse_reftuple(r, r, b"refs/heads/foo"))
+                         parse_reftuple(r, r, b"refs/heads/foo"))
 
     def test_no_left_ref(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual((None, b"refs/heads/foo", False),
-            parse_reftuple(r, r, b":refs/heads/foo"))
+                         parse_reftuple(r, r, b":refs/heads/foo"))
 
     def test_no_right_ref(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual((b"refs/heads/foo", None, False),
-            parse_reftuple(r, r, b"refs/heads/foo:"))
+                         parse_reftuple(r, r, b"refs/heads/foo:"))
 
 
 class ParseReftuplesTests(TestCase):
@@ -179,14 +180,29 @@ class ParseReftuplesTests(TestCase):
     def test_nonexistent(self):
         r = {}
         self.assertRaises(KeyError, parse_reftuples, r, r,
-            [b"thisdoesnotexist"])
+                          [b"thisdoesnotexist"])
 
     def test_head(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual([(b"refs/heads/foo", b"refs/heads/foo", False)],
-            parse_reftuples(r, r, [b"foo"]))
+                         parse_reftuples(r, r, [b"foo"]))
 
     def test_full(self):
         r = {b"refs/heads/foo": "bla"}
         self.assertEqual([(b"refs/heads/foo", b"refs/heads/foo", False)],
-            parse_reftuples(r, r, b"refs/heads/foo"))
+                         parse_reftuples(r, r, b"refs/heads/foo"))
+
+
+class ParseTreeTests(TestCase):
+    """Test parse_tree."""
+
+    def test_nonexistent(self):
+        r = MemoryRepo()
+        self.assertRaises(KeyError, parse_tree, r, "thisdoesnotexist")
+
+    def test_from_commit(self):
+        r = MemoryRepo()
+        c1, c2, c3 = build_commit_graph(
+                r.object_store, [[1], [2, 1], [3, 1, 2]])
+        self.assertEqual(r[c1.tree], parse_tree(r, c1.id))
+        self.assertEqual(r[c1.tree], parse_tree(r, c1.tree))

+ 57 - 37
dulwich/tests/test_pack.py

@@ -93,19 +93,24 @@ class PackTests(TestCase):
         self.tempdir = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, self.tempdir)
 
-    datadir = os.path.abspath(os.path.join(os.path.dirname(__file__),
-        'data/packs'))
+    datadir = os.path.abspath(
+            os.path.join(os.path.dirname(__file__), 'data/packs'))
 
     def get_pack_index(self, sha):
         """Returns a PackIndex from the datadir with the given sha"""
-        return load_pack_index(os.path.join(self.datadir, 'pack-%s.idx' % sha.decode('ascii')))
+        return load_pack_index(
+                os.path.join(self.datadir,
+                             'pack-%s.idx' % sha.decode('ascii')))
 
     def get_pack_data(self, sha):
         """Returns a PackData object from the datadir with the given sha"""
-        return PackData(os.path.join(self.datadir, 'pack-%s.pack' % sha.decode('ascii')))
+        return PackData(
+                os.path.join(
+                        self.datadir, 'pack-%s.pack' % sha.decode('ascii')))
 
     def get_pack(self, sha):
-        return Pack(os.path.join(self.datadir, 'pack-%s' % sha.decode('ascii')))
+        return Pack(
+                os.path.join(self.datadir, 'pack-%s' % sha.decode('ascii')))
 
     def assertSucceeds(self, func, *args, **kwargs):
         try:
@@ -165,8 +170,9 @@ class TestPackDeltas(TestCase):
     test_string_huge = b'Z' * 100000
 
     def _test_roundtrip(self, base, target):
-        self.assertEqual(target,
-                          b''.join(apply_delta(base, create_delta(base, target))))
+        self.assertEqual(
+                target,
+                b''.join(apply_delta(base, create_delta(base, target))))
 
     def test_nochange(self):
         self._test_roundtrip(self.test_string1, self.test_string1)
@@ -191,9 +197,9 @@ class TestPackDeltas(TestCase):
                              self.test_string_huge + self.test_string2)
 
     def test_dest_overflow(self):
-        self.assertRaises(
-            ApplyDeltaError,
-            apply_delta, b'a'*0x10000, b'\x80\x80\x04\x80\x80\x04\x80' + b'a'*0x10000)
+        self.assertRaises(ApplyDeltaError, apply_delta,
+                          b'a'*0x10000, b'\x80\x80\x04\x80\x80\x04\x80' +
+                          b'a'*0x10000)
         self.assertRaises(
             ApplyDeltaError,
             apply_delta, b'', b'\x00\x80\x02\xb0\x11\x11')
@@ -204,12 +210,16 @@ class TestPackDeltas(TestCase):
         chunks = [
             b'tree 03207ccf58880a748188836155ceed72f03d65d6\n'
             b'parent 408fbab530fd4abe49249a636a10f10f44d07a21\n'
-            b'author Victor Stinner <victor.stinner@gmail.com> 1421355207 +0100\n'
-            b'committer Victor Stinner <victor.stinner@gmail.com> 1421355207 +0100\n'
+            b'author Victor Stinner <victor.stinner@gmail.com> '
+            b'1421355207 +0100\n'
+            b'committer Victor Stinner <victor.stinner@gmail.com> '
+            b'1421355207 +0100\n'
             b'\n'
             b'Backout changeset 3a06020af8cf\n'
-            b'\nStreamWriter: close() now clears the reference to the transport\n'
-            b'\nStreamWriter now raises an exception if it is closed: write(), writelines(),\n'
+            b'\nStreamWriter: close() now clears the reference to the '
+            b'transport\n'
+            b'\nStreamWriter now raises an exception if it is closed: '
+            b'write(), writelines(),\n'
             b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
         delta = [
             b'\xcd\x03\xad\x03]tree ff3c181a393d5a7270cddc01ea863818a8621ca8\n'
@@ -221,10 +231,13 @@ class TestPackDeltas(TestCase):
             b'parent 20a103cc90135494162e819f98d0edfc1f1fba6b',
             b'\nauthor Victor Stinner <victor.stinner@gmail.com> 14213',
             b'10738',
-            b' +0100\ncommitter Victor Stinner <victor.stinner@gmail.com> 14213',
+            b' +0100\ncommitter Victor Stinner <victor.stinner@gmail.com> '
+            b'14213',
             b'10738 +0100',
-            b'\n\nStreamWriter: close() now clears the reference to the transport\n\n'
-            b'StreamWriter now raises an exception if it is closed: write(), writelines(),\n'
+            b'\n\nStreamWriter: close() now clears the reference to the '
+            b'transport\n\n'
+            b'StreamWriter now raises an exception if it is closed: '
+            b'write(), writelines(),\n'
             b'write_eof(), can_write_eof(), get_extra_info(), drain().\n']
         self.assertEqual(b''.join(expected), b''.join(res))
 
@@ -236,7 +249,8 @@ class TestPackData(PackTests):
         self.get_pack_data(pack1_sha).close()
 
     def test_from_file(self):
-        path = os.path.join(self.datadir, 'pack-%s.pack' % pack1_sha.decode('ascii'))
+        path = os.path.join(self.datadir,
+                            'pack-%s.pack' % pack1_sha.decode('ascii'))
         with open(path, 'rb') as f:
             PackData.from_file(f, os.path.getsize(path))
 
@@ -250,13 +264,14 @@ class TestPackData(PackTests):
 
     def test_iterobjects(self):
         with self.get_pack_data(pack1_sha) as p:
-            commit_data = (b'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
-                           b'author James Westby <jw+debian@jameswestby.net> '
-                           b'1174945067 +0100\n'
-                           b'committer James Westby <jw+debian@jameswestby.net> '
-                           b'1174945067 +0100\n'
-                           b'\n'
-                           b'Test commit\n')
+            commit_data = (
+                    b'tree b2a2766a2879c209ab1176e7e778b81ae422eeaa\n'
+                    b'author James Westby <jw+debian@jameswestby.net> '
+                    b'1174945067 +0100\n'
+                    b'committer James Westby <jw+debian@jameswestby.net> '
+                    b'1174945067 +0100\n'
+                    b'\n'
+                    b'Test commit\n')
             blob_sha = b'6f670c0fb53f9463760b7295fbb814e965fb20c8'
             tree_data = b'100644 a\0' + hex_to_sha(blob_sha)
             actual = []
@@ -312,7 +327,7 @@ class TestPackData(PackTests):
         self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=-20)
         self.assertRaises(AssertionError, compute_file_sha, f, end_ofs=20)
         self.assertRaises(AssertionError, compute_file_sha, f, start_ofs=10,
-            end_ofs=-12)
+                          end_ofs=-12)
 
 
 class TestPack(PackTests):
@@ -341,7 +356,8 @@ class TestPack(PackTests):
     def test_pack_tuples(self):
         with self.get_pack(pack1_sha) as p:
             tuples = p.pack_tuples()
-            expected = set([(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
+            expected = set(
+                    [(p[s], None) for s in [commit_sha, tree_sha, a_sha]])
             self.assertEqual(expected, set(list(tuples)))
             self.assertEqual(expected, set(list(tuples)))
             self.assertEqual(3, len(tuples))
@@ -370,7 +386,7 @@ class TestPack(PackTests):
                 self.assertSucceeds(newpack.index.check)
                 self.assertEqual(origpack.name(), newpack.name())
                 self.assertEqual(origpack.index.get_pack_checksum(),
-                                  newpack.index.get_pack_checksum())
+                                 newpack.index.get_pack_checksum())
 
                 wrong_version = origpack.index.version != newpack.index.version
                 orig_checksum = origpack.index.get_stored_checksum()
@@ -577,10 +593,10 @@ class BaseTestPackIndexWriting(object):
         entry1_sha = hex_to_sha('4e6388232ec39792661e2e75db8fb117fc869ce6')
         entry2_sha = hex_to_sha('e98f071751bd77f59967bfa671cd2caebdccc9a2')
         entries = [(entry1_sha, 0xf2972d0830529b87, 24),
-                   (entry2_sha, (~0xf2972d0830529b87)&(2**64-1), 92)]
+                   (entry2_sha, (~0xf2972d0830529b87) & (2 ** 64 - 1), 92)]
         if not self._supports_large:
             self.assertRaises(TypeError, self.index, 'single.idx',
-                entries, pack_checksum)
+                              entries, pack_checksum)
             return
         idx = self.index('single.idx', entries, pack_checksum)
         self.assertEqual(idx.get_pack_checksum(), pack_checksum)
@@ -697,7 +713,8 @@ class ReadZlibTests(TestCase):
     def setUp(self):
         super(ReadZlibTests, self).setUp()
         self.read = BytesIO(self.comp + self.extra).read
-        self.unpacked = UnpackedObject(Tree.type_num, None, len(self.decomp), 0)
+        self.unpacked = UnpackedObject(
+                Tree.type_num, None, len(self.decomp), 0)
 
     def test_decompress_size(self):
         good_decomp_len = len(self.decomp)
@@ -963,7 +980,8 @@ class DeltaChainIteratorTests(TestCase):
         n = 100
         objects_spec = [(Blob.type_num, b'blob')]
         for i in range(n):
-            objects_spec.append((OFS_DELTA, (i, b'blob' + str(i).encode('ascii'))))
+            objects_spec.append(
+                    (OFS_DELTA, (i, b'blob' + str(i).encode('ascii'))))
         f = BytesIO()
         entries = build_pack(f, objects_spec)
         self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
@@ -972,7 +990,8 @@ class DeltaChainIteratorTests(TestCase):
         n = 100
         objects_spec = [(Blob.type_num, b'blob')]
         for i in range(n):
-            objects_spec.append((OFS_DELTA, (0, b'blob' + str(i).encode('ascii'))))
+            objects_spec.append(
+                    (OFS_DELTA, (0, b'blob' + str(i).encode('ascii'))))
         f = BytesIO()
         entries = build_pack(f, objects_spec)
         self.assertEntriesMatch(range(n + 1), entries, self.make_pack_iter(f))
@@ -1039,8 +1058,7 @@ class DeltaChainIteratorTests(TestCase):
     def test_bad_ext_ref_non_thin_pack(self):
         blob, = self.store_blobs([b'blob'])
         f = BytesIO()
-        entries = build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))],
-                             store=self.store)
+        build_pack(f, [(REF_DELTA, (blob.id, b'blob1'))], store=self.store)
         pack_iter = self.make_pack_iter(f, thin=False)
         try:
             list(pack_iter._walk_all_chains())
@@ -1082,5 +1100,7 @@ class EncodeCopyOperationTests(TestCase):
     def test_basic(self):
         self.assertEqual(b'\x80', _encode_copy_operation(0, 0))
         self.assertEqual(b'\x91\x01\x0a', _encode_copy_operation(1, 10))
-        self.assertEqual(b'\xb1\x64\xe8\x03', _encode_copy_operation(100, 1000))
-        self.assertEqual(b'\x93\xe8\x03\x01', _encode_copy_operation(1000, 1))
+        self.assertEqual(b'\xb1\x64\xe8\x03',
+                         _encode_copy_operation(100, 1000))
+        self.assertEqual(b'\x93\xe8\x03\x01',
+                         _encode_copy_operation(1000, 1))

+ 88 - 46
dulwich/tests/test_patch.py

@@ -57,7 +57,8 @@ class WriteCommitPatchTests(TestCase):
         write_commit_patch(f, c, b"CONTENTS", (1, 1), version="custom")
         f.seek(0)
         lines = f.readlines()
-        self.assertTrue(lines[0].startswith(b"From 0b0d34d1b5b596c928adc9a727a4b9e03d025298"))
+        self.assertTrue(lines[0].startswith(
+                    b"From 0b0d34d1b5b596c928adc9a727a4b9e03d025298"))
         self.assertEqual(lines[1], b"From: Jelmer <jelmer@samba.org>\n")
         self.assertTrue(lines[2].startswith(b"Date: "))
         self.assertEqual([
@@ -77,10 +78,11 @@ class WriteCommitPatchTests(TestCase):
 class ReadGitAmPatch(TestCase):
 
     def test_extract_string(self):
-        text = b"""From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
+        text = b"""\
+From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
 From: Jelmer Vernooij <jelmer@samba.org>
 Date: Thu, 15 Apr 2010 15:40:28 +0200
-Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a lintian warning).
+Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a warning).
 
 ---
  pixmaps/prey.ico |  Bin 9662 -> 9662 bytes
@@ -89,12 +91,13 @@ Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a lintian war
 
 -- 
 1.7.0.4
-"""
-        c, diff, version = git_am_patch_split(StringIO(text.decode("utf-8")), "utf-8")
+"""  # noqa: W291
+        c, diff, version = git_am_patch_split(
+                StringIO(text.decode("utf-8")), "utf-8")
         self.assertEqual(b"Jelmer Vernooij <jelmer@samba.org>", c.committer)
         self.assertEqual(b"Jelmer Vernooij <jelmer@samba.org>", c.author)
         self.assertEqual(b"Remove executable bit from prey.ico "
-            b"(triggers a lintian warning).\n", c.message)
+                         b"(triggers a warning).\n", c.message)
         self.assertEqual(b""" pixmaps/prey.ico |  Bin 9662 -> 9662 bytes
  1 files changed, 0 insertions(+), 0 deletions(-)
  mode change 100755 => 100644 pixmaps/prey.ico
@@ -103,10 +106,11 @@ Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a lintian war
         self.assertEqual(b"1.7.0.4", version)
 
     def test_extract_bytes(self):
-        text = b"""From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
+        text = b"""\
+From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
 From: Jelmer Vernooij <jelmer@samba.org>
 Date: Thu, 15 Apr 2010 15:40:28 +0200
-Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a lintian warning).
+Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a warning).
 
 ---
  pixmaps/prey.ico |  Bin 9662 -> 9662 bytes
@@ -115,12 +119,12 @@ Subject: [PATCH 1/2] Remove executable bit from prey.ico (triggers a lintian war
 
 -- 
 1.7.0.4
-"""
+"""  # noqa: W291
         c, diff, version = git_am_patch_split(BytesIO(text))
         self.assertEqual(b"Jelmer Vernooij <jelmer@samba.org>", c.committer)
         self.assertEqual(b"Jelmer Vernooij <jelmer@samba.org>", c.author)
         self.assertEqual(b"Remove executable bit from prey.ico "
-            b"(triggers a lintian warning).\n", c.message)
+                         b"(triggers a warning).\n", c.message)
         self.assertEqual(b""" pixmaps/prey.ico |  Bin 9662 -> 9662 bytes
  1 files changed, 0 insertions(+), 0 deletions(-)
  mode change 100755 => 100644 pixmaps/prey.ico
@@ -145,9 +149,15 @@ Subject:  [Dulwich-users] [PATCH] Added unit tests for
 
 -- 
 1.7.0.4
-"""
+"""  # noqa: W291
         c, diff, version = git_am_patch_split(BytesIO(text), "utf-8")
-        self.assertEqual(b'Added unit tests for dulwich.object_store.tree_lookup_path.\n\n* dulwich/tests/test_object_store.py\n  (TreeLookupPathTests): This test case contains a few tests that ensure the\n   tree_lookup_path function works as expected.\n', c.message)
+        self.assertEqual(b'''\
+Added unit tests for dulwich.object_store.tree_lookup_path.
+
+* dulwich/tests/test_object_store.py
+  (TreeLookupPathTests): This test case contains a few tests that ensure the
+   tree_lookup_path function works as expected.
+''', c.message)
 
     def test_extract_pseudo_from_header(self):
         text = b"""From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
@@ -168,13 +178,20 @@ From: Jelmer Vernooy <jelmer@debian.org>
 
 -- 
 1.7.0.4
-"""
+"""  # noqa: W291
         c, diff, version = git_am_patch_split(BytesIO(text), "utf-8")
         self.assertEqual(b"Jelmer Vernooy <jelmer@debian.org>", c.author)
-        self.assertEqual(b'Added unit tests for dulwich.object_store.tree_lookup_path.\n\n* dulwich/tests/test_object_store.py\n  (TreeLookupPathTests): This test case contains a few tests that ensure the\n   tree_lookup_path function works as expected.\n', c.message)
+        self.assertEqual(b'''\
+Added unit tests for dulwich.object_store.tree_lookup_path.
+
+* dulwich/tests/test_object_store.py
+  (TreeLookupPathTests): This test case contains a few tests that ensure the
+   tree_lookup_path function works as expected.
+''', c.message)
 
     def test_extract_no_version_tail(self):
-        text = b"""From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
+        text = b"""\
+From ff643aae102d8870cac88e8f007e70f58f3a7363 Mon Sep 17 00:00:00 2001
 From: Jelmer Vernooij <jelmer@samba.org>
 Date: Thu, 15 Apr 2010 15:40:28 +0200
 Subject:  [Dulwich-users] [PATCH] Added unit tests for
@@ -192,8 +209,11 @@ From: Jelmer Vernooy <jelmer@debian.org>
         self.assertEqual(None, version)
 
     def test_extract_mercurial(self):
-        raise SkipTest("git_am_patch_split doesn't handle Mercurial patches properly yet")
-        expected_diff = """diff --git a/dulwich/tests/test_patch.py b/dulwich/tests/test_patch.py
+        raise SkipTest(
+                "git_am_patch_split doesn't handle Mercurial patches "
+                "properly yet")
+        expected_diff = """\
+diff --git a/dulwich/tests/test_patch.py b/dulwich/tests/test_patch.py
 --- a/dulwich/tests/test_patch.py
 +++ b/dulwich/tests/test_patch.py
 @@ -158,7 +158,7 @@
@@ -205,8 +225,10 @@ From: Jelmer Vernooy <jelmer@debian.org>
  
  
  class DiffTests(TestCase):
-"""
-        text = """From dulwich-users-bounces+jelmer=samba.org@lists.launchpad.net Mon Nov 29 00:58:18 2010
+"""  # noqa: W291,W293
+        text = """\
+From dulwich-users-bounces+jelmer=samba.org@lists.launchpad.net \
+Mon Nov 29 00:58:18 2010
 Date: Sun, 28 Nov 2010 17:57:27 -0600
 From: Augie Fackler <durin42@gmail.com>
 To: dulwich-users <dulwich-users@lists.launchpad.net>
@@ -223,7 +245,7 @@ Post to     : dulwich-users@lists.launchpad.net
 Unsubscribe : https://launchpad.net/~dulwich-users
 More help   : https://help.launchpad.net/ListHelp
 
-""" % expected_diff
+""" % expected_diff  # noqa: W291
         c, diff, version = git_am_patch_split(BytesIO(text))
         self.assertEqual(expected_diff, diff)
         self.assertEqual(None, version)
@@ -234,8 +256,9 @@ class DiffTests(TestCase):
 
     def test_blob_diff(self):
         f = BytesIO()
-        write_blob_diff(f, (b"foo.txt", 0o644, Blob.from_string(b"old\nsame\n")),
-                           (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")))
+        write_blob_diff(
+            f, (b"foo.txt", 0o644, Blob.from_string(b"old\nsame\n")),
+            (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")))
         self.assertEqual([
             b"diff --git a/foo.txt b/bar.txt",
             b"index 3b0f961..a116b51 644",
@@ -249,8 +272,9 @@ class DiffTests(TestCase):
 
     def test_blob_add(self):
         f = BytesIO()
-        write_blob_diff(f, (None, None, None),
-                           (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")))
+        write_blob_diff(
+            f, (None, None, None),
+            (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")))
         self.assertEqual([
              b'diff --git /dev/null b/bar.txt',
              b'new mode 644',
@@ -264,8 +288,9 @@ class DiffTests(TestCase):
 
     def test_blob_remove(self):
         f = BytesIO()
-        write_blob_diff(f, (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")),
-                           (None, None, None))
+        write_blob_diff(
+            f, (b"bar.txt", 0o644, Blob.from_string(b"new\nsame\n")),
+            (None, None, None))
         self.assertEqual([
             b'diff --git a/bar.txt /dev/null',
             b'deleted mode 644',
@@ -326,10 +351,10 @@ class DiffTests(TestCase):
         store = MemoryObjectStore()
         tree1 = Tree()
         tree1.add(b"asubmodule", S_IFGITLINK,
-            b"06d0bdd9e2e20377b3180e4986b14c8549b393e4")
+                  b"06d0bdd9e2e20377b3180e4986b14c8549b393e4")
         tree2 = Tree()
         tree2.add(b"asubmodule", S_IFGITLINK,
-            b"cc975646af69f279396d4d5e1379ac6af80ee637")
+                  b"cc975646af69f279396d4d5e1379ac6af80ee637")
         store.add_objects([(o, None) for o in [tree1, tree2]])
         write_tree_diff(f, store, tree1.id, tree2.id)
         self.assertEqual([
@@ -401,15 +426,20 @@ class DiffTests(TestCase):
         f = BytesIO()
         # Prepare two slightly different PNG headers
         b1 = Blob.from_string(
-            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52"
-            b"\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x04\x00\x00\x00\x05\x04\x8b")
+            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+            b"\x00\x00\x00\x0d\x49\x48\x44\x52"
+            b"\x00\x00\x01\xd5\x00\x00\x00\x9f"
+            b"\x08\x04\x00\x00\x00\x05\x04\x8b")
         b2 = Blob.from_string(
-            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52"
-            b"\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x03\x00\x00\x00\x98\xd3\xb3")
+            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+            b"\x00\x00\x00\x0d\x49\x48\x44\x52"
+            b"\x00\x00\x01\xd5\x00\x00\x00\x9f"
+            b"\x08\x03\x00\x00\x00\x98\xd3\xb3")
         store = MemoryObjectStore()
         store.add_objects([(b1, None), (b2, None)])
-        write_object_diff(f, store, (b'foo.png', 0o644, b1.id),
-                                    (b'bar.png', 0o644, b2.id), diff_binary=True)
+        write_object_diff(
+            f, store, (b'foo.png', 0o644, b1.id),
+            (b'bar.png', 0o644, b2.id), diff_binary=True)
         self.assertEqual([
             b'diff --git a/foo.png b/bar.png',
             b'index f73e47d..06364b7 644',
@@ -419,9 +449,11 @@ class DiffTests(TestCase):
             b' \x89PNG',
             b' \x1a',
             b' \x00\x00\x00',
-            b'-IHDR\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x04\x00\x00\x00\x05\x04\x8b',
+            b'-IHDR\x00\x00\x01\xd5\x00\x00\x00'
+            b'\x9f\x08\x04\x00\x00\x00\x05\x04\x8b',
             b'\\ No newline at end of file',
-            b'+IHDR\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x03\x00\x00\x00\x98\xd3\xb3',
+            b'+IHDR\x00\x00\x01\xd5\x00\x00\x00\x9f'
+            b'\x08\x03\x00\x00\x00\x98\xd3\xb3',
             b'\\ No newline at end of file'
             ], f.getvalue().splitlines())
 
@@ -429,11 +461,15 @@ class DiffTests(TestCase):
         f = BytesIO()
         # Prepare two slightly different PNG headers
         b1 = Blob.from_string(
-            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52"
-            b"\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x04\x00\x00\x00\x05\x04\x8b")
+            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+            b"\x00\x00\x00\x0d\x49\x48\x44\x52"
+            b"\x00\x00\x01\xd5\x00\x00\x00\x9f"
+            b"\x08\x04\x00\x00\x00\x05\x04\x8b")
         b2 = Blob.from_string(
-            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52"
-            b"\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x03\x00\x00\x00\x98\xd3\xb3")
+            b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a"
+            b"\x00\x00\x00\x0d\x49\x48\x44\x52"
+            b"\x00\x00\x01\xd5\x00\x00\x00\x9f"
+            b"\x08\x03\x00\x00\x00\x98\xd3\xb3")
         store = MemoryObjectStore()
         store.add_objects([(b1, None), (b2, None)])
         write_object_diff(f, store, (b'foo.png', 0o644, b1.id),
@@ -447,8 +483,10 @@ class DiffTests(TestCase):
     def test_object_diff_add_bin_blob(self):
         f = BytesIO()
         b2 = Blob.from_string(
-            b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52'
-            b'\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x03\x00\x00\x00\x98\xd3\xb3')
+            b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a'
+            b'\x00\x00\x00\x0d\x49\x48\x44\x52'
+            b'\x00\x00\x01\xd5\x00\x00\x00\x9f'
+            b'\x08\x03\x00\x00\x00\x98\xd3\xb3')
         store = MemoryObjectStore()
         store.add_object(b2)
         write_object_diff(f, store, (None, None, None),
@@ -463,8 +501,10 @@ class DiffTests(TestCase):
     def test_object_diff_remove_bin_blob(self):
         f = BytesIO()
         b1 = Blob.from_string(
-            b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52'
-            b'\x00\x00\x01\xd5\x00\x00\x00\x9f\x08\x04\x00\x00\x00\x05\x04\x8b')
+            b'\x89\x50\x4e\x47\x0d\x0a\x1a\x0a'
+            b'\x00\x00\x00\x0d\x49\x48\x44\x52'
+            b'\x00\x00\x01\xd5\x00\x00\x00\x9f'
+            b'\x08\x04\x00\x00\x00\x05\x04\x8b')
         store = MemoryObjectStore()
         store.add_object(b1)
         write_object_diff(f, store, (b'foo.png', 0o644, b1.id),
@@ -481,8 +521,10 @@ class DiffTests(TestCase):
         b1 = Blob.from_string(b"new\nsame\n")
         store = MemoryObjectStore()
         store.add_object(b1)
-        write_object_diff(f, store, (b"bar.txt", 0o644, b1.id),
-            (b"bar.txt", 0o160000, b"06d0bdd9e2e20377b3180e4986b14c8549b393e4"))
+        write_object_diff(
+            f, store, (b"bar.txt", 0o644, b1.id),
+            (b"bar.txt", 0o160000,
+                b"06d0bdd9e2e20377b3180e4986b14c8549b393e4"))
         self.assertEqual([
             b'diff --git a/bar.txt b/bar.txt',
             b'old mode 644',

+ 329 - 128
dulwich/tests/test_porcelain.py

@@ -45,6 +45,7 @@ from dulwich.tests import (
     )
 from dulwich.tests.utils import (
     build_commit_graph,
+    make_commit,
     make_object,
     )
 
@@ -66,12 +67,13 @@ class ArchiveTests(PorcelainTestCase):
     """Tests for the archive command."""
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"refs/heads/master"] = c3.id
         out = BytesIO()
         err = BytesIO()
         porcelain.archive(self.repo.path, b"refs/heads/master", outstream=out,
-            errstream=err)
+                          errstream=err)
         self.assertEqual(b"", err.getvalue())
         tf = tarfile.TarFile(fileobj=out)
         self.addCleanup(tf.close)
@@ -81,22 +83,24 @@ class ArchiveTests(PorcelainTestCase):
 class UpdateServerInfoTests(PorcelainTestCase):
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"refs/heads/foo"] = c3.id
         porcelain.update_server_info(self.repo.path)
-        self.assertTrue(os.path.exists(os.path.join(self.repo.controldir(),
-            'info', 'refs')))
+        self.assertTrue(os.path.exists(
+                os.path.join(self.repo.controldir(), 'info', 'refs')))
 
 
 class CommitTests(PorcelainTestCase):
 
     def test_custom_author(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"refs/heads/foo"] = c3.id
-        sha = porcelain.commit(self.repo.path, message=b"Some message",
-                author=b"Joe <joe@example.com>", committer=b"Bob <bob@example.com>")
+        sha = porcelain.commit(
+                self.repo.path, message=b"Some message",
+                author=b"Joe <joe@example.com>",
+                committer=b"Bob <bob@example.com>")
         self.assertTrue(isinstance(sha, bytes))
         self.assertEqual(len(sha), 40)
 
@@ -169,10 +173,12 @@ class CloneTests(PorcelainTestCase):
         target_path = tempfile.mkdtemp()
         errstream = BytesIO()
         self.addCleanup(shutil.rmtree, target_path)
-        r = porcelain.clone(self.repo.path, target_path,
-                            bare=True, errstream=errstream)
-        self.assertEqual(r.path, target_path)
-        self.assertRaises(KeyError, Repo(target_path).head)
+        with porcelain.clone(
+                self.repo.path, target_path, bare=True,
+                errstream=errstream) as r:
+            self.assertEqual(r.path, target_path)
+        with Repo(target_path) as r:
+            self.assertRaises(KeyError, r.head)
         self.assertFalse(b'f1' in os.listdir(target_path))
         self.assertFalse(b'f2' in os.listdir(target_path))
 
@@ -187,7 +193,8 @@ class CloneTests(PorcelainTestCase):
         target_path = tempfile.mkdtemp()
         errstream = BytesIO()
         self.addCleanup(shutil.rmtree, target_path)
-        self.assertRaises(ValueError, porcelain.clone, self.repo.path,
+        self.assertRaises(
+            ValueError, porcelain.clone, self.repo.path,
             target_path, checkout=True, bare=True, errstream=errstream)
 
     def test_no_head_no_checkout(self):
@@ -198,10 +205,11 @@ class CloneTests(PorcelainTestCase):
         (c1, ) = build_commit_graph(self.repo.object_store, commit_spec, trees)
         self.repo.refs[b"refs/heads/master"] = c1.id
         target_path = tempfile.mkdtemp()
-        errstream = BytesIO()
         self.addCleanup(shutil.rmtree, target_path)
-        porcelain.clone(self.repo.path, target_path, checkout=True,
-            errstream=errstream)
+        errstream = BytesIO()
+        r = porcelain.clone(
+            self.repo.path, target_path, checkout=True, errstream=errstream)
+        r.close()
 
 
 class InitTests(TestCase):
@@ -220,13 +228,13 @@ class InitTests(TestCase):
 class AddTests(PorcelainTestCase):
 
     def test_add_default_paths(self):
-
         # create a file for initial commit
-        with open(os.path.join(self.repo.path, 'blah'), 'w') as f:
+        fullpath = os.path.join(self.repo.path, 'blah')
+        with open(fullpath, 'w') as f:
             f.write("\n")
-        porcelain.add(repo=self.repo.path, paths=['blah'])
+        porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test',
-            author=b'test', committer=b'test')
+                         author=b'test', committer=b'test')
 
         # Add a second test file and a file in a directory
         with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
@@ -234,18 +242,57 @@ class AddTests(PorcelainTestCase):
         os.mkdir(os.path.join(self.repo.path, 'adir'))
         with open(os.path.join(self.repo.path, 'adir', 'afile'), 'w') as f:
             f.write("\n")
-        porcelain.add(self.repo.path)
+        cwd = os.getcwd()
+        try:
+            os.chdir(self.repo.path)
+            porcelain.add(self.repo.path)
+        finally:
+            os.chdir(cwd)
 
         # Check that foo was added and nothing in .git was modified
         index = self.repo.open_index()
         self.assertEqual(sorted(index), [b'adir/afile', b'blah', b'foo'])
 
+    def test_add_default_paths_subdir(self):
+        os.mkdir(os.path.join(self.repo.path, 'foo'))
+        with open(os.path.join(self.repo.path, 'blah'), 'w') as f:
+            f.write("\n")
+        with open(os.path.join(self.repo.path, 'foo', 'blie'), 'w') as f:
+            f.write("\n")
+
+        cwd = os.getcwd()
+        try:
+            os.chdir(os.path.join(self.repo.path, 'foo'))
+            porcelain.add(repo=self.repo.path)
+            porcelain.commit(repo=self.repo.path, message=b'test',
+                             author=b'test', committer=b'test')
+        finally:
+            os.chdir(cwd)
+
+        index = self.repo.open_index()
+        self.assertEqual(sorted(index), [b'foo/blie'])
+
     def test_add_file(self):
-        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
             f.write("BAR")
-        porcelain.add(self.repo.path, paths=["foo"])
+        porcelain.add(self.repo.path, paths=[fullpath])
         self.assertIn(b"foo", self.repo.open_index())
 
+    def test_add_ignored(self):
+        with open(os.path.join(self.repo.path, '.gitignore'), 'w') as f:
+            f.write("foo")
+        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+            f.write("BAR")
+        with open(os.path.join(self.repo.path, 'bar'), 'w') as f:
+            f.write("BAR")
+        (added, ignored) = porcelain.add(self.repo.path, paths=[
+            os.path.join(self.repo.path, "foo"),
+            os.path.join(self.repo.path, "bar")])
+        self.assertIn(b"bar", self.repo.open_index())
+        self.assertEqual(set(['bar']), set(added))
+        self.assertEqual(set(['foo']), ignored)
+
     def test_add_file_absolute_path(self):
         # Absolute paths are (not yet) supported
         with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
@@ -257,25 +304,48 @@ class AddTests(PorcelainTestCase):
 class RemoveTests(PorcelainTestCase):
 
     def test_remove_file(self):
-        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
+            f.write("BAR")
+        porcelain.add(self.repo.path, paths=[fullpath])
+        porcelain.commit(repo=self.repo, message=b'test', author=b'test',
+                         committer=b'test')
+        self.assertTrue(os.path.exists(os.path.join(self.repo.path, 'foo')))
+        cwd = os.getcwd()
+        try:
+            os.chdir(self.repo.path)
+            porcelain.remove(self.repo.path, paths=["foo"])
+        finally:
+            os.chdir(cwd)
+        self.assertFalse(os.path.exists(os.path.join(self.repo.path, 'foo')))
+
+    def test_remove_file_staged(self):
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
             f.write("BAR")
-        porcelain.add(self.repo.path, paths=["foo"])
-        porcelain.rm(self.repo.path, paths=["foo"])
+        cwd = os.getcwd()
+        try:
+            os.chdir(self.repo.path)
+            porcelain.add(self.repo.path, paths=[fullpath])
+            self.assertRaises(Exception, porcelain.rm, self.repo.path,
+                              paths=["foo"])
+        finally:
+            os.chdir(cwd)
 
 
 class LogTests(PorcelainTestCase):
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
         outstream = StringIO()
         porcelain.log(self.repo.path, outstream=outstream)
         self.assertEqual(3, outstream.getvalue().count("-" * 50))
 
     def test_max_entries(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
         outstream = StringIO()
         porcelain.log(self.repo.path, outstream=outstream, max_entries=1)
@@ -285,16 +355,16 @@ class LogTests(PorcelainTestCase):
 class ShowTests(PorcelainTestCase):
 
     def test_nolist(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
         outstream = StringIO()
         porcelain.show(self.repo.path, objects=c3.id, outstream=outstream)
         self.assertTrue(outstream.getvalue().startswith("-" * 50))
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
         outstream = StringIO()
         porcelain.show(self.repo.path, objects=[c3.id], outstream=outstream)
@@ -307,44 +377,104 @@ class ShowTests(PorcelainTestCase):
         porcelain.show(self.repo.path, objects=[b.id], outstream=outstream)
         self.assertEqual(outstream.getvalue(), "The Foo\n")
 
+    def test_commit_no_parent(self):
+        a = Blob.from_string(b"The Foo\n")
+        ta = Tree()
+        ta.add(b"somename", 0o100644, a.id)
+        ca = make_commit(tree=ta.id)
+        self.repo.object_store.add_objects([(a, None), (ta, None), (ca, None)])
+        outstream = StringIO()
+        porcelain.show(self.repo.path, objects=[ca.id], outstream=outstream)
+        self.assertEqual(outstream.getvalue(), """\
+--------------------------------------------------
+commit: 344da06c1bb85901270b3e8875c988a027ec087d
+Author: Test Author <test@nodomain.com>
+Committer: Test Committer <test@nodomain.com>
+Date:   Fri Jan 01 2010 00:00:00 +0000
+
+Test message.
+
+diff --git /dev/null b/somename
+new mode 100644
+index 0000000..ea5c7bf 100644
+--- /dev/null
++++ b/somename
+@@ -1,0 +1,1 @@
++The Foo
+""")
+
+    def test_commit_with_change(self):
+        a = Blob.from_string(b"The Foo\n")
+        ta = Tree()
+        ta.add(b"somename", 0o100644, a.id)
+        ca = make_commit(tree=ta.id)
+        b = Blob.from_string(b"The Bar\n")
+        tb = Tree()
+        tb.add(b"somename", 0o100644, a.id)
+        cb = make_commit(tree=tb.id)
+        self.repo.object_store.add_objects(
+            [(a, None), (b, None), (ta, None), (tb, None),
+             (ca, None), (cb, None)])
+        outstream = StringIO()
+        porcelain.show(self.repo.path, objects=[cb.id], outstream=outstream)
+        self.assertEqual(outstream.getvalue(), """\
+--------------------------------------------------
+commit: 344da06c1bb85901270b3e8875c988a027ec087d
+Author: Test Author <test@nodomain.com>
+Committer: Test Committer <test@nodomain.com>
+Date:   Fri Jan 01 2010 00:00:00 +0000
+
+Test message.
+
+diff --git /dev/null b/somename
+new mode 100644
+index 0000000..ea5c7bf 100644
+--- /dev/null
++++ b/somename
+@@ -1,0 +1,1 @@
++The Foo
+""")
+
 
 class SymbolicRefTests(PorcelainTestCase):
 
     def test_set_wrong_symbolic_ref(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
 
-        self.assertRaises(ValueError, porcelain.symbolic_ref, self.repo.path, b'foobar')
+        self.assertRaises(ValueError, porcelain.symbolic_ref, self.repo.path,
+                          b'foobar')
 
     def test_set_force_wrong_symbolic_ref(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
 
         porcelain.symbolic_ref(self.repo.path, b'force_foobar', force=True)
 
-        #test if we actually changed the file
+        # test if we actually changed the file
         with self.repo.get_named_file('HEAD') as f:
             new_ref = f.read()
         self.assertEqual(new_ref, b'ref: refs/heads/force_foobar\n')
 
     def test_set_symbolic_ref(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
 
         porcelain.symbolic_ref(self.repo.path, b'master')
 
     def test_set_symbolic_ref_other_than_master(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]], attrs=dict(refs='develop'))
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]],
+                attrs=dict(refs='develop'))
         self.repo.refs[b"HEAD"] = c3.id
         self.repo.refs[b"refs/heads/develop"] = c3.id
 
         porcelain.symbolic_ref(self.repo.path, b'develop')
 
-        #test if we actually changed the file
+        # test if we actually changed the file
         with self.repo.get_named_file('HEAD') as f:
             new_ref = f.read()
         self.assertEqual(new_ref, b'ref: refs/heads/develop\n')
@@ -353,19 +483,20 @@ class SymbolicRefTests(PorcelainTestCase):
 class DiffTreeTests(PorcelainTestCase):
 
     def test_empty(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
         outstream = BytesIO()
-        porcelain.diff_tree(self.repo.path, c2.tree, c3.tree, outstream=outstream)
+        porcelain.diff_tree(self.repo.path, c2.tree, c3.tree,
+                            outstream=outstream)
         self.assertEqual(outstream.getvalue(), b"")
 
 
 class CommitTreeTests(PorcelainTestCase):
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         b = Blob()
         b.data = b"foo the bar"
         t = Tree()
@@ -383,8 +514,8 @@ class CommitTreeTests(PorcelainTestCase):
 class RevListTests(PorcelainTestCase):
 
     def test_simple(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         outstream = BytesIO()
         porcelain.rev_list(
             self.repo.path, [c3.id], outstream=outstream)
@@ -398,12 +529,12 @@ class RevListTests(PorcelainTestCase):
 class TagCreateTests(PorcelainTestCase):
 
     def test_annotated(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
 
         porcelain.tag_create(self.repo.path, b"tryme", b'foo <foo@bar.com>',
-                b'bar', annotated=True)
+                             b'bar', annotated=True)
 
         tags = self.repo.refs.as_dict(b"refs/tags")
         self.assertEqual(list(tags.keys()), [b"tryme"])
@@ -414,8 +545,8 @@ class TagCreateTests(PorcelainTestCase):
         self.assertLess(time.time() - tag.tag_time, 5)
 
     def test_unannotated(self):
-        c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
-            [3, 1, 2]])
+        c1, c2, c3 = build_commit_graph(
+                self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"HEAD"] = c3.id
 
         porcelain.tag_create(self.repo.path, b"tryme", annotated=False)
@@ -454,12 +585,13 @@ class TagDeleteTests(PorcelainTestCase):
 class ResetTests(PorcelainTestCase):
 
     def test_hard_head(self):
-        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
             f.write("BAR")
-        porcelain.add(self.repo.path, paths=["foo"])
+        porcelain.add(self.repo.path, paths=[fullpath])
         porcelain.commit(self.repo.path, message=b"Some message",
-                committer=b"Jane <jane@example.com>",
-                author=b"John <john@example.com>")
+                         committer=b"Jane <jane@example.com>",
+                         author=b"John <john@example.com>")
 
         with open(os.path.join(self.repo.path, 'foo'), 'wb') as f:
             f.write(b"OOH")
@@ -474,19 +606,20 @@ class ResetTests(PorcelainTestCase):
         self.assertEqual([], changes)
 
     def test_hard_commit(self):
-        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
             f.write("BAR")
-        porcelain.add(self.repo.path, paths=["foo"])
+        porcelain.add(self.repo.path, paths=[fullpath])
         sha = porcelain.commit(self.repo.path, message=b"Some message",
-                committer=b"Jane <jane@example.com>",
-                author=b"John <john@example.com>")
+                               committer=b"Jane <jane@example.com>",
+                               author=b"John <john@example.com>")
 
-        with open(os.path.join(self.repo.path, 'foo'), 'wb') as f:
+        with open(fullpath, 'wb') as f:
             f.write(b"BAZ")
-        porcelain.add(self.repo.path, paths=["foo"])
+        porcelain.add(self.repo.path, paths=[fullpath])
         porcelain.commit(self.repo.path, message=b"Some other message",
-                committer=b"Jane <jane@example.com>",
-                author=b"John <john@example.com>")
+                         committer=b"Jane <jane@example.com>",
+                         author=b"John <john@example.com>")
 
         porcelain.reset(self.repo, "hard", sha)
 
@@ -510,13 +643,13 @@ class PushTests(PorcelainTestCase):
         errstream = BytesIO()
 
         porcelain.commit(repo=self.repo.path, message=b'init',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         # Setup target repo cloned from temp test repo
         clone_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, clone_path)
         target_repo = porcelain.clone(self.repo.path, target=clone_path,
-            errstream=errstream)
+                                      errstream=errstream)
         try:
             self.assertEqual(target_repo[b'HEAD'], self.repo[b'HEAD'])
         finally:
@@ -525,9 +658,9 @@ class PushTests(PorcelainTestCase):
         # create a second file to be pushed back to origin
         handle, fullpath = tempfile.mkstemp(dir=clone_path)
         os.close(handle)
-        porcelain.add(repo=clone_path, paths=[os.path.basename(fullpath)])
+        porcelain.add(repo=clone_path, paths=[fullpath])
         porcelain.commit(repo=clone_path, message=b'push',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         # Setup a non-checked out branch in the remote
         refs_path = b"refs/heads/foo"
@@ -536,8 +669,8 @@ class PushTests(PorcelainTestCase):
         self.repo.refs[refs_path] = new_id
 
         # Push to the remote
-        porcelain.push(clone_path, self.repo.path, b"HEAD:" + refs_path, outstream=outstream,
-            errstream=errstream)
+        porcelain.push(clone_path, self.repo.path, b"HEAD:" + refs_path,
+                       outstream=outstream, errstream=errstream)
 
         # Check that the target and source
         with Repo(clone_path) as r_clone:
@@ -546,14 +679,14 @@ class PushTests(PorcelainTestCase):
                 b'refs/heads/foo': r_clone[b'HEAD'].id,
                 b'refs/heads/master': new_id,
                 }, self.repo.get_refs())
-            self.assertEqual(r_clone[b'HEAD'].id, self.repo.refs[refs_path])
+            self.assertEqual(r_clone[b'HEAD'].id, self.repo[refs_path].id)
 
             # Get the change in the target repo corresponding to the add
             # this will be in the foo branch.
             change = list(tree_changes(self.repo, self.repo[b'HEAD'].tree,
                                        self.repo[b'refs/heads/foo'].tree))[0]
             self.assertEqual(os.path.basename(fullpath),
-                change.new.path.decode('ascii'))
+                             change.new.path.decode('ascii'))
 
     def test_delete(self):
         """Basic test of porcelain push, removing a branch.
@@ -562,13 +695,13 @@ class PushTests(PorcelainTestCase):
         errstream = BytesIO()
 
         porcelain.commit(repo=self.repo.path, message=b'init',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         # Setup target repo cloned from temp test repo
         clone_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, clone_path)
         target_repo = porcelain.clone(self.repo.path, target=clone_path,
-            errstream=errstream)
+                                      errstream=errstream)
         target_repo.close()
 
         # Setup a non-checked out branch in the remote
@@ -578,8 +711,8 @@ class PushTests(PorcelainTestCase):
         self.repo.refs[refs_path] = new_id
 
         # Push to the remote
-        porcelain.push(clone_path, self.repo.path, b":" + refs_path, outstream=outstream,
-            errstream=errstream)
+        porcelain.push(clone_path, self.repo.path, b":" + refs_path,
+                       outstream=outstream, errstream=errstream)
 
         self.assertEqual({
             b'HEAD': new_id,
@@ -587,7 +720,6 @@ class PushTests(PorcelainTestCase):
             }, self.repo.get_refs())
 
 
-
 class PullTests(PorcelainTestCase):
 
     def setUp(self):
@@ -595,8 +727,7 @@ class PullTests(PorcelainTestCase):
         # create a file for initial commit
         handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
         os.close(handle)
-        filename = os.path.basename(fullpath)
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test',
                          author=b'test', committer=b'test')
 
@@ -604,16 +735,15 @@ class PullTests(PorcelainTestCase):
         self.target_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, self.target_path)
         target_repo = porcelain.clone(self.repo.path, target=self.target_path,
-                errstream=BytesIO())
+                                      errstream=BytesIO())
         target_repo.close()
 
         # create a second file to be pushed
         handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
         os.close(handle)
-        filename = os.path.basename(fullpath)
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test2',
-            author=b'test2', committer=b'test2')
+                         author=b'test2', committer=b'test2')
 
         self.assertTrue(b'refs/heads/master' in self.repo.refs)
         self.assertTrue(b'refs/heads/master' in target_repo.refs)
@@ -624,7 +754,7 @@ class PullTests(PorcelainTestCase):
 
         # Pull changes into the cloned repo
         porcelain.pull(self.target_path, self.repo.path, b'refs/heads/master',
-            outstream=outstream, errstream=errstream)
+                       outstream=outstream, errstream=errstream)
 
         # Check the target repo for pushed changes
         with Repo(self.target_path) as r:
@@ -660,9 +790,9 @@ class StatusTests(PorcelainTestCase):
         with open(fullpath, 'w') as f:
             f.write('origstuff')
 
-        porcelain.add(repo=self.repo.path, paths=['foo'])
+        porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         # modify access and modify time of path
         os.utime(fullpath, (0, 0))
@@ -675,11 +805,12 @@ class StatusTests(PorcelainTestCase):
         fullpath = os.path.join(self.repo.path, filename_add)
         with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename_add)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
 
         results = porcelain.status(self.repo)
 
-        self.assertEqual(results.staged['add'][0], filename_add.encode('ascii'))
+        self.assertEqual(results.staged['add'][0],
+                         filename_add.encode('ascii'))
         self.assertEqual(results.unstaged, [b'foo'])
 
     def test_get_tree_changes_add(self):
@@ -687,16 +818,18 @@ class StatusTests(PorcelainTestCase):
 
         # Make a dummy file, stage
         filename = 'bar'
-        with open(os.path.join(self.repo.path, filename), 'w') as f:
+        fullpath = os.path.join(self.repo.path, filename)
+        with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         filename = 'foo'
-        with open(os.path.join(self.repo.path, filename), 'w') as f:
+        fullpath = os.path.join(self.repo.path, filename)
+        with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         changes = porcelain.get_tree_changes(self.repo.path)
 
         self.assertEqual(changes['add'][0], filename.encode('ascii'))
@@ -712,12 +845,12 @@ class StatusTests(PorcelainTestCase):
         fullpath = os.path.join(self.repo.path, filename)
         with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
         with open(fullpath, 'w') as f:
             f.write('otherstuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         changes = porcelain.get_tree_changes(self.repo.path)
 
         self.assertEqual(changes['modify'][0], filename.encode('ascii'))
@@ -730,12 +863,18 @@ class StatusTests(PorcelainTestCase):
 
         # Make a dummy file, stage, commit, remove
         filename = 'foo'
-        with open(os.path.join(self.repo.path, filename), 'w') as f:
+        fullpath = os.path.join(self.repo.path, filename)
+        with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
-        porcelain.rm(repo=self.repo.path, paths=[filename])
+                         author=b'', committer=b'')
+        cwd = os.getcwd()
+        try:
+            os.chdir(self.repo.path)
+            porcelain.remove(repo=self.repo.path, paths=[filename])
+        finally:
+            os.chdir(cwd)
         changes = porcelain.get_tree_changes(self.repo.path)
 
         self.assertEqual(changes['delete'][0], filename.encode('ascii'))
@@ -743,6 +882,39 @@ class StatusTests(PorcelainTestCase):
         self.assertEqual(len(changes['modify']), 0)
         self.assertEqual(len(changes['delete']), 1)
 
+    def test_get_untracked_paths(self):
+        with open(os.path.join(self.repo.path, '.gitignore'), 'w') as f:
+            f.write('ignored\n')
+        with open(os.path.join(self.repo.path, 'ignored'), 'w') as f:
+            f.write('blah\n')
+        with open(os.path.join(self.repo.path, 'notignored'), 'w') as f:
+            f.write('blah\n')
+        self.assertEqual(
+            set(['ignored', 'notignored', '.gitignore']),
+            set(porcelain.get_untracked_paths(self.repo.path, self.repo.path,
+                                              self.repo.open_index())))
+        self.assertEqual(set(['.gitignore', 'notignored']),
+                         set(porcelain.status(self.repo).untracked))
+        self.assertEqual(set(['.gitignore', 'notignored', 'ignored']),
+                         set(porcelain.status(self.repo, ignored=True)
+                             .untracked))
+
+    def test_get_untracked_paths_nested(self):
+        with open(os.path.join(self.repo.path, 'notignored'), 'w') as f:
+            f.write('blah\n')
+        subrepo = Repo.init(os.path.join(self.repo.path, 'nested'), mkdir=True)
+        with open(os.path.join(subrepo.path, 'another'), 'w') as f:
+            f.write('foo\n')
+
+        self.assertEqual(
+            set(['notignored']),
+            set(porcelain.get_untracked_paths(self.repo.path, self.repo.path,
+                                              self.repo.open_index())))
+        self.assertEqual(
+            set(['another']),
+            set(porcelain.get_untracked_paths(subrepo.path, subrepo.path,
+                                              subrepo.open_index())))
+
 
 # TODO(jelmer): Add test for dulwich.porcelain.daemon
 
@@ -752,7 +924,8 @@ class UploadPackTests(PorcelainTestCase):
 
     def test_upload_pack(self):
         outf = BytesIO()
-        exitcode = porcelain.upload_pack(self.repo.path, BytesIO(b"0000"), outf)
+        exitcode = porcelain.upload_pack(
+                self.repo.path, BytesIO(b"0000"), outf)
         outlines = outf.getvalue().splitlines()
         self.assertEqual([b"0000"], outlines)
         self.assertEqual(0, exitcode)
@@ -763,17 +936,21 @@ class ReceivePackTests(PorcelainTestCase):
 
     def test_receive_pack(self):
         filename = 'foo'
-        with open(os.path.join(self.repo.path, filename), 'w') as f:
+        fullpath = os.path.join(self.repo.path, filename)
+        with open(fullpath, 'w') as f:
             f.write('stuff')
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         self.repo.do_commit(message=b'test status',
-            author=b'', committer=b'', author_timestamp=1402354300,
-            commit_timestamp=1402354300, author_timezone=0, commit_timezone=0)
+                            author=b'', committer=b'',
+                            author_timestamp=1402354300,
+                            commit_timestamp=1402354300, author_timezone=0,
+                            commit_timezone=0)
         outf = BytesIO()
-        exitcode = porcelain.receive_pack(self.repo.path, BytesIO(b"0000"), outf)
+        exitcode = porcelain.receive_pack(
+                self.repo.path, BytesIO(b"0000"), outf)
         outlines = outf.getvalue().splitlines()
         self.assertEqual([
-            b'00739e65bdcf4a22cdd4f3700604a275cd2aaf146b23 HEAD\x00 report-status '
+            b'00739e65bdcf4a22cdd4f3700604a275cd2aaf146b23 HEAD\x00 report-status '  # noqa: E501
             b'delete-refs quiet ofs-delta side-band-64k no-done',
             b'003f9e65bdcf4a22cdd4f3700604a275cd2aaf146b23 refs/heads/master',
             b'0000'], outlines)
@@ -832,8 +1009,7 @@ class FetchTests(PorcelainTestCase):
         # create a file for initial commit
         handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
         os.close(handle)
-        filename = os.path.basename(fullpath)
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test',
                          author=b'test', committer=b'test')
 
@@ -841,22 +1017,21 @@ class FetchTests(PorcelainTestCase):
         target_path = tempfile.mkdtemp()
         self.addCleanup(shutil.rmtree, target_path)
         target_repo = porcelain.clone(self.repo.path, target=target_path,
-            errstream=errstream)
+                                      errstream=errstream)
 
         # create a second file to be pushed
         handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
         os.close(handle)
-        filename = os.path.basename(fullpath)
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.commit(repo=self.repo.path, message=b'test2',
-            author=b'test2', committer=b'test2')
+                         author=b'test2', committer=b'test2')
 
         self.assertFalse(self.repo[b'HEAD'].id in target_repo)
         target_repo.close()
 
         # Fetch changes into the cloned repo
         porcelain.fetch(target_path, self.repo.path, outstream=outstream,
-            errstream=errstream)
+                        errstream=errstream)
 
         # Check the target repo for pushed changes
         with Repo(target_path) as r:
@@ -871,8 +1046,7 @@ class RepackTests(PorcelainTestCase):
     def test_simple(self):
         handle, fullpath = tempfile.mkstemp(dir=self.repo.path)
         os.close(handle)
-        filename = os.path.basename(fullpath)
-        porcelain.add(repo=self.repo.path, paths=filename)
+        porcelain.add(repo=self.repo.path, paths=fullpath)
         porcelain.repack(self.repo)
 
 
@@ -880,7 +1054,7 @@ class LsTreeTests(PorcelainTestCase):
 
     def test_empty(self):
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         f = StringIO()
         porcelain.ls_tree(self.repo, b"HEAD", outstream=f)
@@ -892,9 +1066,9 @@ class LsTreeTests(PorcelainTestCase):
         with open(fullpath, 'w') as f:
             f.write('origstuff')
 
-        porcelain.add(repo=self.repo.path, paths=['foo'])
+        porcelain.add(repo=self.repo.path, paths=[fullpath])
         porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                         author=b'', committer=b'')
 
         f = StringIO()
         porcelain.ls_tree(self.repo, b"HEAD", outstream=f)
@@ -910,7 +1084,7 @@ class LsRemoteTests(PorcelainTestCase):
 
     def test_some(self):
         cid = porcelain.commit(repo=self.repo.path, message=b'test status',
-            author=b'', committer=b'')
+                               author=b'', committer=b'')
 
         self.assertEqual({
             b'refs/heads/master': cid,
@@ -932,4 +1106,31 @@ class RemoteAddTests(PorcelainTestCase):
         porcelain.remote_add(
             self.repo, 'jelmer', 'git://jelmer.uk/code/dulwich')
         self.assertRaises(porcelain.RemoteExists, porcelain.remote_add,
-            self.repo, 'jelmer', 'git://jelmer.uk/code/dulwich')
+                          self.repo, 'jelmer', 'git://jelmer.uk/code/dulwich')
+
+
+class CheckIgnoreTests(PorcelainTestCase):
+
+    def test_check_ignored(self):
+        with open(os.path.join(self.repo.path, '.gitignore'), 'w') as f:
+            f.write("foo")
+        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+            f.write("BAR")
+        with open(os.path.join(self.repo.path, 'bar'), 'w') as f:
+            f.write("BAR")
+        self.assertEqual(
+            ['foo'],
+            list(porcelain.check_ignore(self.repo, ['foo'])))
+        self.assertEqual([], list(porcelain.check_ignore(self.repo, ['bar'])))
+
+    def test_check_added(self):
+        with open(os.path.join(self.repo.path, 'foo'), 'w') as f:
+            f.write("BAR")
+        self.repo.stage(['foo'])
+        with open(os.path.join(self.repo.path, '.gitignore'), 'w') as f:
+            f.write("foo\n")
+        self.assertEqual(
+            [], list(porcelain.check_ignore(self.repo, ['foo'])))
+        self.assertEqual(
+            ['foo'],
+            list(porcelain.check_ignore(self.repo, ['foo'], no_index=True)))

+ 12 - 9
dulwich/tests/test_protocol.py

@@ -128,7 +128,8 @@ class ReceivableBytesIO(BytesIO):
     def recv(self, size):
         # fail fast if no bytes are available; in a real socket, this would
         # block forever
-        if self.tell() == len(self.getvalue()) and not self.allow_read_past_eof:
+        if (self.tell() == len(self.getvalue())
+                and not self.allow_read_past_eof):
             raise GitProtocolError('Blocking read past end of socket')
         if size == 1:
             return self.read(1)
@@ -215,28 +216,30 @@ class CapabilitiesTestCase(TestCase):
     def test_caps(self):
         self.assertEqual((b'bla', [b'la']), extract_capabilities(b'bla\0la'))
         self.assertEqual((b'bla', [b'la']), extract_capabilities(b'bla\0la\n'))
-        self.assertEqual((b'bla', [b'la', b'la']), extract_capabilities(b'bla\0la la'))
+        self.assertEqual((b'bla', [b'la', b'la']),
+                         extract_capabilities(b'bla\0la la'))
 
     def test_plain_want_line(self):
-        self.assertEqual((b'want bla', []), extract_want_line_capabilities(b'want bla'))
+        self.assertEqual((b'want bla', []),
+                         extract_want_line_capabilities(b'want bla'))
 
     def test_caps_want_line(self):
         self.assertEqual((b'want bla', [b'la']),
-                extract_want_line_capabilities(b'want bla la'))
+                         extract_want_line_capabilities(b'want bla la'))
         self.assertEqual((b'want bla', [b'la']),
-                extract_want_line_capabilities(b'want bla la\n'))
+                         extract_want_line_capabilities(b'want bla la\n'))
         self.assertEqual((b'want bla', [b'la', b'la']),
-                extract_want_line_capabilities(b'want bla la la'))
+                         extract_want_line_capabilities(b'want bla la la'))
 
     def test_ack_type(self):
         self.assertEqual(SINGLE_ACK, ack_type([b'foo', b'bar']))
         self.assertEqual(MULTI_ACK, ack_type([b'foo', b'bar', b'multi_ack']))
         self.assertEqual(MULTI_ACK_DETAILED,
-                          ack_type([b'foo', b'bar', b'multi_ack_detailed']))
+                         ack_type([b'foo', b'bar', b'multi_ack_detailed']))
         # choose detailed when both present
         self.assertEqual(MULTI_ACK_DETAILED,
-                          ack_type([b'foo', b'bar', b'multi_ack',
-                                    b'multi_ack_detailed']))
+                         ack_type([b'foo', b'bar', b'multi_ack',
+                                   b'multi_ack_detailed']))
 
 
 class BufferedPktLineWriterTests(TestCase):

+ 7 - 5
dulwich/tests/test_reflog.py

@@ -58,13 +58,15 @@ class ReflogLineTests(TestCase):
                 1446552482, 0, b'clone: from git://jelmer.uk/samba'))
 
     def test_parse(self):
+        reflog_line = (
+                 b'0000000000000000000000000000000000000000 '
+                 b'49030649db3dfec5a9bc03e5dde4255a14499f16 Jelmer Vernooij '
+                 b'<jelmer@jelmer.uk> 1446552482 +0000	'
+                 b'clone: from git://jelmer.uk/samba'
+                 )
         self.assertEqual(
                 (b'0000000000000000000000000000000000000000',
                  b'49030649db3dfec5a9bc03e5dde4255a14499f16',
                  b'Jelmer Vernooij <jelmer@jelmer.uk>',
                  1446552482, 0, b'clone: from git://jelmer.uk/samba'),
-                 parse_reflog_line(
-                     b'0000000000000000000000000000000000000000 '
-                     b'49030649db3dfec5a9bc03e5dde4255a14499f16 Jelmer Vernooij '
-                     b'<jelmer@jelmer.uk> 1446552482 +0000	'
-                     b'clone: from git://jelmer.uk/samba'))
+                parse_reflog_line(reflog_line))

+ 26 - 14
dulwich/tests/test_refs.py

@@ -84,6 +84,7 @@ TWOS = b'2' * 40
 THREES = b'3' * 40
 FOURS = b'4' * 40
 
+
 class PackedRefsFileTests(TestCase):
 
     def test_split_ref_line_errors(self):
@@ -106,7 +107,8 @@ class PackedRefsFileTests(TestCase):
         f = BytesIO(b'\n'.join([
             ONES + b' ref/1',
             b'^' + TWOS]))
-        self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
+        self.assertRaises(errors.PackedRefsException, list,
+                          read_packed_refs(f))
 
     def test_read_with_peeled(self):
         f = BytesIO(b'\n'.join([
@@ -124,13 +126,15 @@ class PackedRefsFileTests(TestCase):
         f = BytesIO(b'\n'.join([
             b'^' + TWOS,
             ONES + b' ref/1']))
-        self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
+        self.assertRaises(errors.PackedRefsException, list,
+                          read_packed_refs(f))
 
         f = BytesIO(b'\n'.join([
-            ONES + b' ref/1',
-            b'^' + TWOS,
-            b'^' + THREES]))
-        self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
+                ONES + b' ref/1',
+                b'^' + TWOS,
+                b'^' + THREES]))
+        self.assertRaises(errors.PackedRefsException, list,
+                          read_packed_refs(f))
 
     def test_write_with_peeled(self):
         f = BytesIO()
@@ -154,7 +158,8 @@ class PackedRefsFileTests(TestCase):
 # Dict of refs that we expect all RefsContainerTests subclasses to define.
 _TEST_REFS = {
     b'HEAD': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
-    b'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa':
+             b'42d06bd4b77fed026b154d16493e5deab78f02ec',
     b'refs/heads/master': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
     b'refs/heads/packed': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
     b'refs/tags/refs-0.1': b'df6800012397fb85c56e7418dd4eb9405dee075c',
@@ -184,7 +189,8 @@ class RefsContainerTests(object):
         self.assertEqual(_TEST_REFS, self._refs.as_dict())
 
     def test_setitem(self):
-        self._refs[b'refs/some/ref'] = b'42d06bd4b77fed026b154d16493e5deab78f02ec'
+        self._refs[b'refs/some/ref'] = (
+                b'42d06bd4b77fed026b154d16493e5deab78f02ec')
         self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
                          self._refs[b'refs/some/ref'])
         self.assertRaises(
@@ -264,7 +270,8 @@ class RefsContainerTests(object):
         self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
                          self._refs[b'HEAD'])
         self.assertTrue(self._refs.remove_if_equals(
-            b'refs/tags/refs-0.2', b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
+            b'refs/tags/refs-0.2',
+            b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
         self.assertTrue(self._refs.remove_if_equals(
             b'refs/tags/refs-0.2', ZERO_SHA))
         self.assertFalse(b'refs/tags/refs-0.2' in self._refs)
@@ -332,7 +339,8 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         self.assertEqual(b'ref: refs/heads/master', v)
 
         # ensure the symbolic link was written through
-        f = open(os.path.join(self._refs.path, 'refs', 'heads', 'master'), 'rb')
+        f = open(os.path.join(self._refs.path, 'refs', 'heads', 'master'),
+                 'rb')
         self.assertEqual(ones, f.read()[:40])
         f.close()
 
@@ -441,16 +449,19 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         self.assertRaises(KeyError, lambda: self._refs[b'refs/tags/refs-0.1'])
 
     def test_read_ref(self):
-        self.assertEqual(b'ref: refs/heads/master', self._refs.read_ref(b'HEAD'))
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_ref(b'HEAD'))
         self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
                          self._refs.read_ref(b'refs/heads/packed'))
         self.assertEqual(None, self._refs.read_ref(b'nonexistant'))
 
     def test_non_ascii(self):
         try:
-            encoded_ref = u'refs/tags/schön'.encode(sys.getfilesystemencoding())
+            encoded_ref = u'refs/tags/schön'.encode(
+                    sys.getfilesystemencoding())
         except UnicodeEncodeError:
-            raise SkipTest("filesystem encoding doesn't support special character")
+            raise SkipTest(
+                    "filesystem encoding doesn't support special character")
         p = os.path.join(self._repo.path, 'refs', 'tags', u'schön')
         with open(p, 'w') as f:
             f.write('00' * 20)
@@ -462,7 +473,8 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
 
 
 _TEST_REFS_SERIALIZED = (
-    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\t'
+    b'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
     b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/master\n'
     b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/packed\n'
     b'df6800012397fb85c56e7418dd4eb9405dee075c\trefs/tags/refs-0.1\n'

+ 145 - 98
dulwich/tests/test_repository.py

@@ -65,8 +65,10 @@ class CreateRepositoryTests(TestCase):
 
     def _check_repo_contents(self, repo, expect_bare):
         self.assertEqual(expect_bare, repo.bare)
-        self.assertFileContentsEqual(b'Unnamed repository', repo, 'description')
-        self.assertFileContentsEqual(b'', repo, os.path.join('info', 'exclude'))
+        self.assertFileContentsEqual(
+            b'Unnamed repository', repo, 'description')
+        self.assertFileContentsEqual(
+            b'', repo, os.path.join('info', 'exclude'))
         self.assertFileContentsEqual(None, repo, 'nonexistent file')
         barestr = b'bare = ' + str(expect_bare).lower().encode('ascii')
         with repo.get_named_file('config') as f:
@@ -78,7 +80,6 @@ class CreateRepositoryTests(TestCase):
             config_text = f.read()
             self.assertTrue(barestr in config_text, "%r" % config_text)
 
-
     def test_create_memory(self):
         repo = MemoryRepo.init_bare([], {})
         self._check_repo_contents(repo, True)
@@ -97,6 +98,22 @@ class CreateRepositoryTests(TestCase):
         self.assertEqual(os.path.join(tmp_dir, '.git'), repo._controldir)
         self._check_repo_contents(repo, False)
 
+    def test_create_disk_non_bare_mkdir(self):
+        tmp_dir = tempfile.mkdtemp()
+        target_dir = os.path.join(tmp_dir, "target")
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(target_dir, mkdir=True)
+        self.assertEqual(os.path.join(target_dir, '.git'), repo._controldir)
+        self._check_repo_contents(repo, False)
+
+    def test_create_disk_bare_mkdir(self):
+        tmp_dir = tempfile.mkdtemp()
+        target_dir = os.path.join(tmp_dir, "target")
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init_bare(target_dir, mkdir=True)
+        self.assertEqual(target_dir, repo._controldir)
+        self._check_repo_contents(repo, True)
+
 
 class MemoryRepoTests(TestCase):
 
@@ -126,7 +143,7 @@ class RepositoryRootTests(TestCase):
         r = self.open_repo('a.git')
         r[b"refs/tags/foo"] = b'a90fa2d900a17e99b433217e988c4eb4a2e9a097'
         self.assertEqual(b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
-                          r[b"refs/tags/foo"].id)
+                         r[b"refs/tags/foo"].id)
 
     def test_getitem_unicode(self):
         r = self.open_repo('a.git')
@@ -168,7 +185,8 @@ class RepositoryRootTests(TestCase):
             b'HEAD': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
             b'refs/heads/master': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
             b'refs/tags/mytag': b'28237f4dc30d0d462658d6b937b08a0f0b6ef55a',
-            b'refs/tags/mytag-packed': b'b0931cadc54336e78a1d980420e3268903b57a50',
+            b'refs/tags/mytag-packed':
+                b'b0931cadc54336e78a1d980420e3268903b57a50',
             }, r.get_refs())
 
     def test_head(self):
@@ -234,13 +252,16 @@ class RepositoryRootTests(TestCase):
     def test_get_walker(self):
         r = self.open_repo('a.git')
         # include defaults to [r.head()]
-        self.assertEqual([e.commit.id for e in r.get_walker()],
-                         [r.head(), b'2a72d929692c41d8554c07f6301757ba18a65d91'])
         self.assertEqual(
-            [e.commit.id for e in r.get_walker([b'2a72d929692c41d8554c07f6301757ba18a65d91'])],
+            [e.commit.id for e in r.get_walker()],
+            [r.head(), b'2a72d929692c41d8554c07f6301757ba18a65d91'])
+        self.assertEqual(
+            [e.commit.id for e in
+                r.get_walker([b'2a72d929692c41d8554c07f6301757ba18a65d91'])],
             [b'2a72d929692c41d8554c07f6301757ba18a65d91'])
         self.assertEqual(
-            [e.commit.id for e in r.get_walker(b'2a72d929692c41d8554c07f6301757ba18a65d91')],
+            [e.commit.id for e in
+                r.get_walker(b'2a72d929692c41d8554c07f6301757ba18a65d91')],
             [b'2a72d929692c41d8554c07f6301757ba18a65d91'])
 
     def test_clone(self):
@@ -252,8 +273,10 @@ class RepositoryRootTests(TestCase):
                 b'HEAD': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
                 b'refs/remotes/origin/master':
                     b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
-                b'refs/heads/master': b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
-                b'refs/tags/mytag': b'28237f4dc30d0d462658d6b937b08a0f0b6ef55a',
+                b'refs/heads/master':
+                    b'a90fa2d900a17e99b433217e988c4eb4a2e9a097',
+                b'refs/tags/mytag':
+                    b'28237f4dc30d0d462658d6b937b08a0f0b6ef55a',
                 b'refs/tags/mytag-packed':
                     b'b0931cadc54336e78a1d980420e3268903b57a50',
                 }, t.refs.as_dict())
@@ -264,7 +287,8 @@ class RepositoryRootTests(TestCase):
             encoded_path = r.path
             if not isinstance(encoded_path, bytes):
                 encoded_path = encoded_path.encode(sys.getfilesystemencoding())
-            self.assertEqual(encoded_path, c.get((b'remote', b'origin'), b'url'))
+            self.assertEqual(encoded_path,
+                             c.get((b'remote', b'origin'), b'url'))
             self.assertEqual(
                 b'+refs/heads/*:refs/remotes/origin/*',
                 c.get((b'remote', b'origin'), b'fetch'))
@@ -339,12 +363,13 @@ class RepositoryRootTests(TestCase):
         rel = os.path.relpath(os.path.join(repo_dir, 'submodule'), temp_dir)
         os.symlink(os.path.join(rel, 'dotgit'), os.path.join(temp_dir, '.git'))
         with Repo(temp_dir) as r:
-            self.assertEqual(r.head(), b'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
+            self.assertEqual(r.head(),
+                             b'a90fa2d900a17e99b433217e988c4eb4a2e9a097')
 
     def test_common_revisions(self):
         """
-        This test demonstrates that ``find_common_revisions()`` actually returns
-        common heads, not revisions; dulwich already uses
+        This test demonstrates that ``find_common_revisions()`` actually
+        returns common heads, not revisions; dulwich already uses
         ``find_common_revisions()`` in such a manner (see
         ``Repo.fetch_objects()``).
         """
@@ -357,17 +382,17 @@ class RepositoryRootTests(TestCase):
         # Re-create each-side of the merge in simple_merge.git.
         #
         # Since the trees and blobs are missing, the repository created is
-        # corrupted, but we're only checking for commits for the purpose of this
-        # test, so it's immaterial.
+        # corrupted, but we're only checking for commits for the purpose of
+        # this test, so it's immaterial.
         r1_dir = self.mkdtemp()
         self.addCleanup(shutil.rmtree, r1_dir)
-        r1_commits = [b'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd', # HEAD
+        r1_commits = [b'ab64bbdcc51b170d21588e5c5d391ee5c0c96dfd',  # HEAD
                       b'60dacdc733de308bb77bb76ce0fb0f9b44c9769e',
                       b'0d89f20333fbb1d2f3a94da77f4981373d8f4310']
 
         r2_dir = self.mkdtemp()
         self.addCleanup(shutil.rmtree, r2_dir)
-        r2_commits = [b'4cffe90e0a41ad3f5190079d7c8f036bde29cbe6', # HEAD
+        r2_commits = [b'4cffe90e0a41ad3f5190079d7c8f036bde29cbe6',  # HEAD
                       b'60dacdc733de308bb77bb76ce0fb0f9b44c9769e',
                       b'0d89f20333fbb1d2f3a94da77f4981373d8f4310']
 
@@ -536,15 +561,22 @@ exit 1
                     w.args == expected_warning.args):
                 break
         else:
-            raise AssertionError('Expected warning %r not in %r' %
-                    (expected_warning, warnings_list))
+            raise AssertionError(
+                'Expected warning %r not in %r' %
+                (expected_warning, warnings_list))
         self.assertEqual([commit_sha], r[commit_sha2].parents)
 
     def test_as_dict(self):
         def check(repo):
-            self.assertEqual(repo.refs.subkeys(b'refs/tags'), repo.refs.subkeys(b'refs/tags/'))
-            self.assertEqual(repo.refs.as_dict(b'refs/tags'), repo.refs.as_dict(b'refs/tags/'))
-            self.assertEqual(repo.refs.as_dict(b'refs/heads'), repo.refs.as_dict(b'refs/heads/'))
+            self.assertEqual(
+                repo.refs.subkeys(b'refs/tags'),
+                repo.refs.subkeys(b'refs/tags/'))
+            self.assertEqual(
+                repo.refs.as_dict(b'refs/tags'),
+                repo.refs.as_dict(b'refs/tags/'))
+            self.assertEqual(
+                repo.refs.as_dict(b'refs/heads'),
+                repo.refs.as_dict(b'refs/heads/'))
 
         bare = self.open_repo('a.git')
         tmp_dir = self.mkdtemp()
@@ -605,11 +637,12 @@ class BuildRepoRootTests(TestCase):
         with open(os.path.join(r.path, 'a'), 'wb') as f:
             f.write(b'file contents')
         r.stage(['a'])
-        commit_sha = r.do_commit(b'msg',
-                                 committer=b'Test Committer <test@nodomain.com>',
-                                 author=b'Test Author <test@nodomain.com>',
-                                 commit_timestamp=12345, commit_timezone=0,
-                                 author_timestamp=12345, author_timezone=0)
+        commit_sha = r.do_commit(
+                b'msg',
+                committer=b'Test Committer <test@nodomain.com>',
+                author=b'Test Author <test@nodomain.com>',
+                commit_timestamp=12345, commit_timezone=0,
+                author_timestamp=12345, author_timezone=0)
         self.assertEqual([], r[commit_sha].parents)
         self._root_commit = commit_sha
 
@@ -627,11 +660,12 @@ class BuildRepoRootTests(TestCase):
         with open(os.path.join(r.path, 'a'), 'wb') as f:
             f.write(b'new contents')
         r.stage(['a'])
-        commit_sha = r.do_commit(b'modified a',
-                                 committer=b'Test Committer <test@nodomain.com>',
-                                 author=b'Test Author <test@nodomain.com>',
-                                 commit_timestamp=12395, commit_timezone=0,
-                                 author_timestamp=12395, author_timezone=0)
+        commit_sha = r.do_commit(
+            b'modified a',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0)
         self.assertEqual([self._root_commit], r[commit_sha].parents)
         a_mode, a_id = tree_lookup_path(r.get_object, r[commit_sha].tree, b'a')
         self.assertEqual(stat.S_IFREG | 0o644, a_mode)
@@ -642,11 +676,12 @@ class BuildRepoRootTests(TestCase):
         r = self._repo
         os.symlink('a', os.path.join(r.path, 'b'))
         r.stage(['a', 'b'])
-        commit_sha = r.do_commit(b'Symlink b',
-                                 committer=b'Test Committer <test@nodomain.com>',
-                                 author=b'Test Author <test@nodomain.com>',
-                                 commit_timestamp=12395, commit_timezone=0,
-                                 author_timestamp=12395, author_timezone=0)
+        commit_sha = r.do_commit(
+            b'Symlink b',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0)
         self.assertEqual([self._root_commit], r[commit_sha].parents)
         b_mode, b_id = tree_lookup_path(r.get_object, r[commit_sha].tree, b'b')
         self.assertTrue(stat.S_ISLNK(b_mode))
@@ -656,11 +691,12 @@ class BuildRepoRootTests(TestCase):
         r = self._repo
         os.remove(os.path.join(r.path, 'a'))
         r.stage(['a'])
-        commit_sha = r.do_commit(b'deleted a',
-                                 committer=b'Test Committer <test@nodomain.com>',
-                                 author=b'Test Author <test@nodomain.com>',
-                                 commit_timestamp=12395, commit_timezone=0,
-                                 author_timestamp=12395, author_timezone=0)
+        commit_sha = r.do_commit(
+            b'deleted a',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0)
         self.assertEqual([self._root_commit], r[commit_sha].parents)
         self.assertEqual([], list(r.open_index()))
         tree = r[r[commit_sha].tree]
@@ -669,22 +705,24 @@ class BuildRepoRootTests(TestCase):
     def test_commit_follows(self):
         r = self._repo
         r.refs.set_symbolic_ref(b'HEAD', b'refs/heads/bla')
-        commit_sha = r.do_commit(b'commit with strange character',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=b'HEAD')
+        commit_sha = r.do_commit(
+            b'commit with strange character',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=b'HEAD')
         self.assertEqual(commit_sha, r[b'refs/heads/bla'].id)
 
     def test_commit_encoding(self):
         r = self._repo
-        commit_sha = r.do_commit(b'commit with strange character \xee',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             encoding=b"iso8859-1")
+        commit_sha = r.do_commit(
+            b'commit with strange character \xee',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            encoding=b"iso8859-1")
         self.assertEqual(b"iso8859-1", r[commit_sha].encoding)
 
     def test_commit_config_identity(self):
@@ -744,12 +782,13 @@ class BuildRepoRootTests(TestCase):
     def test_commit_branch(self):
         r = self._repo
 
-        commit_sha = r.do_commit(b'commit to branch',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=b"refs/heads/new_branch")
+        commit_sha = r.do_commit(
+            b'commit to branch',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=b"refs/heads/new_branch")
         self.assertEqual(self._root_commit, r[b"HEAD"].id)
         self.assertEqual(commit_sha, r[b"refs/heads/new_branch"].id)
         self.assertEqual([], r[commit_sha].parents)
@@ -757,30 +796,33 @@ class BuildRepoRootTests(TestCase):
 
         new_branch_head = commit_sha
 
-        commit_sha = r.do_commit(b'commit to branch 2',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=b"refs/heads/new_branch")
+        commit_sha = r.do_commit(
+            b'commit to branch 2',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=b"refs/heads/new_branch")
         self.assertEqual(self._root_commit, r[b"HEAD"].id)
         self.assertEqual(commit_sha, r[b"refs/heads/new_branch"].id)
         self.assertEqual([new_branch_head], r[commit_sha].parents)
 
     def test_commit_merge_heads(self):
         r = self._repo
-        merge_1 = r.do_commit(b'commit to branch 2',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=b"refs/heads/new_branch")
-        commit_sha = r.do_commit(b'commit with merge',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             merge_heads=[merge_1])
+        merge_1 = r.do_commit(
+            b'commit to branch 2',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=b"refs/heads/new_branch")
+        commit_sha = r.do_commit(
+            b'commit with merge',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            merge_heads=[merge_1])
         self.assertEqual(
             [self._root_commit, merge_1],
             r[commit_sha].parents)
@@ -790,12 +832,13 @@ class BuildRepoRootTests(TestCase):
 
         old_shas = set(r.object_store)
         old_refs = r.get_refs()
-        commit_sha = r.do_commit(b'commit with no ref',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=None)
+        commit_sha = r.do_commit(
+            b'commit with no ref',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=None)
         new_shas = set(r.object_store) - old_shas
 
         # New sha is added, but no new refs
@@ -810,12 +853,13 @@ class BuildRepoRootTests(TestCase):
 
         old_shas = set(r.object_store)
         old_refs = r.get_refs()
-        commit_sha = r.do_commit(b'commit with no ref',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=None, merge_heads=[self._root_commit])
+        commit_sha = r.do_commit(
+            b'commit with no ref',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=None, merge_heads=[self._root_commit])
         new_shas = set(r.object_store) - old_shas
 
         # New sha is added, but no new refs
@@ -836,6 +880,8 @@ class BuildRepoRootTests(TestCase):
         r.stage(['a'])
         r.stage(['a'])  # double-stage a deleted path
 
+    @skipIf(sys.platform == 'win32' and sys.version_info[:2] >= (3, 6),
+            'tries to implicitly decode as utf8')
     def test_commit_no_encode_decode(self):
         r = self._repo
         repo_path_bytes = r.path.encode(sys.getfilesystemencoding())
@@ -850,12 +896,13 @@ class BuildRepoRootTests(TestCase):
             self.addCleanup(os.remove, full_path)
 
         r.stage(names)
-        commit_sha = r.do_commit(b'Files with different encodings',
-             committer=b'Test Committer <test@nodomain.com>',
-             author=b'Test Author <test@nodomain.com>',
-             commit_timestamp=12395, commit_timezone=0,
-             author_timestamp=12395, author_timezone=0,
-             ref=None, merge_heads=[self._root_commit])
+        commit_sha = r.do_commit(
+            b'Files with different encodings',
+            committer=b'Test Committer <test@nodomain.com>',
+            author=b'Test Author <test@nodomain.com>',
+            commit_timestamp=12395, commit_timezone=0,
+            author_timestamp=12395, author_timezone=0,
+            ref=None, merge_heads=[self._root_commit])
 
         for name, encoding in zip(names, encodings):
             mode, id = tree_lookup_path(r.get_object, r[commit_sha].tree, name)

+ 44 - 29
dulwich/tests/test_server.py

@@ -152,7 +152,8 @@ class HandlerTestCase(TestCase):
         self.assertSucceeds(set_caps, [b'cap2', b'ignoreme'])
 
     def test_has_capability(self):
-        self.assertRaises(GitProtocolError, self._handler.has_capability, b'cap')
+        self.assertRaises(GitProtocolError, self._handler.has_capability,
+                          b'cap')
         caps = self._handler.capabilities()
         self._handler.set_client_capabilities(caps)
         for cap in caps:
@@ -205,7 +206,7 @@ class UploadPackHandlerTestCase(TestCase):
         caps = list(self._handler.required_capabilities()) + [b'include-tag']
         self._handler.set_client_capabilities(caps)
         self.assertEqual({b'1234' * 10: ONE, b'5678' * 10: TWO},
-                          self._handler.get_tagged(refs, repo=self._repo))
+                         self._handler.get_tagged(refs, repo=self._repo))
 
         # non-include-tag case
         caps = self._handler.required_capabilities()
@@ -291,6 +292,7 @@ class TestUploadPackHandler(UploadPackHandler):
     def required_capabilities(self):
         return ()
 
+
 class ReceivePackHandlerTestCase(TestCase):
 
     def setUp(self):
@@ -319,8 +321,9 @@ class ProtocolGraphWalkerEmptyTestCase(TestCase):
         self._repo = MemoryRepo.init_bare([], {})
         backend = DictBackend({b'/': self._repo})
         self._walker = ProtocolGraphWalker(
-            TestUploadPackHandler(backend, [b'/', b'host=lolcats'], TestProto()),
-            self._repo.object_store, self._repo.get_peeled)
+                TestUploadPackHandler(backend, [b'/', b'host=lolcats'],
+                                      TestProto()),
+                self._repo.object_store, self._repo.get_peeled)
 
     def test_empty_repository(self):
         # The server should wait for a flush packet.
@@ -333,7 +336,6 @@ class ProtocolGraphWalkerEmptyTestCase(TestCase):
         self.assertEqual(None, self._walker.proto.get_received_line())
 
 
-
 class ProtocolGraphWalkerTestCase(TestCase):
 
     def setUp(self):
@@ -352,8 +354,9 @@ class ProtocolGraphWalkerTestCase(TestCase):
         self._repo = MemoryRepo.init_bare(commits, {})
         backend = DictBackend({b'/': self._repo})
         self._walker = ProtocolGraphWalker(
-            TestUploadPackHandler(backend, [b'/', b'host=lolcats'], TestProto()),
-            self._repo.object_store, self._repo.get_peeled)
+                TestUploadPackHandler(backend, [b'/', b'host=lolcats'],
+                                      TestProto()),
+                self._repo.object_store, self._repo.get_peeled)
 
     def test_all_wants_satisfied_no_haves(self):
         self._walker.set_wants([ONE])
@@ -391,9 +394,9 @@ class ProtocolGraphWalkerTestCase(TestCase):
     def test_split_proto_line(self):
         allowed = (b'want', b'done', None)
         self.assertEqual((b'want', ONE),
-                          _split_proto_line(b'want ' + ONE + b'\n', allowed))
+                         _split_proto_line(b'want ' + ONE + b'\n', allowed))
         self.assertEqual((b'want', TWO),
-                          _split_proto_line(b'want ' + TWO + b'\n', allowed))
+                         _split_proto_line(b'want ' + TWO + b'\n', allowed))
         self.assertRaises(GitProtocolError, _split_proto_line,
                           b'want xxxx\n', allowed)
         self.assertRaises(UnexpectedCommandError, _split_proto_line,
@@ -401,7 +404,8 @@ class ProtocolGraphWalkerTestCase(TestCase):
         self.assertRaises(GitProtocolError, _split_proto_line,
                           b'foo ' + FOUR + b'\n', allowed)
         self.assertRaises(GitProtocolError, _split_proto_line, b'bar', allowed)
-        self.assertEqual((b'done', None), _split_proto_line(b'done\n', allowed))
+        self.assertEqual((b'done', None),
+                         _split_proto_line(b'done\n', allowed))
         self.assertEqual((None, None), _split_proto_line(b'', allowed))
 
     def test_determine_wants(self):
@@ -427,16 +431,20 @@ class ProtocolGraphWalkerTestCase(TestCase):
         self._walker.advertise_refs = False
 
         self._walker.proto.set_output([b'want ' + FOUR + b' multi_ack', None])
-        self.assertRaises(GitProtocolError, self._walker.determine_wants, heads)
+        self.assertRaises(GitProtocolError, self._walker.determine_wants,
+                          heads)
 
         self._walker.proto.set_output([None])
         self.assertEqual([], self._walker.determine_wants(heads))
 
-        self._walker.proto.set_output([b'want ' + ONE + b' multi_ack', b'foo', None])
-        self.assertRaises(GitProtocolError, self._walker.determine_wants, heads)
+        self._walker.proto.set_output(
+                [b'want ' + ONE + b' multi_ack', b'foo', None])
+        self.assertRaises(GitProtocolError, self._walker.determine_wants,
+                          heads)
 
         self._walker.proto.set_output([b'want ' + FOUR + b' multi_ack', None])
-        self.assertRaises(GitProtocolError, self._walker.determine_wants, heads)
+        self.assertRaises(GitProtocolError, self._walker.determine_wants,
+                          heads)
 
     def test_determine_wants_advertisement(self):
         self._walker.proto.set_output([None])
@@ -554,8 +562,8 @@ class TestProtocolGraphWalker(object):
             return
         # Whether or not PACK is sent after is determined by this, so
         # record this value.
-        self.pack_sent = self._impl.handle_done(self.done_required,
-            self.done_received)
+        self.pack_sent = self._impl.handle_done(
+                self.done_required, self.done_received)
         return self.pack_sent
 
     def notify_done(self):
@@ -1001,8 +1009,8 @@ class FileSystemBackendTests(TestCase):
             self.backend = FileSystemBackend()
 
     def test_nonexistant(self):
-        self.assertRaises(NotGitRepository,
-            self.backend.open_repository, "/does/not/exist/unless/foo")
+        self.assertRaises(NotGitRepository, self.backend.open_repository,
+                          "/does/not/exist/unless/foo")
 
     def test_absolute(self):
         repo = self.backend.open_repository(self.path)
@@ -1011,8 +1019,9 @@ class FileSystemBackendTests(TestCase):
             os.path.normcase(os.path.abspath(self.repo.path)))
 
     def test_child(self):
-        self.assertRaises(NotGitRepository,
-            self.backend.open_repository, os.path.join(self.path, "foo"))
+        self.assertRaises(
+                NotGitRepository,
+                self.backend.open_repository, os.path.join(self.path, "foo"))
 
     def test_bad_repo_path(self):
         backend = FileSystemBackend()
@@ -1027,8 +1036,9 @@ class DictBackendTests(TestCase):
     def test_nonexistant(self):
         repo = MemoryRepo.init_bare([], {})
         backend = DictBackend({b'/': repo})
-        self.assertRaises(NotGitRepository,
-            backend.open_repository, "/does/not/exist/unless/foo")
+        self.assertRaises(
+                NotGitRepository, backend.open_repository,
+                "/does/not/exist/unless/foo")
 
     def test_bad_repo_path(self):
         repo = MemoryRepo.init_bare([], {})
@@ -1046,19 +1056,22 @@ class ServeCommandTests(TestCase):
         self.backend = DictBackend({})
 
     def serve_command(self, handler_cls, args, inf, outf):
-        return serve_command(handler_cls, [b"test"] + args, backend=self.backend,
-            inf=inf, outf=outf)
+        return serve_command(
+                handler_cls, [b"test"] + args, backend=self.backend, inf=inf,
+                outf=outf)
 
     def test_receive_pack(self):
         commit = make_commit(id=ONE, parents=[], commit_time=111)
         self.backend.repos[b"/"] = MemoryRepo.init_bare(
             [commit], {b"refs/heads/master": commit.id})
         outf = BytesIO()
-        exitcode = self.serve_command(ReceivePackHandler, [b"/"], BytesIO(b"0000"), outf)
+        exitcode = self.serve_command(ReceivePackHandler, [b"/"],
+                                      BytesIO(b"0000"), outf)
         outlines = outf.getvalue().splitlines()
         self.assertEqual(2, len(outlines))
-        self.assertEqual(b"1111111111111111111111111111111111111111 refs/heads/master",
-            outlines[0][4:].split(b"\x00")[0])
+        self.assertEqual(
+                b"1111111111111111111111111111111111111111 refs/heads/master",
+                outlines[0][4:].split(b"\x00")[0])
         self.assertEqual(b"0000", outlines[-1])
         self.assertEqual(0, exitcode)
 
@@ -1076,7 +1089,8 @@ class UpdateServerInfoTests(TestCase):
         update_server_info(self.repo)
         with open(os.path.join(self.path, ".git", "info", "refs"), 'rb') as f:
             self.assertEqual(b'', f.read())
-        with open(os.path.join(self.path, ".git", "objects", "info", "packs"), 'rb') as f:
+        p = os.path.join(self.path, ".git", "objects", "info", "packs")
+        with open(p, 'rb') as f:
             self.assertEqual(b'', f.read())
 
     def test_simple(self):
@@ -1087,5 +1101,6 @@ class UpdateServerInfoTests(TestCase):
         update_server_info(self.repo)
         with open(os.path.join(self.path, ".git", "info", "refs"), 'rb') as f:
             self.assertEqual(f.read(), commit_id + b'\trefs/heads/foo\n')
-        with open(os.path.join(self.path, ".git", "objects", "info", "packs"), 'rb') as f:
+        p = os.path.join(self.path, ".git", "objects", "info", "packs")
+        with open(p, 'rb') as f:
             self.assertEqual(f.read(), b'')

+ 18 - 14
dulwich/tests/test_walk.py

@@ -25,7 +25,6 @@ from itertools import (
     )
 
 from dulwich.diff_tree import (
-    CHANGE_ADD,
     CHANGE_MODIFY,
     CHANGE_RENAME,
     TreeChange,
@@ -177,9 +176,11 @@ class WalkerTest(TestCase):
             2, trees={1: [(b'a', blob_a1)],
                       2: [(b'a', blob_a2), (b'b', blob_b2)]})
         e1 = TestWalkEntry(c1, [TreeChange.add((b'a', F, blob_a1.id))])
-        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
+        e2 = TestWalkEntry(
+                c2,
+                [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
                                            (b'a', F, blob_a2.id)),
-                                TreeChange.add((b'b', F, blob_b2.id))])
+                 TreeChange.add((b'b', F, blob_b2.id))])
         self.assertWalkYields([e2, e1], [c2.id])
 
     def test_changes_multiple_parents(self):
@@ -192,8 +193,9 @@ class WalkerTest(TestCase):
                    3: [(b'a', blob_a3), (b'b', blob_b2)]})
         # a is a modify/add conflict and b is not conflicted.
         changes = [[
-            TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)),
-            TreeChange.add((b'a', F, blob_a3.id)),
+                TreeChange(CHANGE_MODIFY,
+                           (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)),
+                TreeChange.add((b'a', F, blob_a3.id)),
         ]]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
                               exclude=[c1.id, c2.id])
@@ -293,7 +295,8 @@ class WalkerTest(TestCase):
         c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
         self.assertWalkYields([c5], [c6.id], paths=[b'c'])
 
-        e = lambda n: (n, F, blob.id)
+        def e(n):
+            return (n, F, blob.id)
         self.assertWalkYields(
             [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'c'))]),
              TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'b'))]),
@@ -310,7 +313,8 @@ class WalkerTest(TestCase):
                       5: [(b'a', blob)],
                       6: [(b'c', blob)]})
 
-        e = lambda n: (n, F, blob.id)
+        def e(n):
+            return (n, F, blob.id)
         # Once the path changes to b, we aren't interested in a or c anymore.
         self.assertWalkYields(
             [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'c'))]),
@@ -356,8 +360,8 @@ class WalkerTest(TestCase):
           11, times=[9, 0, 1, 2, 3, 4, 5, 8, 6, 7, 9])
         c8, _, c10, c11 = commits[-4:]
         del self.store[commits[0].id]
-        # c9 is older than we want to walk, but is out of order with its parent,
-        # so we need to walk past it to get to c8.
+        # c9 is older than we want to walk, but is out of order with its
+        # parent, so we need to walk past it to get to c8.
         # c1 would also match, but we've deleted it, and it should get pruned
         # even with over-scanning.
         self.assertWalkYields([c11, c10, c8], [c11.id], since=7)
@@ -495,11 +499,11 @@ class WalkEntryTest(TestCase):
         entry_b = (b'y/b', F, blob_b.id)
         entry_b2 = (b'y/b', F, blob_b2.id)
         self.assertEqual(
-            [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2),
-             TreeChange.add(entry_a2)],
-            [TreeChange.add(entry_b2),
-             TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]],
-            changes,
+                [[TreeChange(CHANGE_MODIFY, entry_a, entry_a2),
+                  TreeChange.add(entry_a2)],
+                 [TreeChange.add(entry_b2),
+                  TreeChange(CHANGE_MODIFY, entry_b, entry_b2)]],
+                changes,
         )
 
     def test_filter_changes(self):

+ 19 - 15
dulwich/tests/test_web.py

@@ -160,7 +160,7 @@ class DumbHandlersTestCase(WebTestCase):
         xs = b'x' * bufsize
         f = BytesIO(2 * xs)
         self.assertEqual([xs, xs],
-                          list(send_file(self._req, f, 'some/thing')))
+                         list(send_file(self._req, f, 'some/thing')))
         self.assertEqual(HTTP_OK, self._status)
         self.assertContentTypeEquals('some/thing')
         self.assertTrue(f.closed)
@@ -229,7 +229,8 @@ class DumbHandlersTestCase(WebTestCase):
         self.assertEqual(HTTP_ERROR, self._status)
 
     def test_get_pack_file(self):
-        pack_name = os.path.join('objects', 'pack', 'pack-%s.pack' % ('1' * 40))
+        pack_name = os.path.join(
+            'objects', 'pack', 'pack-%s.pack' % ('1' * 40))
         backend = _test_backend([], named_files={pack_name: b'pack contents'})
         mat = re.search('.*', pack_name)
         output = b''.join(get_pack_file(self._req, backend, mat))
@@ -268,10 +269,10 @@ class DumbHandlersTestCase(WebTestCase):
 
         mat = re.search('.*', '//info/refs')
         self.assertEqual([blob1.id + b'\trefs/heads/master\n',
-                           blob3.id + b'\trefs/tags/blob-tag\n',
-                           tag1.id + b'\trefs/tags/tag-tag\n',
-                           blob2.id + b'\trefs/tags/tag-tag^{}\n'],
-                          list(get_info_refs(self._req, backend, mat)))
+                          blob3.id + b'\trefs/tags/blob-tag\n',
+                          tag1.id + b'\trefs/tags/tag-tag\n',
+                          blob2.id + b'\trefs/tags/tag-tag^{}\n'],
+                         list(get_info_refs(self._req, backend, mat)))
         self.assertEqual(HTTP_OK, self._status)
         self.assertContentTypeEquals('text/plain')
         self.assertFalse(self._req.cached)
@@ -300,7 +301,8 @@ class DumbHandlersTestCase(WebTestCase):
         mat = re.search('.*', '//info/packs')
         output = b''.join(get_info_packs(self._req, backend, mat))
         expected = b''.join(
-            [(b'P pack-' + s + b'.pack\n') for s in [b'1' * 40, b'2' * 40, b'3' * 40]])
+            [(b'P pack-' + s + b'.pack\n')
+             for s in [b'1' * 40, b'2' * 40, b'3' * 40]])
         self.assertEqual(expected, output)
         self.assertEqual(HTTP_OK, self._status)
         self.assertContentTypeEquals('text/plain')
@@ -374,9 +376,9 @@ class SmartHandlersTestCase(WebTestCase):
         handler_output = b''.join(get_info_refs(self._req, b'backend', mat))
         write_output = self._output.getvalue()
         self.assertEqual((b'001e# service=git-upload-pack\n'
-                           b'0000'
-                           # input is ignored by the handler
-                           b'handled input: '), write_output)
+                          b'0000'
+                          # input is ignored by the handler
+                          b'handled input: '), write_output)
         # Ensure all output was written via the write callback.
         self.assertEqual(b'', handler_output)
         self.assertTrue(self._handler.advertise_refs)
@@ -412,7 +414,7 @@ class HTTPGitRequestTestCase(WebTestCase):
         self.assertEqual(message.encode('ascii'), self._req.not_found(message))
         self.assertEqual(HTTP_NOT_FOUND, self._status)
         self.assertEqual(set([('Content-Type', 'text/plain')]),
-                          set(self._headers))
+                         set(self._headers))
 
     def test_forbidden(self):
         self._req.cache_forever()  # cache headers should be discarded
@@ -420,7 +422,7 @@ class HTTPGitRequestTestCase(WebTestCase):
         self.assertEqual(message.encode('ascii'), self._req.forbidden(message))
         self.assertEqual(HTTP_FORBIDDEN, self._status)
         self.assertEqual(set([('Content-Type', 'text/plain')]),
-                          set(self._headers))
+                         set(self._headers))
 
     def test_respond_ok(self):
         self._req.respond()
@@ -526,7 +528,8 @@ class GunzipTestCase(HTTPGitApplicationTestCase):
         require '.seek()'. See https://github.com/jelmer/dulwich/issues/140.)
         """
         zstream, zlength = self._get_zstream(self.example_text)
-        self._test_call(self.example_text,
+        self._test_call(
+            self.example_text,
             MinimalistWSGIInputStream(zstream.read()), zlength)
 
     def test_call_no_working_seek(self):
@@ -535,5 +538,6 @@ class GunzipTestCase(HTTPGitApplicationTestCase):
         (but defunct).  See https://github.com/jonashaag/klaus/issues/154.
         """
         zstream, zlength = self._get_zstream(self.example_text)
-        self._test_call(self.example_text,
-            MinimalistWSGIInputStream2(zstream.read()), zlength)
+        self._test_call(
+                self.example_text,
+                MinimalistWSGIInputStream2(zstream.read()), zlength)

+ 13 - 12
dulwich/tests/utils.py

@@ -50,9 +50,9 @@ from dulwich.pack import (
     create_delta,
     )
 from dulwich.repo import Repo
-from dulwich.tests import (
-    SkipTest,
+from dulwich.tests import (  # noqa: F401
     skipIf,
+    SkipTest,
     )
 
 
@@ -92,7 +92,8 @@ def make_object(cls, **attrs):
     """Make an object for testing and assign some members.
 
     This method creates a new subclass to allow arbitrary attribute
-    reassignment, which is not otherwise possible with objects having __slots__.
+    reassignment, which is not otherwise possible with objects having
+    __slots__.
 
     :param attrs: dict of attributes to set on the new object.
     :return: A newly initialized object of type cls.
@@ -101,9 +102,9 @@ def make_object(cls, **attrs):
     class TestObject(cls):
         """Class that inherits from the given class, but without __slots__.
 
-        Note that classes with __slots__ can't have arbitrary attributes monkey-
-        patched in, so this is a class that is exactly the same only with a
-        __dict__ instead of __slots__.
+        Note that classes with __slots__ can't have arbitrary attributes
+        monkey-patched in, so this is a class that is exactly the same only
+        with a __dict__ instead of __slots__.
         """
         pass
     TestObject.__name__ = 'TestObject_' + cls.__name__
@@ -286,15 +287,15 @@ def build_commit_graph(object_store, commit_spec, trees=None, attrs=None):
 
     :param object_store: An ObjectStore to commit objects to.
     :param commit_spec: An iterable of iterables of ints defining the commit
-        graph. Each entry defines one commit, and entries must be in topological
-        order. The first element of each entry is a commit number, and the
-        remaining elements are its parents. The commit numbers are only
+        graph. Each entry defines one commit, and entries must be in
+        topological order. The first element of each entry is a commit number,
+        and the remaining elements are its parents. The commit numbers are only
         meaningful for the call to make_commits; since real commit objects are
         created, they will get created with real, opaque SHAs.
     :param trees: An optional dict of commit number -> tree spec for building
-        trees for commits. The tree spec is an iterable of (path, blob, mode) or
-        (path, blob) entries; if mode is omitted, it defaults to the normal file
-        mode (0100644).
+        trees for commits. The tree spec is an iterable of (path, blob, mode)
+        or (path, blob) entries; if mode is omitted, it defaults to the normal
+        file mode (0100644).
     :param attrs: A dict of commit number -> (dict of attribute -> value) for
         assigning additional values to the commits.
     :return: The list of commit objects created.

+ 18 - 18
dulwich/walk.py

@@ -37,7 +37,6 @@ from dulwich.errors import (
     MissingCommitError,
     )
 from dulwich.objects import (
-    Commit,
     Tag,
     )
 
@@ -67,8 +66,8 @@ class WalkEntry(object):
             use to filter changes. Must be a directory name. Must be
             a full, valid, path reference (no partial names or wildcards).
         :return: For commits with up to one parent, a list of TreeChange
-            objects; if the commit has no parents, these will be relative to the
-            empty tree. For merge commits, a list of lists of TreeChange
+            objects; if the commit has no parents, these will be relative to
+            the empty tree. For merge commits, a list of lists of TreeChange
             objects; see dulwich.diff.tree_changes_for_merge.
         """
         cached = self._changes.get(path_prefix)
@@ -88,7 +87,8 @@ class WalkEntry(object):
                     parent = self._store[subtree_sha]
             else:
                 changes_func = tree_changes_for_merge
-                parent = [self._store[p].tree for p in self._get_parents(commit)]
+                parent = [
+                        self._store[p].tree for p in self._get_parents(commit)]
                 if path_prefix:
                     parent_trees = [self._store[p] for p in parent]
                     parent = []
@@ -192,20 +192,20 @@ class _CommitTimeQueue(object):
                                     for _, c in self._pq):
                     _, n = self._pq[0]
                     if self._last and n.commit_time >= self._last.commit_time:
-                        # If the next commit is newer than the last one, we need
-                        # to keep walking in case its parents (which we may not
-                        # have seen yet) are excluded. This gives the excluded
-                        # set a chance to "catch up" while the commit is still
-                        # in the Walker's output queue.
+                        # If the next commit is newer than the last one, we
+                        # need to keep walking in case its parents (which we
+                        # may not have seen yet) are excluded. This gives the
+                        # excluded set a chance to "catch up" while the commit
+                        # is still in the Walker's output queue.
                         reset_extra_commits = True
                     else:
                         reset_extra_commits = False
 
             if (self._min_time is not None and
-                commit.commit_time < self._min_time):
+                    commit.commit_time < self._min_time):
                 # We want to stop walking at min_time, but commits at the
-                # boundary may be out of order with respect to their parents. So
-                # we walk _MAX_EXTRA_COMMITS more commits once we hit this
+                # boundary may be out of order with respect to their parents.
+                # So we walk _MAX_EXTRA_COMMITS more commits once we hit this
                 # boundary.
                 reset_extra_commits = False
 
@@ -245,8 +245,8 @@ class Walker(object):
             ancestors.
         :param exclude: Iterable of SHAs of commits to exclude along with their
             ancestors, overriding includes.
-        :param order: ORDER_* constant specifying the order of results. Anything
-            other than ORDER_DATE may result in O(n) memory usage.
+        :param order: ORDER_* constant specifying the order of results.
+            Anything other than ORDER_DATE may result in O(n) memory usage.
         :param reverse: If True, reverse the order of output, requiring O(n)
             memory.
         :param max_entries: The maximum number of entries to yield, or None for
@@ -320,8 +320,8 @@ class Walker(object):
         """Determine if a walk entry should be returned..
 
         :param entry: The WalkEntry to consider.
-        :return: True if the WalkEntry should be returned by this walk, or False
-            otherwise (e.g. if it doesn't match any requested paths).
+        :return: True if the WalkEntry should be returned by this walk, or
+            False otherwise (e.g. if it doesn't match any requested paths).
         """
         commit = entry.commit
         if self.since is not None and commit.commit_time < self.since:
@@ -368,8 +368,8 @@ class Walker(object):
 
         :param results: An iterator of WalkEntry objects, in the order returned
             from the queue_cls.
-        :return: An iterator or list of WalkEntry objects, in the order required
-            by the Walker.
+        :return: An iterator or list of WalkEntry objects, in the order
+            required by the Walker.
         """
         if self.order == ORDER_TOPO:
             results = _topo_reorder(results, self.get_parents)

+ 18 - 10
dulwich/web.py

@@ -90,8 +90,8 @@ def url_prefix(mat):
 
     :param mat: A regex match object.
     :returns: The URL prefix, defined as the text before the match in the
-        original string. Normalized to start with one leading slash and end with
-        zero.
+        original string. Normalized to start with one leading slash and end
+        with zero.
     """
     return '/' + mat.string[:mat.start()].strip('/')
 
@@ -182,11 +182,13 @@ def get_info_refs(req, backend, mat):
             yield req.forbidden('Unsupported service')
             return
         req.nocache()
-        write = req.respond(HTTP_OK, 'application/x-%s-advertisement' % service)
+        write = req.respond(
+            HTTP_OK, 'application/x-%s-advertisement' % service)
         proto = ReceivableProtocol(BytesIO().read, write)
         handler = handler_cls(backend, [url_prefix(mat)], proto,
                               http_req=req, advertise_refs=True)
-        handler.proto.write_pkt_line(b'# service=' + service.encode('ascii') + b'\n')
+        handler.proto.write_pkt_line(
+            b'# service=' + service.encode('ascii') + b'\n')
         handler.proto.write_pkt_line(None)
         handler.handle()
     else:
@@ -323,9 +325,12 @@ class HTTPGitApplication(object):
       ('GET', re.compile('/objects/info/alternates$')): get_text_file,
       ('GET', re.compile('/objects/info/http-alternates$')): get_text_file,
       ('GET', re.compile('/objects/info/packs$')): get_info_packs,
-      ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')): get_loose_object,
-      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')): get_pack_file,
-      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')): get_idx_file,
+      ('GET', re.compile('/objects/([0-9a-f]{2})/([0-9a-f]{38})$')):
+      get_loose_object,
+      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.pack$')):
+      get_pack_file,
+      ('GET', re.compile('/objects/pack/pack-([0-9a-f]{40})\\.idx$')):
+      get_idx_file,
 
       ('POST', re.compile('/git-upload-pack$')): handle_service_request,
       ('POST', re.compile('/git-receive-pack$')): handle_service_request,
@@ -385,7 +390,8 @@ class GunzipFilter(object):
                 shutil.copyfileobj(environ['wsgi.input'], wsgi_input)
                 wsgi_input.seek(0)
 
-            environ['wsgi.input'] = gzip.GzipFile(filename=None, fileobj=wsgi_input, mode='r')
+            environ['wsgi.input'] = gzip.GzipFile(
+                filename=None, fileobj=wsgi_input, mode='r')
             del environ['HTTP_CONTENT_ENCODING']
             if 'CONTENT_LENGTH' in environ:
                 del environ['CONTENT_LENGTH']
@@ -456,7 +462,7 @@ class WSGIRequestHandlerLogger(WSGIRequestHandler):
         """Handle a single HTTP request"""
 
         self.raw_requestline = self.rfile.readline()
-        if not self.parse_request(): # An error code has been sent, just exit
+        if not self.parse_request():  # An error code has been sent, just exit
             return
 
         handler = ServerHandlerLogger(
@@ -470,7 +476,9 @@ class WSGIServerLogger(WSGIServer):
 
     def handle_error(self, request, client_address):
         """Handle an error. """
-        logger.exception('Exception happened during processing of request from %s' % str(client_address))
+        logger.exception(
+            'Exception happened during processing of request from %s' %
+            str(client_address))
 
 
 def main(argv=sys.argv):

+ 29 - 21
setup.py

@@ -8,16 +8,18 @@ try:
 except ImportError:
     from distutils.core import setup, Extension
 from distutils.core import Distribution
+import os
+import sys
 
-dulwich_version_string = '0.17.3'
+dulwich_version_string = '0.18.0'
 
 include_dirs = []
 # Windows MSVC support
-import os
-import sys
-if sys.platform == 'win32':
+if sys.platform == 'win32' and sys.version_info[:2] < (3, 6):
+    # Include dulwich/ for fallback stdint.h
     include_dirs.append('dulwich')
 
+
 class DulwichDistribution(Distribution):
 
     def is_pure(self):
@@ -33,6 +35,7 @@ class DulwichDistribution(Distribution):
 
     pure = False
 
+
 if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
     # XCode 4.0 dropped support for ppc architecture, which is hardcoded in
     # distutils.sysconfig
@@ -48,31 +51,34 @@ if sys.platform == 'darwin' and os.path.exists('/usr/bin/xcodebuild'):
             os.environ['ARCHFLAGS'] = ''
 
 tests_require = ['fastimport']
-if not '__pypy__' in sys.modules and not sys.platform == 'win32':
+
+
+if '__pypy__' not in sys.modules and not sys.platform == 'win32':
     tests_require.extend([
         'gevent', 'geventhttpclient', 'mock', 'setuptools>=17.1'])
 
-if sys.version_info[0] > 2 and sys.platform == 'win32':
-    # C Modules don't build for python3 windows, and prevent tests from running
-    ext_modules = []
-else:
-    ext_modules = [
-        Extension('dulwich._objects', ['dulwich/_objects.c'],
-                  include_dirs=include_dirs),
-        Extension('dulwich._pack', ['dulwich/_pack.c'],
-                  include_dirs=include_dirs),
-        Extension('dulwich._diff_tree', ['dulwich/_diff_tree.c'],
-                  include_dirs=include_dirs),
-    ]
+ext_modules = [
+    Extension('dulwich._objects', ['dulwich/_objects.c'],
+              include_dirs=include_dirs),
+    Extension('dulwich._pack', ['dulwich/_pack.c'],
+              include_dirs=include_dirs),
+    Extension('dulwich._diff_tree', ['dulwich/_diff_tree.c'],
+              include_dirs=include_dirs),
+]
 
 
+if sys.platform == 'win32':
+    # Win32 setup breaks with non-ascii characters.
+    author = "Jelmer Vernooij"
+else:
+    author = "Jelmer Vernooĳ"
+
 setup(name='dulwich',
       description='Python Git Library',
       keywords='git',
       version=dulwich_version_string,
       url='https://www.dulwich.io/',
       license='Apachev2 or later or GPLv2',
-      author='Jelmer Vernooij',
       author_email='jelmer@jelmer.uk',
       long_description="""
       Python implementation of the Git file formats and protocols,
@@ -81,10 +87,11 @@ setup(name='dulwich',
       All functionality is available in pure Python. Optional
       C extensions can be built for improved performance.
 
-      The project is named after the part of London that Mr. and Mrs. Git live in
-      in the particular Monty Python sketch.
+      The project is named after the part of London that Mr. and Mrs. Git live
+      in in the particular Monty Python sketch.
       """,
-      packages=['dulwich', 'dulwich.tests', 'dulwich.tests.compat', 'dulwich.contrib'],
+      packages=['dulwich', 'dulwich.tests', 'dulwich.tests.compat',
+                'dulwich.contrib'],
       package_data={'': ['../docs/tutorial/*.txt']},
       scripts=['bin/dulwich', 'bin/dul-receive-pack', 'bin/dul-upload-pack'],
       classifiers=[
@@ -98,6 +105,7 @@ setup(name='dulwich',
           'Programming Language :: Python :: Implementation :: CPython',
           'Programming Language :: Python :: Implementation :: PyPy',
           'Operating System :: POSIX',
+          'Operating System :: Microsoft :: Windows',
           'Topic :: Software Development :: Version Control',
       ],
       ext_modules=ext_modules,