
Import dulwich_0.18.1.orig.tar.gz

[dgit import orig dulwich_0.18.1.orig.tar.gz]
Jelmer Vernooij 7 years ago
commit
0744ba4796
100 changed files with 25272 additions and 0 deletions
  1. .testr.conf (+4 / -0)
  2. .travis.yml (+41 / -0)
  3. AUTHORS (+132 / -0)
  4. CONTRIBUTING.md (+51 / -0)
  5. COPYING (+548 / -0)
  6. MANIFEST.in (+18 / -0)
  7. Makefile (+71 / -0)
  8. NEWS (+1584 / -0)
  9. PKG-INFO (+32 / -0)
  10. README.md (+88 / -0)
  11. README.swift.md (+133 / -0)
  12. TODO (+2 / -0)
  13. appveyor.yml (+96 / -0)
  14. bin/dul-receive-pack (+30 / -0)
  15. bin/dul-upload-pack (+30 / -0)
  16. bin/dulwich (+591 / -0)
  17. docs/Makefile (+96 / -0)
  18. docs/conf.py (+218 / -0)
  19. docs/index.txt (+36 / -0)
  20. docs/make.bat (+121 / -0)
  21. docs/performance.txt (+11 / -0)
  22. docs/protocol.txt (+65 / -0)
  23. docs/tutorial/Makefile (+12 / -0)
  24. docs/tutorial/conclusion.txt (+13 / -0)
  25. docs/tutorial/encoding.txt (+26 / -0)
  26. docs/tutorial/file-format.txt (+99 / -0)
  27. docs/tutorial/index.txt (+19 / -0)
  28. docs/tutorial/introduction.txt (+20 / -0)
  29. docs/tutorial/object-store.txt (+187 / -0)
  30. docs/tutorial/porcelain.txt (+40 / -0)
  31. docs/tutorial/remote.txt (+84 / -0)
  32. docs/tutorial/repo.txt (+101 / -0)
  33. docs/tutorial/tag.txt (+57 / -0)
  34. dulwich.cfg (+5 / -0)
  35. dulwich.egg-info/PKG-INFO (+32 / -0)
  36. dulwich.egg-info/SOURCES.txt (+200 / -0)
  37. dulwich.egg-info/dependency_links.txt (+1 / -0)
  38. dulwich.egg-info/top_level.txt (+1 / -0)
  39. dulwich/__init__.py (+25 / -0)
  40. dulwich/_diff_tree.c (+504 / -0)
  41. dulwich/_objects.c (+329 / -0)
  42. dulwich/_pack.c (+313 / -0)
  43. dulwich/archive.py (+118 / -0)
  44. dulwich/client.py (+1360 / -0)
  45. dulwich/config.py (+445 / -0)
  46. dulwich/contrib/__init__.py (+30 / -0)
  47. dulwich/contrib/paramiko_vendor.py (+138 / -0)
  48. dulwich/contrib/release_robot.py (+143 / -0)
  49. dulwich/contrib/swift.py (+1052 / -0)
  50. dulwich/contrib/test_release_robot.py (+127 / -0)
  51. dulwich/contrib/test_swift.py (+656 / -0)
  52. dulwich/contrib/test_swift_smoke.py (+317 / -0)
  53. dulwich/diff_tree.py (+600 / -0)
  54. dulwich/errors.py (+180 / -0)
  55. dulwich/fastexport.py (+250 / -0)
  56. dulwich/file.py (+176 / -0)
  57. dulwich/greenthreads.py (+142 / -0)
  58. dulwich/hooks.py (+152 / -0)
  59. dulwich/ignore.py (+358 / -0)
  60. dulwich/index.py (+648 / -0)
  61. dulwich/log_utils.py (+70 / -0)
  62. dulwich/lru_cache.py (+371 / -0)
  63. dulwich/object_store.py (+1128 / -0)
  64. dulwich/objects.py (+1336 / -0)
  65. dulwich/objectspec.py (+178 / -0)
  66. dulwich/pack.py (+1967 / -0)
  67. dulwich/patch.py (+317 / -0)
  68. dulwich/porcelain.py (+1141 / -0)
  69. dulwich/protocol.py (+505 / -0)
  70. dulwich/reflog.py (+76 / -0)
  71. dulwich/refs.py (+795 / -0)
  72. dulwich/repo.py (+1160 / -0)
  73. dulwich/server.py (+1138 / -0)
  74. dulwich/stdint.h (+19 / -0)
  75. dulwich/tests/__init__.py (+184 / -0)
  76. dulwich/tests/compat/__init__.py (+40 / -0)
  77. dulwich/tests/compat/server_utils.py (+311 / -0)
  78. dulwich/tests/compat/test_client.py (+542 / -0)
  79. dulwich/tests/compat/test_pack.py (+156 / -0)
  80. dulwich/tests/compat/test_repository.py (+217 / -0)
  81. dulwich/tests/compat/test_server.py (+100 / -0)
  82. dulwich/tests/compat/test_utils.py (+93 / -0)
  83. dulwich/tests/compat/test_web.py (+206 / -0)
  84. dulwich/tests/compat/utils.py (+256 / -0)
  85. dulwich/tests/data/blobs/11/11111111111111111111111111111111111111 (BIN)
  86. dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8 (BIN)
  87. dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349 (BIN)
  88. dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391 (BIN)
  89. dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310 (+2 / -0)
  90. dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc (BIN)
  91. dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e (+2 / -0)
  92. dulwich/tests/data/indexes/index (BIN)
  93. dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx (BIN)
  94. dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack (BIN)
  95. dulwich/tests/data/repos/.gitattributes (+1 / -0)
  96. dulwich/tests/data/repos/a.git/HEAD (+1 / -0)
  97. dulwich/tests/data/repos/a.git/objects/28/237f4dc30d0d462658d6b937b08a0f0b6ef55a (+2 / -0)
  98. dulwich/tests/data/repos/a.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91 (BIN)
  99. dulwich/tests/data/repos/a.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec (BIN)
  100. dulwich/tests/data/repos/a.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9 (BIN)

+ 4 - 0
.testr.conf

@@ -0,0 +1,4 @@
+[DEFAULT]
+test_command=PYTHONPATH=. python -m subunit.run $IDOPTION $LISTOPT dulwich.tests.test_suite
+test_id_option=--load-list $IDFILE
+test_list_option=--list
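
For reference, the file above is testrepository configuration; a hedged sketch of driving the suite through it (assuming the testrepository package is installed) would be:

    $ pip install testrepository
    $ testr init
    $ testr run

`testr run` invokes the `test_command` line above, so it exercises the same dulwich.tests.test_suite that `make check` runs.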

+ 41 - 0
.travis.yml

@@ -0,0 +1,41 @@
+language: python
+sudo: false
+cache: pip
+
+python:
+  - 2.7
+  - 3.3
+  - 3.4
+  - 3.5
+  - 3.5-dev
+  - 3.6
+  - 3.6-dev
+  - 3.7-dev
+  - pypy3.3-5.2-alpha1
+
+env:
+  - PYTHONHASHSEED=random
+    TEST_REQUIRE="gevent greenlet geventhttpclient fastimport"
+
+matrix:
+  include:
+    - python: pypy
+      env: TEST_REQUIRE=fastimport
+
+install:
+  - travis_retry pip install -U pip coverage codecov flake8 $TEST_REQUIRE
+
+script:
+  # Test without c extensions
+  - python -m coverage run -p --source=dulwich -m unittest dulwich.tests.test_suite
+
+  # Test with c extensions
+  - python setup.py build_ext -i
+  - python -m coverage run -p --source=dulwich -m unittest dulwich.tests.test_suite
+
+  # Style
+  - make style
+
+after_success:
+  - python -m coverage combine
+  - codecov
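
The Travis steps above can be replicated locally; roughly (assuming pip and a C toolchain are available):

    $ pip install -U coverage flake8 gevent greenlet geventhttpclient fastimport
    $ python -m coverage run -p --source=dulwich -m unittest dulwich.tests.test_suite
    $ python setup.py build_ext -i    # then re-run the suite with C extensions
    $ make style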

+ 132 - 0
AUTHORS

@@ -0,0 +1,132 @@
+Jelmer Vernooij <jelmer@jelmer.uk>
+Dave Borowitz <dborowitz@google.com>
+John Carr <john.carr@unrouted.co.uk>
+Gary van der Merwe <garyvdm@gmail.com>
+milki <milki@rescomp.berkeley.edu>
+Augie Fackler <durin42@gmail.com>
+Tay Ray Chuan <rctay89@gmail.com>
+Risto Kankkunen <risto.kankkunen@iki.fi>
+Jonas Haag <jonas@lophus.org>
+Fabien Boucher <fabien.boucher@enovance.com>
+James Westby <jw+debian@jameswestby.net>
+Mike Edgar <adgar@google.com>
+Koen Martens <gmc@sonologic.nl>
+Abderrahim Kitouni <a.kitouni@gmail.com>
+William Grant <william.grant@canonical.com>
+Marcin Kuzminski <marcin@python-works.com>
+Ryan Faulkner <rfaulk@yahoo-inc.com>
+Julian Berman <Julian@GrayVines.com>
+Mark Mikofski <mark.mikofski@sunpowercorp.com>
+Michael K <michael-k@users.noreply.github.com>
+Ali Sabil <ali.sabil@gmail.com>
+Damien Tournoud <damien@commerceguys.com>
+Hannu Valtonen <hannu.valtonen@ohmu.fi>
+Mika Mäenpää <mika.j.maenpaa@iki.fi>
+Paul Hummer <paul@eventuallyanyway.com>
+Lele Gaifax <lele@metapensiero.it>
+Lukasz Balcerzak <lukasz.balcerzak@python-center.org>
+Tommy Yu <tommy.yu@auckland.ac.nz>
+anatoly techtonik <techtonik@gmail.com>
+bmcorser <bmcorser@gmail.com>
+Brendan Cully <brendan@kublai.com>
+Chow Loong Jin <hyperair@debian.org>
+Chris Eberle <eberle1080@gmail.com>
+Dmitriy <dkomarov@gmail.com>
+Hervé Cauwelier <herve@oursours.net>
+Hugo Osvaldo Barrera <hugo@barrera.io>
+Jameson Nash <jameson@mit.edu>
+Marc Brinkmann <git@marcbrinkmann.de>
+Nicolas Dandrimont <nicolas@dandrimont.eu>
+Robert Brown <robert.brown@gmail.com>
+Siddharth Agarwal <sid0@fb.com>
+Stefan Zimmermann <zimmermann.code@gmail.com>
+Takeshi Kanemoto <tak.kanemoto@gmail.com>
+Yifan Zhang <yifan@wavii.com>
+Aaron O'Mullan <aaron.omullan@friendco.de>
+Adam "Cezar" Jenkins <emperorcezar@gmail.com>
+Alberto Ruiz <aruiz@gnome.org>
+Alexander Belchenko <bialix@ukr.net>
+Andreas Kloeckner <inform@tiker.net>
+André Roth <neolynx@gmail.com>
+Benjamin Pollack <benjamin@bitquabit.com>
+Benoit HERVIER <khertan@khertan.net>
+Dan Callaghan <dcallagh@redhat.com>
+David Keijser <david.keijser@klarna.com>
+David Ostrovsky <david@ostrovsky.org>
+David Pursehouse <david.pursehouse@gmail.com>
+Dmitrij D. Czarkoff <czarkoff@gmail.com>
+Doug Hellmann <doug@doughellmann.com>
+Dov Feldstern <dovdevel@gmail.com>
+Félix Mattrat <felix@dysosmus.net>
+Hwee Miin Koh <hwee-miin.koh@ubisoft.com>
+Jason R. Coombs <jaraco@jaraco.com>
+Jeremy Whitlock <jcscoobyrs@gmail.com>
+John Arbash Meinel <john@arbash-meinel.com>
+Laurent Rineau <laurent.rineau@cgal.org>
+Martin Packman <gzlist@googlemail.com>
+Max Shawabkeh <max99x@gmail.com>
+Michael Hudson <michael.hudson@canonical.com>
+Nick Stenning <nick@whiteink.com>
+Nick Ward <ward.nickjames@gmail.com>
+Paul Chen <lancevdance@gmail.com>
+Roland Mas <lolando@debian.org>
+Ronald Blaschke <ron@rblasch.org>
+Ronny Pfannschmidt <Ronny.Pfannschmidt@gmx.de>
+Ross Light <ross@zombiezen.com>
+Ryan McKern <ryan@orangefort.com>
+Ted Horst <ted.horst@earthlink.net>
+Thomas Liebetraut <thomas@tommie-lie.de>
+Timo Schmid <info@bluec0re.eu>
+Víðir Valberg Guðmundsson <vidir.valberg@orn.li>
+dak180 <dak180@users.sourceforge.net>
+Akbar Gumbira <akbargumbira@gmail.com>
+Alex Holmes <alex.holmes@isotoma.com>
+Andi McClure <andi.m.mcclure@gmail.com>
+Andres Lowrie <andres.lowrie@gmail.com>
+Artem Tikhomirov <artem.tikhomirov@syntevo.com>
+Brian Visel <eode@eptitude.net>
+Bruce Duncan <Bruce.Duncan@ed.ac.uk>
+Bruno Renié <brutasse@gmail.com>
+Chaiwat Suttipongsakul <cwt@bashell.com>
+Chris Bunney <crbunney@users.noreply.github.com>
+Chris Reid <chris@reidsy.com>
+Daniele Sluijters <daniele.sluijters@gmail.com>
+David Bennett <davbennett@google.com>
+David Blewett <davidb@sixfeetup.com>
+David Carr <david@carrclan.us>
+Dirk <dirk@opani.com>
+Elan Ruusamäe <glen@delfi.ee>
+Forrest Hopkins <fingerheroes@gmail.com>
+Hal Wine <hal.wine@gmail.com>
+Hans Kolek <hkolek@gmail.com>
+Jakub Wilk <jwilk@jwilk.net>
+JonChu <jchonphoenix@gmail.com>
+Kostis Anagnostopoulos <ankostis@gmail.com>
+Kyle Kelly <kkelly@yelp.com>
+Lionel Flandrin <lionel@svkt.org>
+Max Bowsher <_@maxb.eu>
+Mike Williams <miwilliams@google.com>
+Mikhail Terekhov <terekhov@emc.com>
+Nix <nix@esperi.co.uk>
+OnMaster <wme@CONTACT.DE>
+Pascal Quantin <pascal.quantin@gmail.com>
+Ricardo Salveti <ricardo.salveti@openbossa.org>
+Rod Cloutier <rodcloutier@gmail.com>
+Sam Vilain <svilain@saymedia.com>
+Stefano Rivera <stefano@rivera.za.net>
+Steven Myint <git@stevenmyint.com>
+Søren Løvborg <sorenl@unity3d.com>
+Travis Cline <travis.cline@gmail.com>
+Victor Stinner <vstinner@redhat.com>
+Volodymyr Holovko <vholovko@gmail.com>
+Yuval Langer <yuval.langer@gmail.com>
+codingtony <tony.bussieres@gmail.com>
+jon bain <jsbain@yahoo.com>
+kwatters <kwatters@tagged.com>
+max <max0d41@github.com>
+Segev Finer <segev208@gmail.com>
+fviolette <fviolette@talend.com>
+dzhuang <dzhuang.scut@gmail.com>
+Antoine Pietri <antoine.pietri1@gmail.com>
+
+If you contributed but are missing from this list, please send me an e-mail.

+ 51 - 0
CONTRIBUTING.md

@@ -0,0 +1,51 @@
+All functionality should be available in pure Python. Optional C
+implementations may be written for performance reasons, but should never
+replace the Python implementation. The C implementations should follow the
+kernel/git coding style.
+
+Where possible include updates to NEWS along with your improvements.
+
+New functionality and bug fixes should be accompanied by matching unit tests.
+
+Coding style
+------------
+Where possible, please follow PEP8 with regard to coding style.
+
+Furthermore, triple-quotes should always be """, single quotes are ' unless
+using " would result in less escaping within the string.
+
+Public methods, functions and classes should all have doc strings. Please use
+epydoc style docstrings to document parameters and return values.
+You can generate the documentation by running "make doc".
+
+Running the tests
+-----------------
+To run the testsuite, you should be able to simply run "make check". This
+will run the tests using unittest.
+
+ $ make check
+
+Tox configuration is also present as well as a Travis configuration file.
+
+String Types
+------------
+Like Linux, Git treats filenames as arbitrary bytestrings. There is no prescribed
+encoding for these strings, and although it is fairly common to use UTF-8, any
+raw byte strings are supported.
+
+For this reason, Dulwich internally treats git-based filenames as bytestrings. It is up
+to the Dulwich API user to encode and decode them if necessary.
+
+* git-repository related filenames: bytes
+* object sha1 digests (20 bytes long): bytes
+* object sha1 hexdigests (40 bytes long): str (bytestrings on python2, strings on python3)
+
+Merge requests
+--------------
+Please either send pull requests to the maintainer (jelmer@jelmer.uk) or create new pull
+requests on GitHub.
+
+Licensing
+---------
+All contributions should be made under the same license that Dulwich itself comes under:
+both Apache License, version 2.0 or later and GNU General Public License, version 2.0 or later.
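
To make the bytestring convention from CONTRIBUTING.md concrete, here is a minimal sketch (not part of the import; it assumes a dulwich checkout is importable) that lists staged paths, using an epydoc-style docstring as recommended above:

    from dulwich.repo import Repo

    def staged_paths(path="."):
        """List paths currently staged in a repository's index.

        :param path: Filesystem path of the repository
        :return: Index entry names as raw bytes
        """
        repo = Repo(path)
        # Dulwich hands filenames back as bytestrings; decoding them
        # (commonly as UTF-8) is left to the API user.
        return list(repo.open_index())

    for name in staged_paths():
        print(name)  # e.g. b'README.md'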

+ 548 - 0
COPYING

@@ -0,0 +1,548 @@
+Dulwich may be used under the conditions of either of two licenses,
+the Apache License (version 2.0 or later) or the GNU General Public License,
+version 2.0 or later.
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+		    GNU GENERAL PUBLIC LICENSE
+		       Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+			    Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+		    GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+			    NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+		     END OF TERMS AND CONDITIONS
+
+	    How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
+
+
+

+ 18 - 0
MANIFEST.in

@@ -0,0 +1,18 @@
+include NEWS
+include AUTHORS
+include README.md
+include README.swift.md
+include Makefile
+include COPYING
+include CONTRIBUTING.md
+include TODO
+include setup.cfg
+include dulwich/stdint.h
+recursive-include docs conf.py *.txt Makefile make.bat
+recursive-include examples *.py
+graft dulwich/tests/data
+include tox.ini
+include dulwich.cfg
+include appveyor.yml
+include .testr.conf
+include .travis.yml
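
MANIFEST.in determines which of the files above end up in source distributions; a quick sketch to verify (output path assumed, not taken from the import):

    $ python setup.py sdist
    $ tar -tzf dist/dulwich-0.18.1.tar.gz | head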

+ 71 - 0
Makefile

@@ -0,0 +1,71 @@
+PYTHON = python
+PYFLAKES = pyflakes
+PEP8 = pep8
+FLAKE8 ?= flake8
+SETUP = $(PYTHON) setup.py
+PYDOCTOR ?= pydoctor
+TESTRUNNER ?= unittest
+RUNTEST = PYTHONHASHSEED=random PYTHONPATH=.:$(PYTHONPATH) $(PYTHON) -m $(TESTRUNNER) $(TEST_OPTIONS)
+COVERAGE = python3-coverage
+
+DESTDIR=/
+
+all: build
+
+doc:: pydoctor
+doc:: sphinx
+
+sphinx::
+	$(MAKE) -C docs html
+
+pydoctor::
+	$(PYDOCTOR) --make-html -c dulwich.cfg
+
+build::
+	$(SETUP) build
+	$(SETUP) build_ext -i
+
+install::
+	$(SETUP) install --root="$(DESTDIR)"
+
+check:: build
+	$(RUNTEST) dulwich.tests.test_suite
+
+check-tutorial:: build
+	$(RUNTEST) dulwich.tests.tutorial_test_suite
+
+check-nocompat:: build
+	$(RUNTEST) dulwich.tests.nocompat_test_suite
+
+check-compat:: build
+	$(RUNTEST) dulwich.tests.compat_test_suite
+
+check-pypy:: clean
+	$(MAKE) check-noextensions PYTHON=pypy
+
+check-noextensions:: clean
+	$(RUNTEST) dulwich.tests.test_suite
+
+check-all: check check-pypy check-noextensions
+
+clean::
+	$(SETUP) clean --all
+	rm -f dulwich/*.so
+
+flakes:
+	$(PYFLAKES) dulwich
+
+pep8:
+	$(PEP8) dulwich
+
+style:
+	$(FLAKE8) --exclude=build,.git,build-pypy,.tox
+
+before-push: check
+	git diff origin/master | $(PEP8) --diff
+
+coverage:
+	$(COVERAGE) run --source=dulwich -m unittest dulwich.tests.test_suite dulwich.contrib.test_suite
+
+coverage-html: coverage
+	$(COVERAGE) html

+ 1584 - 0
NEWS

@@ -0,0 +1,1584 @@
+0.18.1	2017-07-31
+
+ BUG FIXES
+
+  * Fix syntax error in dulwich.contrib.test_swift_smoke.
+    (Jelmer Vernooij)
+
+0.18.0	2017-07-31
+
+ BUG FIXES
+
+  * Fix remaining tests on Windows. (Jelmer Vernooij, #493)
+
+  * Fix build of C extensions with Python 3 on Windows.
+    (Jelmer Vernooij)
+
+  * Pass 'mkdir' argument onto Repo.init_bare in Repo.clone.
+    (Jelmer Vernooij, #504)
+
+  * In ``dulwich.porcelain.add``, if no files are specified,
+    add from current working directory rather than repository root.
+    (Jelmer Vernooij, #521)
+
+  * Properly deal with submodules in 'porcelain.status'.
+    (Jelmer Vernooij, #517)
+
+  * ``dulwich.porcelain.remove`` now actually removes files from
+    disk, not just from the index. (Jelmer Vernooij, #488)
+
+  * Fix handling of "reset" command with markers and without
+    "from". (Antoine Pietri)
+
+  * Fix handling of "merge" command with markers. (Antoine Pietri)
+
+  * Support treeish argument to porcelain.reset(), rather than
+    requiring a ref/commit id. (Jelmer Vernooij)
+
+  * Handle race condition when mtime doesn't change between writes/reads.
+    (Jelmer Vernooij, #541)
+
+  * Fix ``dulwich.porcelain.show`` on commits with Python 3.
+    (Jelmer Vernooij, #532)
+
+ IMPROVEMENTS
+
+  * Add basic support for reading ignore files in ``dulwich.ignore``.
+    ``dulwich.porcelain.add`` and ``dulwich.porcelain.status`` now honor
+    ignores. (Jelmer Vernooij, Segev Finer, #524, #526)
+
+  * New ``dulwich.porcelain.check_ignore`` command.
+    (Jelmer Vernooij)
+
+  * ``dulwich.porcelain.status`` now supports a ``ignored`` argument.
+    (Jelmer Vernooij)
+
+ DOCUMENTATION
+
+  * Clarified docstrings for Client.{send_pack,fetch_pack} implementations.
+    (Jelmer Vernooij, #523)
+
+0.17.3	2017-03-20
+
+ PLATFORM SUPPORT
+
+  * List Python 3.3 as supported. (Jelmer Vernooij, #513)
+
+ BUG FIXES
+
+  * Fix compatibility with pypy 3. (Jelmer Vernooij)
+
+0.17.2	2017-03-19
+
+ BUG FIXES
+
+  * Add workaround for
+    https://bitbucket.org/pypy/pypy/issues/2499/cpyext-pystring_asstring-doesnt-work,
+    fixing Dulwich when used with C extensions on pypy < 5.6. (Victor Stinner)
+
+  * Properly quote config values with a '#' character in them.
+    (Jelmer Vernooij, #511)
+
+0.17.1	2017-03-01
+
+ IMPROVEMENTS
+
+  * Add basic 'dulwich pull' command. (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Cope with existing submodules during pull.
+    (Jelmer Vernooij, #505)
+
+0.17.0	2017-03-01
+
+ TEST FIXES
+
+  * Skip test that requires sync to synchronize filesystems if os.sync is
+    not available. (Koen Martens)
+
+ IMPROVEMENTS
+
+  * Implement MemoryRepo.{set_description,get_description}.
+    (Jelmer Vernooij)
+
+  * Raise exception in Repo.stage() when absolute paths are
+    passed in. Allow passing in relative paths to
+    porcelain.add(). (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Handle multi-line quoted values in config files.
+    (Jelmer Vernooij, #495)
+
+  * Allow porcelain.clone of repository without HEAD.
+    (Jelmer Vernooij, #501)
+
+  * Support passing tag ids to Walker()'s include argument.
+    (Jelmer Vernooij)
+
+  * Don't strip trailing newlines from extra headers.
+    (Nicolas Dandrimont)
+
+  * Set bufsize=0 for subprocess interaction with SSH client.
+    Fixes hangs on Python 3. (René Stern, #434)
+
+  * Don't drop first slash for SSH paths, except for those
+    starting with "~". (Jelmer Vernooij, René Stern, #463)
+
+  * Properly log off after retrieving just refs.
+    (Jelmer Vernooij)
+
+0.16.3	2016-01-14
+
+ TEST FIXES
+
+  * Remove racy check that relies on clock time changing between writes.
+    (Jelmer Vernooij)
+
+ IMPROVEMENTS
+
+  * Add porcelain.remote_add. (Jelmer Vernooij)
+
+0.16.2	2016-01-14
+
+ IMPROVEMENTS
+
+  * Fixed failing test-cases on windows.
+    (Koen Martens)
+
+ API CHANGES
+
+  * Repo is now a context manager, so that it can be easily
+    closed using a ``with`` statement. (Søren Løvborg)
+
+ TEST FIXES
+
+  * Only run worktree list compat tests against git 2.7.0,
+    when 'git worktree list' was introduced. (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Ignore filemode when building index when core.filemode
+    is false.
+    (Koen Martens)
+
+  * Initialize core.filemode configuration setting by
+    probing the filesystem for trustable permissions.
+    (Koen Martens)
+
+  * Fix ``porcelain.reset`` to respect the committish argument.
+    (Koen Martens)
+
+  * Fix dulwich.porcelain.ls_remote() on Python 3.
+    (#471, Jelmer Vernooij)
+
+  * Allow both unicode and byte strings for host paths
+    in dulwich.client. (#435, Jelmer Vernooij)
+
+  * Add remote from porcelain.clone. (#466, Jelmer Vernooij)
+
+  * Fix unquoting of credentials before passing to urllib2.
+    (#475, Volodymyr Holovko)
+
+  * Cope with submodules in `build_index_from_tree`.
+    (#477, Jelmer Vernooij)
+
+  * Handle deleted files in `get_unstaged_changes`.
+    (#483, Doug Hellmann)
+
+  * Don't overwrite files when they haven't changed in
+    `build_file_from_blob`.
+    (#479, Benoît HERVIER)
+
+  * Check for existence of index file before opening pack.
+    Fixes a race when new packs are being added.
+    (#482, wme)
+
+0.16.1	2016-12-25
+
+ BUG FIXES
+
+  * Fix python3 compatibility for dulwich.contrib.release_robot.
+    (Jelmer Vernooij)
+
+0.16.0	2016-12-24
+
+ IMPROVEMENTS
+
+  * Add support for worktrees. See `git-worktree(1)` and
+    `gitrepository-layout(5)`. (Laurent Rineau)
+
+  * Add support for `commondir` file in Git control
+    directories. (Laurent Rineau)
+
+  * Add support for passwords in HTTP URLs.
+    (Jon Bain, Mika Mäenpää)
+
+  * Add `release_robot` script to contrib,
+    allowing easy finding of current version based on Git tags.
+    (Mark Mikofski)
+
+  * Add ``Blob.splitlines`` method.
+    (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Fix handling of ``Commit.tree`` being set to an actual
+    tree object rather than a tree id. (Jelmer Vernooij)
+
+  * Return remote refs from LocalGitClient.fetch_pack(),
+    consistent with the documentation for that method.
+    (#461, Jelmer Vernooij)
+
+  * Fix handling of unknown URL schemes in get_transport_and_path.
+    (#465, Jelmer Vernooij)
+
+0.15.0	2016-10-09
+
+ BUG FIXES
+
+  * Allow missing trailing LF when reading service name from
+    HTTP servers. (Jelmer Vernooij, Andrew Shadura, #442)
+
+  * Fix dulwich.porcelain.pull() on Python3. (Jelmer Vernooij, #451)
+
+  * Properly pull in tags during dulwich.porcelain.clone.
+    (Jelmer Vernooij, #408)
+
+ CHANGES
+
+  * Changed license from "GNU General Public License, version 2.0 or later"
+    to "Apache License, version 2.0 or later or GNU General Public License,
+    version 2.0 or later". (#153)
+
+ IMPROVEMENTS
+
+  * Add ``dulwich.porcelain.ls_tree`` implementation. (Jelmer Vernooij)
+
+0.14.1	2016-07-05
+
+ BUG FIXES
+
+  * Fix regression removing untouched refs when pushing over SSH.
+    (Jelmer Vernooij #441)
+
+  * Skip Python3 tests for SWIFT contrib module, as it has not yet
+    been ported.
+
+0.14.0	2016-07-03
+
+ BUG FIXES
+
+  * Fix ShaFile.id after modification of a copied ShaFile.
+    (Félix Mattrat, Jelmer Vernooij)
+
+  * Support removing refs from porcelain.push.
+    (Jelmer Vernooij, #437)
+
+  * Stop magic protocol ref `capabilities^{}` from leaking out
+    to clients. (Jelmer Vernooij, #254)
+
+ IMPROVEMENTS
+
+  * Add `dulwich.config.parse_submodules` function.
+
+  * Add `RefsContainer.follow` method. (#438)
+
+0.13.0	2016-04-24
+
+ IMPROVEMENTS
+
+  * Support `ssh://` URLs in get_transport_and_path_from_url().
+    (Jelmer Vernooij, #402)
+
+  * Support missing empty line after headers in Git commits and tags.
+    (Nicolas Dandrimont, #413)
+
+  * Fix `dulwich.porcelain.status` when used in empty trees.
+    (Jelmer Vernooij, #415)
+
+  * Return copies of objects in MemoryObjectStore rather than
+    references, making the behaviour more consistent with that of
+    DiskObjectStore. (Félix Mattrat, Jelmer Vernooij)
+
+  * Fix ``dulwich.web`` on Python3. (#295, Jonas Haag)
+
+ CHANGES
+
+  * Drop support for Python 2.6.
+
+  * Fix python3 client web support. (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Fix hang on Gzip decompression. (Jonas Haag)
+
+  * Don't rely on working tell() and seek() methods
+    on wsgi.input. (Jonas Haag)
+
+  * Support fastexport/fastimport functionality on python3 with newer
+    versions of fastimport (>= 0.9.5). (Jelmer Vernooij, Félix Mattrat)
+
+0.12.0	2015-12-13
+
+ IMPROVEMENTS
+
+  * Add a `dulwich.archive` module that can create tarballs.
+    Based on code from Jonas Haag in klaus.
+
+  * Add a `dulwich.reflog` module for reading and writing reflogs.
+    (Jelmer Vernooij)
+
+  * Fix handling of ambiguous refs in `parse_ref` to make
+    it match the behaviour described in https://git-scm.com/docs/gitrevisions.
+    (Chris Bunney)
+
+  * Support Python3 in C modules. (Lele Gaifax)
+
+ BUG FIXES
+
+  * Simplify handling of SSH command invocation.
+    Fixes quoting of paths. Thanks, Thomas Liebetraut. (#384)
+
+  * Fix inconsistent handling of trailing slashes for DictRefsContainer. (#383)
+
+  * Add hack to support thin packs during fetch(), albeit while requiring the
+    entire pack file to be loaded into memory. (jsbain)
+
+ CHANGES
+
+  * This will be the last release to support Python 2.6.
+
+0.11.2	2015-09-18
+
+ IMPROVEMENTS
+
+  * Add support for agent= capability. (Jelmer Vernooij, #298)
+
+  * Add support for quiet capability. (Jelmer Vernooij)
+
+ CHANGES
+
+  * The ParamikoSSHVendor class has been moved to
+    dulwich.contrib.paramiko_vendor, as it's currently untested.
+    (Jelmer Vernooij, #364)
+
+0.11.1	2015-09-13
+
+ Fix-up release to exclude broken blame.py file.
+
+0.11.0	2015-09-13
+
+ IMPROVEMENTS
+
+  * Extended Python3 support to most of the codebase.
+    (Gary van der Merwe, Jelmer Vernooij)
+  * The `Repo` object has a new `close` method that can be called to close any
+    open resources. (Gary van der Merwe)
+  * Support 'git.bat' in SubprocessGitClient on Windows.
+    (Stefan Zimmermann)
+  * Advertise 'ofs-delta' capability in receive-pack server side
+    capabilities. (Jelmer Vernooij)
+  * Switched `default_local_git_client_cls` to `LocalGitClient`.
+    (Gary van der Merwe)
+  * Add `porcelain.ls_remote` and `GitClient.get_refs`.
+    (Michael Edgar)
+  * Add `Repo.discover` method. (B. M. Corser)
+  * Add `dulwich.objectspec.parse_refspec`. (Jelmer Vernooij)
+  * Add `porcelain.pack_objects` and `porcelain.repack`.
+    (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Fix handling of 'done' in graph walker and implement the
+    'no-done' capability. (Tommy Yu, #88)
+
+  * Avoid recursion limit issues resolving deltas. (William Grant, #81)
+
+  * Allow arguments in local client binary path overrides.
+    (Jelmer Vernooij)
+
+  * Fix handling of commands with arguments in paramiko SSH
+    client. (Andreas Klöckner, Jelmer Vernooij, #363)
+
+  * Fix parsing of quoted strings in configs. (Jelmer Vernooij, #305)
+
+0.10.1  2015-03-25
+
+ BUG FIXES
+
+  * Return `ApplyDeltaError` when encountering delta errors
+    in both C extensions and native delta application code.
+    (Jelmer Vernooij, #259)
+
+0.10.0	2015-03-22
+
+ BUG FIXES
+
+  * In dulwich.index.build_index_from_tree, by default
+    refuse to create entries that start with .git/.
+
+  * Fix running of testsuite when installed.
+    (Jelmer Vernooij, #223)
+
+  * Use a block cache in _find_content_rename_candidates(),
+    improving performance. (Mike Williams)
+
+  * Add support for ``core.protectNTFS`` setting.
+    (Jelmer Vernooij)
+
+  * Fix TypeError when fetching empty updates.
+    (Hwee Miin Koh)
+
+  * Resolve delta refs when pulling into a MemoryRepo.
+    (Max Shawabkeh, #256)
+
+  * Fix handling of tags of non-commits in missing object finder.
+    (Augie Fackler, #211)
+
+  * Explicitly disable mmap on plan9 where it doesn't work.
+    (Jeff Sickel)
+
+ IMPROVEMENTS
+
+  * New public method `Repo.reset_index`. (Jelmer Vernooij)
+
+  * Prevent duplicate parsing of loose files in objects
+    directory when reading. Thanks to David Keijser for the
+    report. (Jelmer Vernooij, #231)
+
+0.9.9	2015-03-20
+
+ SECURITY BUG FIXES
+
+  * Fix buffer overflow in C implementation of pack apply_delta().
+    (CVE-2015-0838)
+
+    Thanks to Ivan Fratric of the Google Security Team for
+    reporting this issue.
+    (Jelmer Vernooij)
+
+0.9.8	2014-11-30
+
+ BUG FIXES
+
+  * Various fixes to improve test suite running on Windows.
+    (Gary van der Merwe)
+
+  * Limit delta copy length to 64K in v2 pack files. (Robert Brown)
+
+  * Strip newline from final ACKed SHA while fetching packs.
+    (Michael Edgar)
+
+  * Remove assignment to PyList_SIZE() that was causing segfaults on
+    pypy. (Jelmer Vernooij, #196)
+
+ IMPROVEMENTS
+
+  * Add porcelain 'receive-pack' and 'upload-pack'. (Jelmer Vernooij)
+
+  * Handle SIGINT signals in bin/dulwich. (Jelmer Vernooij)
+
+  * Add 'status' support to bin/dulwich. (Jelmer Vernooij)
+
+  * Add 'branch_create', 'branch_list', 'branch_delete' porcelain.
+    (Jelmer Vernooij)
+
+  * Add 'fetch' porcelain. (Jelmer Vernooij)
+
+  * Add 'tag_delete' porcelain. (Jelmer Vernooij)
+
+  * Add support for serializing/deserializing 'gpgsig' attributes in Commit.
+    (Jelmer Vernooij)
+
+ CHANGES
+
+  * dul-web is now available as 'dulwich web-daemon'.
+    (Jelmer Vernooij)
+
+  * dulwich.porcelain.tag has been renamed to tag_create.
+    dulwich.porcelain.list_tags has been renamed to tag_list.
+    (Jelmer Vernooij)
+
+ API CHANGES
+
+  * Restore support for Python 2.6. (Jelmer Vernooij, Gary van der Merwe)
+
+
+0.9.7	2014-06-08
+
+ BUG FIXES
+
+  * Fix tests dependent on hash ordering. (Michael Edgar)
+
+  * Support staging symbolic links in Repo.stage.
+    (Robert Brown)
+
+  * Ensure that all files object are closed when running the test suite.
+    (Gary van der Merwe)
+
+  * When writing OFS_DELTA pack entries, write correct offset.
+    (Augie Fackler)
+
+  * Fix handling of larger copy operations in packs. (Augie Fackler)
+
+  * Various fixes to improve test suite running on Windows.
+    (Gary van der Merwe)
+
+  * Fix logic for extra adds of identical files in rename detector.
+    (Robert Brown)
+
+ IMPROVEMENTS
+
+  * Add porcelain 'status'. (Ryan Faulkner)
+
+  * Add porcelain 'daemon'. (Jelmer Vernooij)
+
+  * Add `dulwich.greenthreads` module which provides support
+    for concurrency of some object store operations.
+    (Fabien Boucher)
+
+  * Various changes to improve compatibility with Python 3.
+    (Gary van der Merwe, Hannu Valtonen, michael-k)
+
+  * Add OpenStack Swift backed repository implementation
+    in dulwich.contrib. See README.swift for details. (Fabien Boucher)
+
+ API CHANGES
+
+  * An optional close function can be passed to the Protocol class. This will
+    be called by its close method. (Gary van der Merwe)
+
+  * All classes with close methods are now context managers, so that they can
+    be easily closed using a `with` statement. (Gary van der Merwe)
+
+  * Remove deprecated `num_objects` argument to `write_pack` methods.
+    (Jelmer Vernooij)
+
+ OTHER CHANGES
+
+  * The 'dul-daemon' script has been removed. The same functionality
+    is now available as 'dulwich daemon'. (Jelmer Vernooij)
+
+0.9.6	2014-04-23
+
+ IMPROVEMENTS
+
+  * Add support for recursive add in 'git add'.
+    (Ryan Faulkner, Jelmer Vernooij)
+
+  * Add porcelain 'list_tags'. (Ryan Faulkner)
+
+  * Add porcelain 'push'. (Ryan Faulkner)
+
+  * Add porcelain 'pull'. (Ryan Faulkner)
+
+  * Support 'http.proxy' in HttpGitClient.
+    (Jelmer Vernooij, #1096030)
+
+  * Support 'http.useragent' in HttpGitClient.
+    (Jelmer Vernooij)
+
+  * In server, wait for clients to send empty list of
+    wants when talking to empty repository.
+    (Damien Tournoud)
+
+  * Various changes to improve compatibility with
+    Python 3. (Gary van der Merwe)
+
+ BUG FIXES
+
+  * Support unseekable 'wsgi.input' streams.
+    (Jonas Haag)
+
+  * Raise TypeError when passing unicode() object
+    to Repo.__getitem__.
+    (Jonas Haag)
+
+  * Fix handling of `reset` command in dulwich.fastexport.
+    (Jelmer Vernooij, #1249029)
+
+  * In client, don't wait for server to close connection
+    first. Fixes hang when used against GitHub
+    server implementation. (Siddharth Agarwal)
+
+  * DeltaChainIterator: fix a corner case where an object is inflated as an
+    object already in the repository.
+    (Damien Tournoud, #135)
+
+  * Stop leaking file handles during pack reload. (Damien Tournoud)
+
+  * Avoid reopening packs during pack cache reload. (Jelmer Vernooij)
+
+ API CHANGES
+
+  * Drop support for Python 2.6. (Jelmer Vernooij)
+
+0.9.5	2014-02-23
+
+ IMPROVEMENTS
+
+  * Add porcelain 'tag'. (Ryan Faulkner)
+
+  * New module `dulwich.objectspec` for parsing strings referencing
+    objects and commit ranges. (Jelmer Vernooij)
+
+  * Add shallow branch support. (milki)
+
+  * Allow passing urllib2 `opener` into HttpGitClient.
+    (Dov Feldstern, #909037)
+
+ CHANGES
+
+  * Drop support for Python 2.4 and 2.5. (Jelmer Vernooij)
+
+ API CHANGES
+
+  * Remove long deprecated ``Repo.commit``, ``Repo.get_blob``,
+    ``Repo.tree`` and ``Repo.tag``. (Jelmer Vernooij)
+
+  * Remove long deprecated ``Repo.revision_history`` and ``Repo.ref``.
+    (Jelmer Vernooij)
+
+  * Remove long deprecated ``Tree.entries``. (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Raise KeyError rather than TypeError when passing in
+    unicode object of length 20 or 40 to Repo.__getitem__.
+    (Jelmer Vernooij)
+
+  * Use 'rm' rather than 'unlink' in tests, since the latter
+    does not exist on OpenBSD and other platforms.
+    (Dmitrij D. Czarkoff)
+
+0.9.4	2013-11-30
+
+ IMPROVEMENTS
+
+  * Add ssh_kwargs attribute to ParamikoSSHVendor. (milki)
+
+  * Add Repo.set_description(). (Víðir Valberg Guðmundsson)
+
+  * Add a basic `dulwich.porcelain` module. (Jelmer Vernooij, Marcin Kuzminski)
+
+  * Various performance improvements for object access.
+    (Jelmer Vernooij)
+
+  * New function `get_transport_and_path_from_url`,
+    similar to `get_transport_and_path` but only
+    supports URLs.
+    (Jelmer Vernooij)
+
+  * Add support for file:// URLs in `get_transport_and_path_from_url`.
+    (Jelmer Vernooij)
+
+  * Add LocalGitClient implementation.
+    (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Support filesystems with 64bit inode and device numbers.
+    (André Roth)
+
+ CHANGES
+
+  * Ref handling has been moved to dulwich.refs.
+    (Jelmer Vernooij)
+
+ API CHANGES
+
+  * Remove long deprecated RefsContainer.set_ref().
+    (Jelmer Vernooij)
+
+  * Repo.ref() is now deprecated in favour of Repo.refs[].
+    (Jelmer Vernooij)
+
+ FEATURES
+
+  * Add support for graftpoints. (milki)
+
+0.9.3	2013-09-27
+
+ BUG FIXES
+
+  * Fix path for stdint.h in MANIFEST.in. (Jelmer Vernooij)
+
+0.9.2	2013-09-26
+
+ BUG FIXES
+
+  * Include stdint.h in MANIFEST.in (Mark Mikofski)
+
+0.9.1	2013-09-22
+
+ BUG FIXES
+
+  * Support lookups of 40-character refs in BaseRepo.__getitem__.
+    (Chow Loong Jin, Jelmer Vernooij)
+
+  * Fix fetching packs with side-band-64k capability disabled.
+    (David Keijser, Jelmer Vernooij)
+
+  * Several fixes in send-pack protocol behaviour - handling of empty pack
+    files and deletes. (milki, #1063087)
+
+  * Fix capability negotiation when fetching packs over HTTP.
+    (#1072461, William Grant)
+
+  * Enforce determine_wants returning an empty list rather than None.
+    (Fabien Boucher, Jelmer Vernooij)
+
+  * In the server, support pushes just removing refs.
+    (Fabien Boucher, Jelmer Vernooij)
+
+ IMPROVEMENTS
+
+  * Support passing a single revision to BaseRepo.get_walker() rather than
+    a list of revisions. (Alberto Ruiz)
+
+  * Add `Repo.get_description` method. (Jelmer Vernooij)
+
+  * Support thin packs in Pack.iterobjects() and Pack.get_raw().
+    (William Grant)
+
+  * Add `MemoryObjectStore.add_pack` and `MemoryObjectStore.add_thin_pack` methods.
+    (David Bennett)
+
+  * Add paramiko-based SSH vendor. (Aaron O'Mullan)
+
+  * Support running 'dulwich.server' and 'dulwich.web' using 'python -m'.
+    (Jelmer Vernooij)
+
+  * Add ObjectStore.close(). (Jelmer Vernooij)
+
+  * Raise appropriate NotImplementedError when encountering dumb HTTP servers.
+    (Jelmer Vernooij)
+
+ API CHANGES
+
+  * SSHVendor.connect_ssh has been renamed to SSHVendor.run_command.
+    (Jelmer Vernooij)
+
+  * ObjectStore.add_pack() now returns a 3-tuple. The last element will be an
+    abort() method that can be used to cancel the pack operation.
+    (Jelmer Vernooij)
+
+0.9.0	2013-05-31
+
+ BUG FIXES
+
+  * Push efficiency - report missing objects only. (#562676, Artem Tikhomirov)
+
+  * Use indentation consistent with C Git in config files.
+    (#1031356, Curt Moore, Jelmer Vernooij)
+
+  * Recognize and skip binary files in diff function.
+    (Takeshi Kanemoto)
+
+  * Fix handling of relative paths in dulwich.client.get_transport_and_path.
+    (Brian Visel, #1169368)
+
+  * Preserve ordering of entries in configuration.
+    (Benjamin Pollack)
+
+  * Support ~ expansion in SSH client paths. (milki, #1083439)
+
+  * Support relative paths in alternate paths.
+    (milki, Michel Lespinasse, #1175007)
+
+  * Log all error messages from wsgiref server to the logging module. This
+    makes the test suite quiet again. (Gary van der Merwe)
+
+  * Support passing None for empty tree in changes_from_tree.
+    (Kevin Watters)
+
+  * Support fetching empty repository in client. (milki, #1060462)
+
+ IMPROVEMENTS
+
+  * Add optional honor_filemode flag to build_index_from_tree.
+    (Mark Mikofski)
+
+  * Support core/filemode setting when building trees. (Jelmer Vernooij)
+
+  * Add chapter on tags in tutorial. (Ryan Faulkner)
+
+ FEATURES
+
+  * Add support for mergetags. (milki, #963525)
+
+  * Add support for posix shell hooks. (milki)
+
+0.8.7	2012-11-27
+
+ BUG FIXES
+
+  * Fix use of alternates in ``DiskObjectStore``.{__contains__,__iter__}.
+    (Dmitriy)
+
+  * Fix compatibility with Python 2.4. (David Carr)
+
+0.8.6	2012-11-09
+
+ API CHANGES
+
+  * dulwich.__init__ no longer imports client, protocol, repo and
+    server modules. (Jelmer Vernooij)
+
+ FEATURES
+
+  * ConfigDict now behaves more like a dictionary.
+    (Adam 'Cezar' Jenkins, issue #58)
+
+  * HTTPGitApplication now takes an optional
+    `fallback_app` argument. (Jonas Haag, issue #67)
+
+  * Support for large pack index files. (Jameson Nash)
+
+ TESTING
+
+  * Make index entry tests a little bit less strict, to cope with
+    slightly different behaviour on various platforms.
+    (Jelmer Vernooij)
+
+  * ``setup.py test`` (available when setuptools is installed) now
+    runs all tests, not just the basic unit tests.
+    (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Commit._deserialize now actually deserializes the current state rather than
+    the previous one. (Yifan Zhang, issue #59)
+
+  * Handle None elements in lists of TreeChange objects. (Alex Holmes)
+
+  * Support cloning repositories without HEAD set.
+    (D-Key, Jelmer Vernooij, issue #69)
+
+  * Support ``MemoryRepo.get_config``. (Jelmer Vernooij)
+
+  * In ``get_transport_and_path``, pass extra keyword arguments on to
+    HttpGitClient. (Jelmer Vernooij)
+
+0.8.5	2012-03-29
+
+ BUG FIXES
+
+  * Avoid use of 'with' in dulwich.index. (Jelmer Vernooij)
+
+  * Be a little bit strict about OS behaviour in index tests.
+    Should fix the tests on Debian GNU/kFreeBSD. (Jelmer Vernooij)
+
+0.8.4	2012-03-28
+
+ BUG FIXES
+
+  * Options on the same line as sections in config files are now supported.
+    (Jelmer Vernooij, #920553)
+
+  * Only negotiate capabilities that are also supported by the server.
+    (Rod Cloutier, Risto Kankkunen)
+
+  * Fix parsing of invalid timezone offsets with two minus signs.
+    (Jason R. Coombs, #697828)
+
+  * Reset environment variables during tests, to avoid
+    test isolation leaks reading ~/.gitconfig. (Risto Kankkunen)
+
+ TESTS
+
+  * $HOME is now explicitly specified for tests that use it to read
+    ``~/.gitconfig``, to prevent test isolation issues.
+    (Jelmer Vernooij, #920330)
+
+ FEATURES
+
+  * Additional arguments to get_transport_and_path are now passed
+    on to the constructor of the transport. (Sam Vilain)
+
+  * The WSGI server now transparently handles when a git client submits data
+    using Content-Encoding: gzip.
+    (David Blewett, Jelmer Vernooij)
+
+  * Add dulwich.index.build_index_from_tree(). (milki)
+
+0.8.3	2012-01-21
+
+ FEATURES
+
+  * The config parser now supports the git-config file format as
+    described in git-config(1) and can write git config files.
+    (Jelmer Vernooij, #531092, #768687)
+
+  * ``Repo.do_commit`` will now use the user identity from
+    .git/config or ~/.gitconfig if none was explicitly specified.
+    (Jelmer Vernooij)
+
+ BUG FIXES
+
+  * Allow ``determine_wants`` methods to include the zero sha in their
+    return value. (Jelmer Vernooij)
+
+0.8.2	2011-12-18
+
+ BUG FIXES
+
+  * Cope with different zlib buffer sizes in sha1 file parser.
+    (Jelmer Vernooij)
+
+  * Fix get_transport_and_path for HTTP/HTTPS URLs.
+    (Bruno Renié)
+
+  * Avoid calling free_objects() on NULL in error cases. (Chris Eberle)
+
+  * Fix use --bare argument to 'dulwich init'. (Chris Eberle)
+
+  * Properly abort connections when the determine_wants function
+    raises an exception. (Jelmer Vernooij, #856769)
+
+  * Tweak xcodebuild hack to deal with more error output.
+    (Jelmer Vernooij, #903840)
+
+ FEATURES
+
+  * Add support for retrieving tarballs from remote servers.
+    (Jelmer Vernooij, #379087)
+
+  * New method ``update_server_info`` which generates data
+    for dumb server access. (Jelmer Vernooij, #731235)
+
+0.8.1	2011-10-31
+
+ FEATURES
+
+  * Repo.do_commit has a new argument 'ref'.
+
+  * Repo.do_commit has a new argument 'merge_heads'. (Jelmer Vernooij)
+
+  * New ``Repo.get_walker`` method. (Jelmer Vernooij)
+
+  * New ``Repo.clone`` method. (Jelmer Vernooij, #725369)
+
+  * ``GitClient.send_pack`` now supports the 'side-band-64k' capability.
+    (Jelmer Vernooij)
+
+  * ``HttpGitClient`` which supports the smart server protocol over
+    HTTP. "dumb" access is not yet supported. (Jelmer Vernooij, #373688)
+
+  * Add basic support for alternates. (Jelmer Vernooij, #810429)
+
+ CHANGES
+
+  * unittest2 or python >= 2.7 is now required for the testsuite.
+    testtools is no longer supported. (Jelmer Vernooij, #830713)
+
+ BUG FIXES
+
+  * Fix compilation with older versions of MSVC.  (Martin gz)
+
+  * Special case 'refs/stash' as a valid ref. (Jelmer Vernooij, #695577)
+
+  * Smart protocol clients can now change refs even if they are
+    not uploading new data. (Jelmer Vernooij, #855993)
+
+  * Don't compile C extensions when running in pypy.
+    (Ronny Pfannschmidt, #881546)
+
+  * Use different name for strnlen replacement function to avoid clashing
+    with system strnlen. (Jelmer Vernooij, #880362)
+
+ API CHANGES
+
+  * ``Repo.revision_history`` is now deprecated in favor of ``Repo.get_walker``.
+    (Jelmer Vernooij)
+
+0.8.0	2011-08-07
+
+ FEATURES
+
+  * New DeltaChainIterator abstract class for quickly iterating all objects in
+    a pack, with implementations for pack indexing and inflation.
+    (Dave Borowitz)
+
+  * New walk module with a Walker class for customizable commit walking.
+    (Dave Borowitz)
+
+  * New tree_changes_for_merge function in diff_tree. (Dave Borowitz)
+
+  * Easy rename detection in RenameDetector even without find_copies_harder.
+    (Dave Borowitz)
+
+ BUG FIXES
+
+  * Avoid storing all objects in memory when writing pack.
+    (Jelmer Vernooij, #813268)
+
+  * Support IPv6 for git:// connections. (Jelmer Vernooij, #801543)
+
+  * Improve performance of Repo.revision_history(). (Timo Schmid, #535118)
+
+  * Fix use of SubprocessWrapper on Windows. (Paulo Madeira, #670035)
+
+  * Fix compilation on newer versions of Mac OS X (Lion and up). (Ryan McKern, #794543)
+
+  * Prevent raising ValueError for correct refs in RefContainer.__delitem__.
+
+  * Correctly return a tuple from MemoryObjectStore.get_raw. (Dave Borowitz)
+
+  * Fix a bug in reading the pack checksum when there are fewer than 20 bytes
+    left in the buffer. (Dave Borowitz)
+
+  * Support ~ in git:// URL paths. (Jelmer Vernooij, #813555)
+
+  * Make ShaFile.__eq__ work when other is not a ShaFile. (Dave Borowitz)
+
+  * ObjectStore.get_graph_walker() now no longer yields the same
+    revision more than once. This significantly improves performance
+    when wide revision graphs are involved.
+    (Jelmer Vernooij, #818168)
+
+  * Teach ReceivePackHandler how to read empty packs. (Dave Borowitz)
+
+  * Don't send a pack with duplicates of the same object. (Dave Borowitz)
+
+  * Teach the server how to serve a clone of an empty repo. (Dave Borowitz)
+
+  * Correctly advertise capabilities during receive-pack. (Dave Borowitz)
+
+  * Fix add/add and add/rename conflicts in tree_changes_for_merge.
+    (Dave Borowitz)
+
+  * Use correct MIME types in web server. (Dave Borowitz)
+
+ API CHANGES
+
+  * write_pack no longer takes the num_objects argument and requires an object
+    to be passed in that is iterable (rather than an iterator) and that
+    provides __len__.  (Jelmer Vernooij)
+
+  * write_pack_data has been renamed to write_pack_objects and no longer takes a
+    num_objects argument. (Jelmer Vernooij)
+
+  * take_msb_bytes, read_zlib_chunks, unpack_objects, and
+    PackStreamReader.read_objects now take an additional argument indicating a
+    crc32 to compute. (Dave Borowitz)
+
+  * PackObjectIterator was removed; its functionality is still exposed by
+    PackData.iterobjects. (Dave Borowitz)
+
+  * Add a sha arg to write_pack_object to incrementally compute a SHA.
+    (Dave Borowitz)
+
+  * Include offset in PackStreamReader results. (Dave Borowitz)
+
+  * Move PackStreamReader from server to pack. (Dave Borowitz)
+
+  * Extract check_length_and_checksum, compute_file_sha, and
+    pack_object_header pack helper functions. (Dave Borowitz)
+
+  * Extract a compute_file_sha function. (Dave Borowitz)
+
+  * Remove move_in_thin_pack as a separate method; add_thin_pack now completes
+    the thin pack and moves it into place in one step. Remove ThinPackData as
+    well. (Dave Borowitz)
+
+  * Custom buffer size in read_zlib_chunks. (Dave Borowitz)
+
+  * New UnpackedObject data class that replaces ad-hoc tuples in the return
+    value of unpack_object and various DeltaChainIterator methods.
+    (Dave Borowitz)
+
+  * Add a lookup_path convenience method to Tree. (Dave Borowitz)
+
+  * Optionally create RenameDetectors without passing in tree SHAs.
+    (Dave Borowitz)
+
+  * Optionally include unchanged entries in RenameDetectors. (Dave Borowitz)
+
+  * Optionally pass a RenameDetector to tree_changes. (Dave Borowitz)
+
+  * Optionally pass a request object through to server handlers. (Dave Borowitz)
+
+ TEST CHANGES
+
+  * If setuptools is installed, "python setup.py test" will now run the testsuite.
+    (Jelmer Vernooij)
+
+  * Add a new build_pack test utility for building packs from a simple spec.
+    (Dave Borowitz)
+
+  * Add a new build_commit_graph test utility for building commits from a
+    simple spec. (Dave Borowitz)
+
+0.7.1	2011-04-12
+
+ BUG FIXES
+
+  * Fix double decref in _diff_tree.c. (Ted Horst, #715528)
+
+  * Fix the build on Windows. (Pascal Quantin)
+
+  * Fix get_transport_and_path compatibility with pre-2.6.5 versions of Python.
+    (Max Bowsher, #707438)
+
+  * BaseObjectStore.determine_wants_all no longer breaks on zero SHAs.
+    (Jelmer Vernooij)
+
+  * write_tree_diff() now supports submodules.
+    (Jelmer Vernooij)
+
+  * Fix compilation for XCode 4 and older versions of distutils.sysconfig.
+    (Daniele Sluijters)
+
+ IMPROVEMENTS
+
+  * Sphinxified documentation. (Lukasz Balcerzak)
+
+  * Add Pack.keep. (Marc Brinkmann)
+
+ API CHANGES
+
+  * The order of the parameters to Tree.add(name, mode, sha) has changed, and
+    is now consistent with the rest of Dulwich. Existing code will still
+    work but print a DeprecationWarning. (Jelmer Vernooij, #663550)
+
+  * Tree.entries() is now deprecated in favour of Tree.items() and
+    Tree.iteritems(). (Jelmer Vernooij)
+
+0.7.0	2011-01-21
+
+ FEATURES
+
+  * New `dulwich.diff_tree` module for simple content-based rename detection.
+    (Dave Borowitz)
+
+  * Add Tree.items(). (Jelmer Vernooij)
+
+  * Add eof() and unread_pkt_line() methods to Protocol. (Dave Borowitz)
+
+  * Add write_tree_diff(). (Jelmer Vernooij)
+
+  * Add `serve_command` function for git server commands as executables.
+    (Jelmer Vernooij)
+
+  * dulwich.client.get_transport_and_path now supports rsync-style repository URLs.
+    (Dave Borowitz, #568493)
+
+ BUG FIXES
+
+  * Correct short-circuiting operation for no-op fetches in the server.
+    (Dave Borowitz)
+
+  * Support parsing git mbox patches without a version tail, as generated by
+    Mercurial.  (Jelmer Vernooij)
+
+  * Fix dul-receive-pack and dul-upload-pack. (Jelmer Vernooij)
+
+  * Zero-padded file modes in Tree objects no longer trigger an exception but
+    the check code warns about them. (Augie Fackler, #581064)
+
+  * Repo.init() now honors the mkdir flag. (#671159)
+
+  * The ref format is now checked when setting a ref rather than when reading it back.
+    (Dave Borowitz, #653527)
+
+  * Make sure pack files are closed correctly. (Tay Ray Chuan)
+
+ DOCUMENTATION
+
+  * Run the tutorial inside the test suite. (Jelmer Vernooij)
+
+  * Reorganized and updated the tutorial. (Jelmer Vernooij, Dave Borowitz,
+    #610550, #610540)
+
+
+0.6.2	2010-10-16
+
+ BUG FIXES
+
+  * HTTP server correctly handles empty CONTENT_LENGTH. (Dave Borowitz)
+
+  * Don't error when creating GitFiles with the default mode. (Dave Borowitz)
+
+  * ThinPackData.from_file now works with resolve_ext_ref callback.
+    (Dave Borowitz)
+
+  * Provide strnlen() on mingw32 which doesn't have it. (Hans Kolek)
+
+  * Set bare=true in the configuration for bare repositories. (Dirk Neumann)
+
+ FEATURES
+
+  * Use slots for core objects to save up on memory. (Jelmer Vernooij)
+
+  * Web server supports streaming progress/pack output. (Dave Borowitz)
+
+  * New public function dulwich.pack.write_pack_header. (Dave Borowitz)
+
+  * Distinguish between missing files and read errors in HTTP server.
+    (Dave Borowitz)
+
+  * Initial work on support for fastimport using python-fastimport.
+    (Jelmer Vernooij)
+
+  * New dulwich.pack.MemoryPackIndex class. (Jelmer Vernooij)
+
+  * Delegate SHA peeling to the object store.  (Dave Borowitz)
+
+ TESTS
+
+  * Use GitFile when modifying packed-refs in tests. (Dave Borowitz)
+
+  * New tests in test_web with better coverage and fewer ad-hoc mocks.
+    (Dave Borowitz)
+
+  * Standardize quote delimiters in test_protocol. (Dave Borowitz)
+
+  * Fix use when testtools is installed. (Jelmer Vernooij)
+
+  * Add trivial test for write_pack_header. (Jelmer Vernooij)
+
+  * Refactor some of dulwich.tests.compat.server_utils. (Dave Borowitz)
+
+  * Allow overwriting id property of objects in test utils. (Dave Borowitz)
+
+  * Use real in-memory objects rather than stubs for server tests.
+    (Dave Borowitz)
+
+  * Clean up MissingObjectFinder. (Dave Borowitz)
+
+ API CHANGES
+
+  * ObjectStore.iter_tree_contents now walks contents in depth-first, sorted
+    order. (Dave Borowitz)
+
+  * ObjectStore.iter_tree_contents can optionally yield tree objects as well.
+    (Dave Borowitz).
+
+  * Add side-band-64k support to ReceivePackHandler. (Dave Borowitz)
+
+  * Change server capabilities methods to classmethods. (Dave Borowitz)
+
+  * Tweak server handler injection. (Dave Borowitz)
+
+  * PackIndex1 and PackIndex2 now subclass FilePackIndex, which is 
+    itself a subclass of PackIndex. (Jelmer Vernooij)
+
+ DOCUMENTATION
+
+  * Add docstrings for various functions in dulwich.objects. (Jelmer Vernooij)
+
+  * Clean up docstrings in dulwich.protocol. (Dave Borowitz)
+
+  * Explicitly specify allowed protocol commands to
+    ProtocolGraphWalker.read_proto_line.  (Dave Borowitz)
+
+  * Add utility functions to DictRefsContainer. (Dave Borowitz)
+
+
+0.6.1	2010-07-22
+
+ BUG FIXES
+
+  * Fix memory leak in C implementation of sorted_tree_items. (Dave Borowitz)
+
+  * Use correct path separators for named repo files. (Dave Borowitz)
+
+  * python > 2.7 and testtools-based test runners will now also pick up skipped
+    tests correctly. (Jelmer Vernooij)
+
+ FEATURES
+
+  * Move named file initialization to BaseRepo. (Dave Borowitz)
+
+  * Add logging utilities and git/HTTP server logging. (Dave Borowitz)
+
+  * The GitClient interface has been cleaned up and instances are now reusable.
+    (Augie Fackler)
+
+  * Allow overriding paths to executables in GitSSHClient. 
+    (Ross Light, Jelmer Vernooij, #585204)
+
+  * Add PackBasedObjectStore.pack_loose_objects(). (Jelmer Vernooij)
+
+ TESTS
+
+  * Add tests for sorted_tree_items and C implementation. (Dave Borowitz)
+
+  * Add a MemoryRepo that stores everything in memory. (Dave Borowitz)
+
+  * Quiet logging output from web tests. (Dave Borowitz)
+
+  * More flexible version checking for compat tests. (Dave Borowitz)
+
+  * Compat tests for servers with and without side-band-64k. (Dave Borowitz)
+
+  * Use GitFile when modifying packed-refs in tests. (Dave Borowitz)
+
+ CLEANUP
+
+  * Clean up file headers. (Dave Borowitz)
+
+ API CHANGES
+
+  * dulwich.pack.write_pack_index_v{1,2} now take a file-like object
+    rather than a filename. (Jelmer Vernooij)
+
+  * Make dul-daemon/dul-web trivial wrappers around server functionality.
+    (Dave Borowitz)
+
+  * Move reference WSGI handler to web.py. (Dave Borowitz)
+
+  * Factor out _report_status in ReceivePackHandler. (Dave Borowitz)
+
+  * Factor out a function to convert a line to a pkt-line. (Dave Borowitz)
+
+
+0.6.0	2010-05-22
+
+note: This list is most likely incomplete for 0.6.0.
+
+ BUG FIXES
+ 
+  * Fix ReceivePackHandler to disallow removing refs without delete-refs.
+    (Dave Borowitz)
+
+  * Deal with capabilities required by the client, even if they 
+    can not be disabled in the server. (Dave Borowitz)
+
+  * Fix trailing newlines in generated patch files.
+    (Jelmer Vernooij)
+
+  * Implement RefsContainer.__contains__. (Jelmer Vernooij)
+
+  * Cope with \r in ref files on Windows.
+    (http://github.com/jelmer/dulwich/issues/#issue/13, Jelmer Vernooij)
+
+  * Fix GitFile breakage on Windows. (Anatoly Techtonik, #557585)
+
+  * Support packed ref deletion with no peeled refs. (Augie Fackler)
+
+  * Fix send pack when there is nothing to fetch. (Augie Fackler)
+
+  * Fix fetch if no progress function is specified. (Augie Fackler)
+
+  * Allow double-staging of files that are deleted in the index. 
+    (Dave Borowitz)
+
+  * Fix RefsContainer.add_if_new to support dangling symrefs.
+    (Dave Borowitz)
+
+  * Non-existent index files in non-bare repositories are now treated as
+    empty. (Dave Borowitz)
+
+  * Always update ShaFile.id when the contents of the object get changed. 
+    (Jelmer Vernooij)
+
+  * Various Python2.4-compatibility fixes. (Dave Borowitz)
+
+  * Fix thin pack handling. (Dave Borowitz)
+ 
+ FEATURES
+
+  * Add include-tag capability to server. (Dave Borowitz)
+
+  * New dulwich.fastexport module that can generate fastexport 
+    streams. (Jelmer Vernooij)
+
+  * Implemented BaseRepo.__contains__. (Jelmer Vernooij)
+
+  * Add __setitem__ to DictRefsContainer. (Dave Borowitz)
+
+  * Overall improvements checking Git objects. (Dave Borowitz)
+
+  * Packs are now verified while they are received. (Dave Borowitz)
+
+ TESTS
+
+  * Add framework for testing compatibility with C Git. (Dave Borowitz)
+
+  * Add various tests for the use of non-bare repositories. (Dave Borowitz)
+
+  * Cope with diffstat not being available on all platforms. 
+    (Tay Ray Chuan, Jelmer Vernooij)
+
+  * Add make_object and make_commit convenience functions to test utils.
+    (Dave Borowitz)
+
+ API BREAKAGES
+
+  * The 'committer' and 'message' arguments to Repo.do_commit() have 
+    been swapped. 'committer' is now optional. (Jelmer Vernooij)
+
+  * Repo.get_blob, Repo.commit, Repo.tag and Repo.tree are now deprecated.
+    (Jelmer Vernooij)
+
+  * RefsContainer.set_ref() was renamed to RefsContainer.set_symbolic_ref(),
+    for clarity. (Jelmer Vernooij)
+
+ API CHANGES
+
+  * The primary serialization APIs in dulwich.objects now work 
+    with chunks of strings rather than with full-text strings. 
+    (Jelmer Vernooij)
+
+0.5.0	2010-03-03
+
+ BUG FIXES
+
+  * Support custom fields in commits (readonly). (Jelmer Vernooij)
+
+  * Improved ref handling. (Dave Borowitz)
+
+  * Rework server protocol to be smarter and interoperate with cgit client.
+    (Dave Borowitz)
+
+  * Add a GitFile class that uses the same locking protocol for writes as 
+    cgit. (Dave Borowitz)
+
+  * Cope with forward slashes correctly in the index on Windows.
+    (Jelmer Vernooij, #526793)
+
+ FEATURES
+
+  * --pure option to setup.py to allow building/installing without the C 
+    extensions. (Hal Wine, Anatoly Techtonik, Jelmer Vernooij, #434326)
+
+  * Implement Repo.get_config(). (Jelmer Vernooij, Augie Fackler)
+
+  * HTTP dumb and smart server. (Dave Borowitz)
+
+  * Add abstract baseclass for Repo that does not require file system 
+    operations. (Dave Borowitz)
+
+0.4.1	2010-01-03
+
+ FEATURES
+
+  * Add ObjectStore.iter_tree_contents(). (Jelmer Vernooij)
+
+  * Add Index.changes_from_tree(). (Jelmer Vernooij)
+
+  * Add ObjectStore.tree_changes(). (Jelmer Vernooij)
+
+  * Add functionality for writing patches in dulwich.patch.
+    (Jelmer Vernooij)
+
+0.4.0	2009-10-07
+
+ DOCUMENTATION
+
+  * Added tutorial.
+
+ API CHANGES
+
+  * dulwich.object_store.tree_lookup_path will now return the mode and 
+    sha of the object found rather than the object itself.
+
+ BUG FIXES
+
+  * Use binascii.hexlify / binascii.unhexlify for better performance.
+
+  * Cope with extra unknown data in index files by ignoring it (for now).
+
+  * Add proper error message when server unexpectedly hangs up. (#415843)
+
+  * Correctly write opcode for equal in create_delta.
+
+0.3.3	2009-07-23
+
+ FEATURES
+
+  * Implement ShaFile.__hash__().
+
+  * Implement Tree.__len__().
+
+ BUG FIXES
+  
+  * Check for 'objects' and 'refs' directories
+    when looking for a Git repository. (#380818)
+
+0.3.2	2009-05-20
+
+ BUG FIXES
+
+  * Support the encoding field in Commits.
+  
+  * Some Windows compatibility fixes.
+
+  * Fixed several issues in commit support.
+
+ FEATURES
+
+  * Basic support for handling submodules.
+
+0.3.1	2009-05-13
+
+ FEATURES
+
+  * Implemented Repo.__getitem__, Repo.__setitem__ and Repo.__delitem__ to 
+    access content.
+
+ API CHANGES
+
+  * Removed Repo.set_ref, Repo.remove_ref, Repo.tags, Repo.get_refs and 
+    Repo.heads in favor of Repo.refs, a dictionary-like object for accessing
+    refs.
+
+ BUG FIXES
+
+  * Removed import of 'sha' module in objects.py, which was causing 
+    deprecation warnings on Python 2.6.
+
+0.3.0	2009-05-10
+
+ FEATURES
+
+  * A new function 'commit_tree' has been added that can commit a tree 
+    based on an index.
+
+ BUG FIXES
+
+  * The memory usage when generating indexes has been significantly reduced.
+ 
+  * A memory leak in the C implementation of parse_tree has been fixed.
+
+  * The send-pack smart server command now works. (Thanks Scott Chacon)
+
+  * The handling of short timestamps (less than 10 digits) has been fixed.
+
+  * The handling of timezones has been fixed.
+
+0.2.1	2009-04-30
+
+ BUG FIXES
+
+  * Fix compatibility with Python2.4.
+
+0.2.0	2009-04-30
+
+ FEATURES
+
+  * Support for activity reporting in smart protocol client.
+
+  * Optional C extensions for better performance in a couple of 
+    places that are performance-critical.
+
+0.1.1	2009-03-13
+
+ BUG FIXES
+
+  * Fixed regression in Repo.find_missing_objects()
+
+  * Don't fetch ^{} objects from remote hosts, as requesting them 
+    causes a hangup.
+
+  * Always write pack to disk completely before calculating checksum.
+
+ FEATURES
+
+  * Allow disabling thin packs when talking to remote hosts.
+
+0.1.0	2009-01-24
+
+  * Initial release.

+ 32 - 0
PKG-INFO

@@ -0,0 +1,32 @@
+Metadata-Version: 1.1
+Name: dulwich
+Version: 0.18.1
+Summary: Python Git Library
+Home-page: https://www.dulwich.io/
+Author: UNKNOWN
+Author-email: jelmer@jelmer.uk
+License: Apachev2 or later or GPLv2
+Description: 
+              Python implementation of the Git file formats and protocols,
+              without the need to have git installed.
+        
+              All functionality is available in pure Python. Optional
+              C extensions can be built for improved performance.
+        
+              The project is named after the part of London that Mr. and Mrs. Git live
+              in in the particular Monty Python sketch.
+              
+Keywords: git
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Topic :: Software Development :: Version Control

+ 88 - 0
README.md

@@ -0,0 +1,88 @@
+[![Build Status](https://travis-ci.org/jelmer/dulwich.png?branch=master)](https://travis-ci.org/jelmer/dulwich)
+[![Windows Build status](https://ci.appveyor.com/api/projects/status/cnothr6pxprfx2lf/branch/master?svg=true)](https://ci.appveyor.com/project/jelmer/dulwich-njb6g/branch/master)
+
+This is the Dulwich project.
+
+It aims to provide an interface to git repos (both local and remote) that
+doesn't call out to git directly but instead uses pure Python.
+
+**Main website**: [www.dulwich.io](https://www.dulwich.io/)
+
+**License**: Apache License, version 2 or GNU General Public License, version 2 or later.
+
+The project is named after the part of London that Mr. and Mrs. Git live in
+in the particular Monty Python sketch.
+
+Installation
+------------
+
+By default, Dulwich's setup.py will attempt to build and install the optional C
+extensions. The reason for this is that they significantly improve performance,
+since some frequently executed low-level operations are much slower in pure Python.
+
+If you don't want to install the C bindings, specify the --pure argument to setup.py:
+
+    $ python setup.py --pure install
+
+or if you are installing from pip:
+
+    $ pip install dulwich --global-option="--pure"
+
+Getting started
+---------------
+
+Dulwich comes with both a lower-level API and a higher-level interface ("porcelain").
+
+For example, to use the lower level API to access the commit message of the
+last commit:
+
+    >>> from dulwich.repo import Repo
+    >>> r = Repo('.')
+    >>> r.head()
+    '57fbe010446356833a6ad1600059d80b1e731e15'
+    >>> c = r[r.head()]
+    >>> c
+    <Commit 015fc1267258458901a94d228e39f0a378370466>
+    >>> c.message
+    'Add note about encoding.\n'
+
+And to print it using porcelain:
+
+    >>> from dulwich import porcelain
+    >>> porcelain.log('.', max_entries=1)
+    --------------------------------------------------
+    commit: 57fbe010446356833a6ad1600059d80b1e731e15
+    Author: Jelmer Vernooij <jelmer@jelmer.uk>
+    Date:   Sat Apr 29 2017 23:57:34 +0000
+
+    Add note about encoding.
+
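+As a rough sketch of creating a commit with porcelain (the path and
+identity below are purely illustrative; `init`, `add` and `commit` are
+existing porcelain functions):
+
+    >>> from dulwich import porcelain
+    >>> repo = porcelain.init('/tmp/myrepo')
+    >>> open('/tmp/myrepo/hello.txt', 'w').write('hello\n')
+    >>> porcelain.add(repo, ['/tmp/myrepo/hello.txt'])
+    >>> porcelain.commit(repo, message=b'Initial commit',
+    ...                  committer=b'Jane Doe <jane@example.com>')
+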
+Further documentation
+---------------------
+
+The dulwich documentation can be found in doc/ and
+[on the web](https://www.dulwich.io/docs/).
+
+The API reference can be generated using pydoctor, by running "make pydoctor",
+or [on the web](https://www.dulwich.io/apidocs).
+
+Help
+----
+
+There is a *#dulwich* IRC channel on the [Freenode](https://www.freenode.net/) network, and
+[dulwich-announce](https://groups.google.com/forum/#!forum/dulwich-announce)
+and [dulwich-discuss](https://groups.google.com/forum/#!forum/dulwich-discuss)
+mailing lists.
+
+Contributing
+------------
+
+For a full list of contributors, see the git logs or [AUTHORS](AUTHORS).
+
+If you'd like to contribute to Dulwich, see the [CONTRIBUTING](CONTRIBUTING.md)
+file and [list of open issues](https://github.com/jelmer/dulwich/issues).
+
+Supported versions of Python
+----------------------------
+
+At the moment, Dulwich supports (and is tested on) CPython 2.7, 3.3, 3.4, 3.5, 3.6 and PyPy.

+ 133 - 0
README.swift.md

@@ -0,0 +1,133 @@
+OpenStack Swift as backend for Dulwich
+======================================
+Fabien Boucher <fabien.boucher@enovance.com>
+
+The module dulwich/contrib/swift.py implements dulwich.repo.BaseRepo
+so that it is compatible with OpenStack Swift. We can then use
+Dulwich as a Git server and, instead of using a regular POSIX file
+system to store repository objects, use the Swift object storage
+via its own API.
+
+c Git client <---> Dulwich server <---> OpenStack Swift API
+
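+As a minimal sketch of what this looks like from Python (this assumes
+the `load_conf` and `SwiftRepo` helpers from dulwich/contrib/swift.py
+and a container named `edeploy`):
+
+    from dulwich.contrib.swift import SwiftRepo, load_conf
+
+    # Read the Swift credentials and tuning values from the configuration
+    # file described below.
+    conf = load_conf('/etc/swift-dul.conf')
+    # Open the repository whose Git objects live in the 'edeploy' container.
+    repo = SwiftRepo('edeploy', conf)
+    print(repo.refs.as_dict())
+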
+This implementation is still a work in progress and should be
+considered a beta version, so be prepared to find bugs.
+
+Configuration file
+------------------
+
+We need to provide some configuration values in order to let Dulwich
+talk and authenticate against Swift. The following config file must
+be used as template:
+
+    [swift]
+    # Authentication URL (Keystone or Swift)
+    auth_url = http://127.0.0.1:5000/v2.0
+    # Authentication version to use
+    auth_ver = 2
+    # The tenant and username separated by a semicolon
+    username = admin;admin
+    # The user password
+    password = pass
+    # The Object storage region to use (auth v2) (Default RegionOne)
+    region_name = RegionOne
+    # The Object storage endpoint URL to use (auth v2) (Default internalURL)
+    endpoint_type = internalURL
+    # Concurrency to use for parallel tasks (Default 10)
+    concurrency = 10
+    # Size of the HTTP pool (Default 10)
+    http_pool_length = 10
+    # Timeout delay for HTTP connections (Default 20)
+    http_timeout = 20
+    # Chunk size to read from pack (Bytes) (Default 12228)
+    chunk_length = 12228
+    # Cache size (MBytes) (Default 20)
+    cache_length = 20
+
+
+Note that for now we use the same tenant to perform the requests
+against Swift. Therefore there is only one Swift account used
+for storing repositories. Each repository will be contained in
+a Swift container.
+
+How to run the unit tests
+-------------------------
+
+There is no need to have a Swift cluster running to run the unit tests.
+Just run the following command in the Dulwich source directory:
+
+    $ PYTHONPATH=. python -m dulwich.contrib.test_swift
+
+How to run the functional tests
+-------------------------------
+
+We provide some basic tests to perform smoke tests against a real Swift
+cluster. To run those functional tests you need a properly configured
+configuration file. The tests can be run as follow:
+
+    $ DULWICH_SWIFT_CFG=/etc/swift-dul.conf PYTHONPATH=. python -m dulwich.contrib.test_swift_smoke
+
+How to install
+--------------
+
+Install the Dulwich library via setup.py. The dependencies will be
+automatically retrieved from PyPI:
+
+    $ python ./setup.py install
+
+How to run the server
+---------------------
+
+Start the server using the following command:
+
+    $ python -m dulwich.contrib.swift daemon -c /etc/swift-dul.conf -l 127.0.0.1
+
+Note that a lot of requests will be performed against the Swift
+cluster, so it is better to start the Dulwich server as close
+as possible to the Swift proxy. The best solution is to run
+the server on the Swift proxy node to reduce the latency.
+
+How to use
+----------
+
+Once you have validated that the functional tests work as expected and
+the server is running, you can init a bare repository. Run this
+command with the name of the repository to create:
+
+    $ python -m dulwich.contrib.swift init -c /etc/swift-dul.conf edeploy
+
+The repository name will be the container that will contain all the Git
+objects for the repository. Then a standard c Git client can be used to
+perform operations against this repository.
+
+As an example we can clone the previously empty bare repository:
+
+    $ git clone git://localhost/edeploy
+
+Then push an existing project in it:
+
+    $ git clone https://github.com/enovance/edeploy.git edeployclone
+    $ cd edeployclone
+    $ git remote add alt git://localhost/edeploy
+    $ git push alt master
+    $ git ls-remote alt
+    9dc50a9a9bff1e232a74e365707f22a62492183e        HEAD
+    9dc50a9a9bff1e232a74e365707f22a62492183e        refs/heads/master
+
+The other Git commands can be used the way you do usually against
+a regular repository.
+
+Note that the daemon subcommand starts a Git server listening for the
+Git protocol. Therefore there is no authentication or encryption
+at all between the c Git client and the Git server (Dulwich).
+
+Note on the .info file for pack object
+--------------------------------------
+
+The Swift interface of Dulwich relies only on the pack format
+to store Git objects. Instead of using only an index (pack-sha.idx)
+along with the pack, we add a second file (pack-sha.info). This file
+is automatically created when a client pushes some references to the
+repository. The purpose of this file is to speed up pack creation
+on the server side when a client fetches some references. Currently this
+.info format is not optimized and may change in the future.

+ 2 - 0
TODO

@@ -0,0 +1,2 @@
+- 'git annotate' equivalent
+- repacking

+ 96 - 0
appveyor.yml

@@ -0,0 +1,96 @@
+environment:
+
+  matrix:
+
+    - PYTHON: "C:\\Python27"
+      PYTHON_VERSION: "2.7.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python27-x64"
+      PYTHON_VERSION: "2.7.x"
+      PYTHON_ARCH: "64"
+
+    - PYTHON: "C:\\Python33"
+      PYTHON_VERSION: "3.3.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python33-x64"
+      PYTHON_VERSION: "3.3.x"
+      PYTHON_ARCH: "64"
+      DISTUTILS_USE_SDK: "1"
+
+    - PYTHON: "C:\\Python34"
+      PYTHON_VERSION: "3.4.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python34-x64"
+      PYTHON_VERSION: "3.4.x"
+      PYTHON_ARCH: "64"
+      DISTUTILS_USE_SDK: "1"
+
+    - PYTHON: "C:\\Python35"
+      PYTHON_VERSION: "3.5.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python35-x64"
+      PYTHON_VERSION: "3.5.x"
+      PYTHON_ARCH: "64"
+
+    - PYTHON: "C:\\Python36"
+      PYTHON_VERSION: "3.6.x"
+      PYTHON_ARCH: "32"
+
+    - PYTHON: "C:\\Python36-x64"
+      PYTHON_VERSION: "3.6.x"
+      PYTHON_ARCH: "64"
+
+install:
+  # If there is a newer build queued for the same PR, cancel this one.
+  # The AppVeyor 'rollout builds' option is supposed to serve the same
+  # purpose but it is problematic because it tends to cancel builds pushed
+  # directly to master instead of just PR builds (or the converse).
+  # credits: JuliaLang developers.
+  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
+        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
+        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
+          throw "There are newer queued builds for this pull request, failing early." }
+  - ECHO "Filesystem root:"
+  - ps: "ls \"C:/\""
+
+  - ECHO "Installed SDKs:"
+  - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\""
+
+  # Install Python (from the official .msi of http://python.org) and pip when
+  # not already installed.
+  - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
+
+  # Prepend newly installed Python to the PATH of this build (this cannot be
+  # done from inside the powershell script as it would require to restart
+  # the parent CMD process).
+  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
+
+  # Check that we have the expected version and architecture for Python
+  - "build.cmd %PYTHON%\\python.exe --version"
+  - "build.cmd %PYTHON%\\python.exe -c \"import struct; print(struct.calcsize('P') * 8)\""
+
+  # Install setuptools/wheel so that we can e.g. use bdist_wheel
+  - "pip install setuptools wheel"
+
+  - "build.cmd %PYTHON%\\python.exe setup.py develop"
+
+build_script:
+  # Build the compiled extension
+  - "build.cmd %PYTHON%\\python.exe setup.py build"
+
+test_script:
+  - "build.cmd %PYTHON%\\python.exe setup.py test"
+
+after_test:
+  - "build.cmd %PYTHON%\\python.exe setup.py bdist_wheel"
+  # http://stackoverflow.com/questions/43255455/unicode-character-causing-error-with-bdist-wininst-on-python-3-but-not-python-2
+  # - "python setup.py bdist_wininst"
+  - "build.cmd %PYTHON%\\python.exe setup.py bdist_msi"
+  - ps: "ls dist"
+
+artifacts:
+  - path: dist\*

+ 30 - 0
bin/dul-receive-pack

@@ -0,0 +1,30 @@
+#!/usr/bin/python
+# dul-receive-pack - git-receive-pack in python
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+from dulwich.porcelain import receive_pack
+import os
+import sys
+
+if len(sys.argv) < 2:
+    sys.stderr.write("usage: %s <git-dir>\n" % os.path.basename(sys.argv[0]))
+    sys.exit(1)
+
+sys.exit(receive_pack(sys.argv[1]))

+ 30 - 0
bin/dul-upload-pack

@@ -0,0 +1,30 @@
+#!/usr/bin/python
+# dul-upload-pack - git-upload-pack in python
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+from dulwich.porcelain import upload_pack
+import os
+import sys
+
+if len(sys.argv) < 2:
+    sys.stderr.write("usage: %s <git-dir>\n" % os.path.basename(sys.argv[0]))
+    sys.exit(1)
+
+sys.exit(upload_pack(sys.argv[1]))

+ 591 - 0
bin/dulwich

@@ -0,0 +1,591 @@
+#!/usr/bin/python -u
+#
+# dulwich - Simple command-line interface to Dulwich
+# Copyright (C) 2008-2011 Jelmer Vernooij <jelmer@samba.org>
+# vim: expandtab
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Simple command-line interface to Dulwich>
+
+This is a very simple command-line wrapper for Dulwich. It is by
+no means intended to be a full-blown Git command-line interface but just
+a way to test Dulwich.
+"""
+
+import os
+import sys
+from getopt import getopt
+import optparse
+import signal
+
+def signal_int(signal, frame):
+    sys.exit(1)
+
+signal.signal(signal.SIGINT, signal_int)
+
+from dulwich import porcelain
+from dulwich.client import get_transport_and_path
+from dulwich.errors import ApplyDeltaError
+from dulwich.index import Index
+from dulwich.pack import Pack, sha_to_hex
+from dulwich.patch import write_tree_diff
+from dulwich.repo import Repo
+
+
+class Command(object):
+    """A Dulwich subcommand."""
+
+    def run(self, args):
+        """Run the command."""
+        raise NotImplementedError(self.run)
+
+
+class cmd_archive(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        client, path = get_transport_and_path(args.pop(0))
+        location = args.pop(0)
+        committish = args.pop(0)
+        porcelain.archive(location, committish, outstream=sys.stdout,
+            errstream=sys.stderr)
+
+
+class cmd_add(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+
+        porcelain.add(".", paths=args)
+
+
+class cmd_rm(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+
+        porcelain.rm(".", paths=args)
+
+
+class cmd_fetch_pack(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["all"])
+        opts = dict(opts)
+        client, path = get_transport_and_path(args.pop(0))
+        r = Repo(".")
+        if "--all" in opts:
+            determine_wants = r.object_store.determine_wants_all
+        else:
+            determine_wants = lambda x: [y for y in args if y not in r.object_store]
+        client.fetch(path, r, determine_wants)
+
+
+class cmd_fetch(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        opts = dict(opts)
+        client, path = get_transport_and_path(args.pop(0))
+        r = Repo(".")
+        if "--all" in opts:
+            determine_wants = r.object_store.determine_wants_all
+        refs = client.fetch(path, r, progress=sys.stdout.write)
+        print("Remote refs:")
+        for item in refs.items():
+            print("%s -> %s" % item)
+
+
+class cmd_log(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        parser.add_option("--reverse", dest="reverse", action="store_true",
+                          help="Reverse order in which entries are printed")
+        parser.add_option("--name-status", dest="name_status", action="store_true",
+                          help="Print name/status for each changed file")
+        options, args = parser.parse_args(args)
+
+        porcelain.log(".", paths=args, reverse=options.reverse,
+                      name_status=options.name_status,
+                      outstream=sys.stdout)
+
+
+class cmd_diff(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+
+        if args == []:
+            print("Usage: dulwich diff COMMITID")
+            sys.exit(1)
+
+        r = Repo(".")
+        commit_id = args[0]
+        commit = r[commit_id]
+        parent_commit = r[commit.parents[0]]
+        write_tree_diff(sys.stdout, r.object_store, parent_commit.tree, commit.tree)
+
+
+class cmd_dump_pack(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+
+        if args == []:
+            print("Usage: dulwich dump-pack FILENAME")
+            sys.exit(1)
+
+        basename, _ = os.path.splitext(args[0])
+        x = Pack(basename)
+        print("Object names checksum: %s" % x.name())
+        print("Checksum: %s" % sha_to_hex(x.get_stored_checksum()))
+        if not x.check():
+            print("CHECKSUM DOES NOT MATCH")
+        print("Length: %d" % len(x))
+        for name in x:
+            try:
+                print("\t%s" % x[name])
+            except KeyError as k:
+                print("\t%s: Unable to resolve base %s" % (name, k))
+            except ApplyDeltaError as e:
+                print("\t%s: Unable to apply delta: %r" % (name, e))
+
+
+class cmd_dump_index(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+
+        if args == []:
+            print("Usage: dulwich dump-index FILENAME")
+            sys.exit(1)
+
+        filename = args[0]
+        idx = Index(filename)
+
+        for o in idx:
+            print(o, idx[o])
+
+
+class cmd_init(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["bare"])
+        opts = dict(opts)
+
+        if args == []:
+            path = os.getcwd()
+        else:
+            path = args[0]
+
+        porcelain.init(path, bare=("--bare" in opts))
+
+
+class cmd_clone(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["bare"])
+        opts = dict(opts)
+
+        if args == []:
+            print("usage: dulwich clone host:path [PATH]")
+            sys.exit(1)
+
+        source = args.pop(0)
+        if len(args) > 0:
+            target = args.pop(0)
+        else:
+            target = None
+
+        porcelain.clone(source, target, bare=("--bare" in opts))
+
+
+class cmd_commit(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["message"])
+        opts = dict(opts)
+        porcelain.commit(".", message=opts["--message"])
+
+
+class cmd_commit_tree(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["message"])
+        if args == []:
+            print("usage: dulwich commit-tree tree")
+            sys.exit(1)
+        opts = dict(opts)
+        porcelain.commit_tree(".", tree=args[0], message=opts["--message"])
+
+
+class cmd_update_server_info(Command):
+
+    def run(self, args):
+        porcelain.update_server_info(".")
+
+
+class cmd_symbolic_ref(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["ref-name", "force"])
+        if not args:
+            print("Usage: dulwich symbolic-ref REF_NAME [--force]")
+            sys.exit(1)
+
+        ref_name = args.pop(0)
+        porcelain.symbolic_ref(".", ref_name=ref_name, force='--force' in args)
+
+
+class cmd_show(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        porcelain.show(".", args)
+
+
+class cmd_diff_tree(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        if len(args) < 2:
+            print("Usage: dulwich diff-tree OLD-TREE NEW-TREE")
+            sys.exit(1)
+        porcelain.diff_tree(".", args[0], args[1])
+
+
+class cmd_rev_list(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        if len(args) < 1:
+            print('Usage: dulwich rev-list COMMITID...')
+            sys.exit(1)
+        porcelain.rev_list('.', args)
+
+
+class cmd_tag(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, '', [])
+        if len(args) < 1:
+            print('Usage: dulwich tag NAME')
+            sys.exit(1)
+        porcelain.tag('.', args[0])
+
+
+class cmd_repack(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", [])
+        opts = dict(opts)
+        porcelain.repack('.')
+
+
+class cmd_reset(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, "", ["hard", "soft", "mixed"])
+        opts = dict(opts)
+        mode = ""
+        if "--hard" in opts:
+            mode = "hard"
+        elif "--soft" in opts:
+            mode = "soft"
+        elif "--mixed" in opts:
+            mode = "mixed"
+        porcelain.reset('.', mode=mode, *args)
+
+
+class cmd_daemon(Command):
+
+    def run(self, args):
+        from dulwich import log_utils
+        from dulwich.protocol import TCP_GIT_PORT
+        parser = optparse.OptionParser()
+        parser.add_option("-l", "--listen_address", dest="listen_address",
+                          default="localhost",
+                          help="Binding IP address.")
+        parser.add_option("-p", "--port", dest="port", type=int,
+                          default=TCP_GIT_PORT,
+                          help="Binding TCP port.")
+        options, args = parser.parse_args(args)
+
+        log_utils.default_logging_config()
+        if len(args) >= 1:
+            gitdir = args[0]
+        else:
+            gitdir = '.'
+        from dulwich import porcelain
+        porcelain.daemon(gitdir, address=options.listen_address,
+                         port=options.port)
+
+
+class cmd_web_daemon(Command):
+
+    def run(self, args):
+        from dulwich import log_utils
+        parser = optparse.OptionParser()
+        parser.add_option("-l", "--listen_address", dest="listen_address",
+                          default="",
+                          help="Binding IP address.")
+        parser.add_option("-p", "--port", dest="port", type=int,
+                          default=8000,
+                          help="Binding TCP port.")
+        options, args = parser.parse_args(args)
+
+        log_utils.default_logging_config()
+        if len(args) >= 1:
+            gitdir = args[0]
+        else:
+            gitdir = '.'
+        from dulwich import porcelain
+        porcelain.web_daemon(gitdir, address=options.listen_address,
+                             port=options.port)
+
+
+class cmd_receive_pack(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        if len(args) >= 1:
+            gitdir = args[0]
+        else:
+            gitdir = '.'
+        porcelain.receive_pack(gitdir)
+
+
+class cmd_upload_pack(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        if len(args) >= 1:
+            gitdir = args[0]
+        else:
+            gitdir = '.'
+        porcelain.upload_pack(gitdir)
+
+
+class cmd_status(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        if len(args) >= 1:
+            gitdir = args[0]
+        else:
+            gitdir = '.'
+        status = porcelain.status(gitdir)
+        if any(names for (kind, names) in status.staged.items()):
+            sys.stdout.write("Changes to be committed:\n\n")
+            for kind, names in status.staged.items():
+                for name in names:
+                    sys.stdout.write("\t%s: %s\n" % (
+                        kind, name.decode(sys.getfilesystemencoding())))
+            sys.stdout.write("\n")
+        if status.unstaged:
+            sys.stdout.write("Changes not staged for commit:\n\n")
+            for name in status.unstaged:
+                sys.stdout.write("\t%s\n" %
+                        name.decode(sys.getfilesystemencoding()))
+            sys.stdout.write("\n")
+        if status.untracked:
+            sys.stdout.write("Untracked files:\n\n")
+            for name in status.untracked:
+                sys.stdout.write("\t%s\n" % name)
+            sys.stdout.write("\n")
+
+
+class cmd_ls_remote(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, '', [])
+        if len(args) < 1:
+            print('Usage: dulwich ls-remote URL')
+            sys.exit(1)
+        refs = porcelain.ls_remote(args[0])
+        for ref in sorted(refs):
+            sys.stdout.write("%s\t%s\n" % (ref, refs[ref]))
+
+
+class cmd_ls_tree(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        parser.add_option("-r", "--recursive", action="store_true",
+                          help="Recusively list tree contents.")
+        parser.add_option("--name-only", action="store_true",
+                          help="Only display name.")
+        options, args = parser.parse_args(args)
+        try:
+            treeish = args.pop(0)
+        except IndexError:
+            treeish = None
+        porcelain.ls_tree(
+            '.', treeish, outstream=sys.stdout, recursive=options.recursive,
+            name_only=options.name_only)
+
+
+class cmd_pack_objects(Command):
+
+    def run(self, args):
+        opts, args = getopt(args, '', ['stdout'])
+        opts = dict(opts)
+        if len(args) < 1 and not '--stdout' in args:
+            print('Usage: dulwich pack-objects basename')
+            sys.exit(1)
+        object_ids = [l.strip() for l in sys.stdin.readlines()]
+        basename = args[0]
+        if '--stdout' in opts:
+            packf = getattr(sys.stdout, 'buffer', sys.stdout)
+            idxf = None
+            close = []
+        else:
+            packf = open(basename + '.pack', 'w')
+            idxf = open(basename + '.idx', 'w')
+            close = [packf, idxf]
+        porcelain.pack_objects('.', object_ids, packf, idxf)
+        for f in close:
+            f.close()
+
+
+class cmd_pull(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        try:
+            from_location = args[0]
+        except IndexError:
+            from_location = None
+        porcelain.pull('.', from_location)
+
+
+class cmd_remote_add(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        porcelain.remote_add('.', args[0], args[1])
+
+
+class cmd_remote(Command):
+
+    subcommands = {
+        "add": cmd_remote_add,
+    }
+
+    def run(self, args):
+        if not args:
+            print("Supported subcommands: %s" % ', '.join(self.subcommands.keys()))
+            return False
+        cmd = args[0]
+        try:
+            cmd_kls = self.subcommands[cmd]
+        except KeyError:
+            print('No such subcommand: %s' % args[0])
+            return False
+        return cmd_kls().run(args[1:])
+
+
+class cmd_check_ignore(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        options, args = parser.parse_args(args)
+        ret = 1
+        for path in porcelain.check_ignore('.', args):
+            print(path)
+            ret = 0
+        return ret
+
+
+class cmd_help(Command):
+
+    def run(self, args):
+        parser = optparse.OptionParser()
+        parser.add_option("-a", "--all", dest="all",
+                          action="store_true",
+                          help="List all commands.")
+        options, args = parser.parse_args(args)
+
+        if options.all:
+            print('Available commands:')
+            for cmd in sorted(commands):
+                print('  %s' % cmd)
+        else:
+            print("""\
+The dulwich command line tool is currently a very basic frontend for the
+Dulwich python module. For full functionality, please see the API reference.
+
+For a list of supported commands, see 'dulwich help -a'.
+""")
+
+
+commands = {
+    "add": cmd_add,
+    "archive": cmd_archive,
+    "check-ignore": cmd_check_ignore,
+    "clone": cmd_clone,
+    "commit": cmd_commit,
+    "commit-tree": cmd_commit_tree,
+    "daemon": cmd_daemon,
+    "diff": cmd_diff,
+    "diff-tree": cmd_diff_tree,
+    "dump-pack": cmd_dump_pack,
+    "dump-index": cmd_dump_index,
+    "fetch-pack": cmd_fetch_pack,
+    "fetch": cmd_fetch,
+    "help": cmd_help,
+    "init": cmd_init,
+    "log": cmd_log,
+    "ls-remote": cmd_ls_remote,
+    "ls-tree": cmd_ls_tree,
+    "pack-objects": cmd_pack_objects,
+    "pull": cmd_pull,
+    "receive-pack": cmd_receive_pack,
+    "remote": cmd_remote,
+    "repack": cmd_repack,
+    "reset": cmd_reset,
+    "rev-list": cmd_rev_list,
+    "rm": cmd_rm,
+    "show": cmd_show,
+    "status": cmd_status,
+    "symbolic-ref": cmd_symbolic_ref,
+    "tag": cmd_tag,
+    "update-server-info": cmd_update_server_info,
+    "upload-pack": cmd_upload_pack,
+    "web-daemon": cmd_web_daemon,
+    }
+
+if len(sys.argv) < 2:
+    print("Usage: %s <%s> [OPTIONS...]" % (sys.argv[0], "|".join(commands.keys())))
+    sys.exit(1)
+
+cmd = sys.argv[1]
+try:
+    cmd_kls = commands[cmd]
+except KeyError:
+    print("No such subcommand: %s" % cmd)
+    sys.exit(1)
+# TODO(jelmer): Return non-0 on errors
+cmd_kls().run(sys.argv[2:])

+ 96 - 0
docs/Makefile

@@ -0,0 +1,96 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = build
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml pickle json htmlhelp qthelp latex changes linkcheck doctest
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html      to make standalone HTML files"
+	@echo "  pdf       to make PDF document"
+	@echo "  dirhtml   to make HTML files named index.html in directories"
+	@echo "  pickle    to make pickle files"
+	@echo "  json      to make JSON files"
+	@echo "  htmlhelp  to make HTML files and a HTML help project"
+	@echo "  qthelp    to make HTML files and a qthelp project"
+	@echo "  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  changes   to make an overview of all changed/added/deprecated items"
+	@echo "  linkcheck to check all external links for integrity"
+	@echo "  doctest   to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	-rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/dulwich.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/dulwich.qhc"
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+	      "run these through (pdf)latex."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+pdf:
+	$(SPHINXBUILD) -b pdf $(ALLSPHINXOPTS) $(BUILDDIR)/pdf
+	@echo
+	@echo "Build finished. The PDF files are in $(BUILDDIR)/pdf."
+

+ 218 - 0
docs/conf.py

@@ -0,0 +1,218 @@
+# -*- coding: utf-8 -*-
+#
+# dulwich documentation build configuration file, created by
+# sphinx-quickstart on Thu Feb 18 23:18:28 2010.
+#
+# This file is execfile()d with the current directory set to its containing
+# dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import os
+import sys
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('..'))
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__))))
+dulwich = __import__('dulwich')
+
+# -- General configuration ----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = ['sphinx.ext.autodoc']
+try:
+    import rst2pdf
+    if rst2pdf.version >= '0.16':
+        extensions.append('rst2pdf.pdfbuilder')
+except ImportError:
+    print("[NOTE] In order to build PDF you need rst2pdf with version >=0.16")
+
+
+autoclass_content = "both"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['templates']
+
+# The suffix of source filenames.
+source_suffix = '.txt'
+
+# The encoding of source files.
+#         source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'dulwich'
+copyright = u'2011, Jelmer Vernooij'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '.'.join(map(str, dulwich.__version__[:2]))
+# The full version, including alpha/beta/rc tags.
+release = '.'.join(map(str, dulwich.__version__))
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+# language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+# today = ''
+# Else, today_fmt is used as the format for a strftime call.
+# today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+# unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = ['build']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+# html_theme = 'default'
+html_theme = 'nature'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ['theme']
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+# html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+# html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+# html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+# html_use_modindex = True
+
+# If false, no index is generated.
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+# html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+# html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'dulwichdoc'
+
+
+# -- Options for LaTeX output ------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+# latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+# latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass
+# [howto/manual]).
+latex_documents = [
+  ('index', 'dulwich.tex', u'dulwich Documentation',
+   u'Jelmer Vernooij', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+# latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+# latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+# latex_appendices = []
+
+# If false, no module index is generated.
+# latex_use_modindex = True
+
+pdf_documents = [
+    ('index', u'dulwich', u'Documentation for dulwich',
+        u'Jelmer Vernooij'),
+]
+pdf_stylesheets = ['sphinx', 'kerning', 'a4']
+pdf_break_level = 2
+pdf_inline_footnotes = True

+ 36 - 0
docs/index.txt

@@ -0,0 +1,36 @@
+.. _index:
+
+======================================
+dulwich - Python implementation of Git
+======================================
+
+Overview
+========
+
+.. include:: ../README.md
+
+Documentation
+=============
+
+
+.. toctree::
+    :maxdepth: 2
+
+    performance
+    protocol
+
+    tutorial/index
+
+
+Changelog
+=========
+
+.. include:: ../NEWS
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+

+ 121 - 0
docs/make.bat

@@ -0,0 +1,121 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+set SPHINXBUILD=sphinx-build
+set BUILDDIR=build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+	:help
+	echo.Please use `make ^<target^>` where ^<target^> is one of
+	echo.  html      to make standalone HTML files
+	echo.  pdf       to make PDF document
+	echo.  dirhtml   to make HTML files named index.html in directories
+	echo.  pickle    to make pickle files
+	echo.  json      to make JSON files
+	echo.  htmlhelp  to make HTML files and a HTML help project
+	echo.  qthelp    to make HTML files and a qthelp project
+	echo.  latex     to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+	echo.  changes   to make an overview over all changed/added/deprecated items
+	echo.  linkcheck to check all external links for integrity
+	echo.  doctest   to run all doctests embedded in the documentation if enabled
+	goto end
+)
+
+if "%1" == "clean" (
+	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+	del /q /s %BUILDDIR%\*
+	goto end
+)
+
+if "%1" == "html" (
+	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+	goto end
+)
+
+if "%1" == "dirhtml" (
+	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+	echo.
+	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+	goto end
+)
+
+if "%1" == "pickle" (
+	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+	echo.
+	echo.Build finished; now you can process the pickle files.
+	goto end
+)
+
+if "%1" == "json" (
+	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+	echo.
+	echo.Build finished; now you can process the JSON files.
+	goto end
+)
+
+if "%1" == "htmlhelp" (
+	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+	echo.
+	echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+	goto end
+)
+
+if "%1" == "qthelp" (
+	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+	echo.
+	echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\dulwich.qhcp
+	echo.To view the help file:
+	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\dulwich.qhc
+	goto end
+)
+
+if "%1" == "latex" (
+	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+	echo.
+	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+	goto end
+)
+
+if "%1" == "changes" (
+	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+	echo.
+	echo.The overview file is in %BUILDDIR%/changes.
+	goto end
+)
+
+if "%1" == "linkcheck" (
+	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+	echo.
+	echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+	goto end
+)
+
+if "%1" == "doctest" (
+	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+	echo.
+	echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+	goto end
+)
+
+if "%1" == "pdf" (
+	%SPHINXBUILD% -b pdf %ALLSPHINXOPTS% %BUILDDIR%/pdf
+	echo.
+	echo.Build finished. The PDF files are in %BUILDDIR%/pdf.
+	goto end
+)
+
+:end

+ 11 - 0
docs/performance.txt

@@ -0,0 +1,11 @@
+.. _performance:
+
+==============================
+Possible areas for improvement
+==============================
+
+Places for improvement, ordered by difficulty / effectiveness:
+
+* read_zlib() should have a C equivalent (~4% overhead at the moment)
+* unpack_object() should have a C equivalent
+

+ 65 - 0
docs/protocol.txt

@@ -0,0 +1,65 @@
+.. _protocol:
+
+===================
+Git Server Protocol
+===================
+
+Transport
+=========
+
+The Git protocol operates over pipes or TCP/IP. When a client connects over
+TCP/IP, it sends a header that tells the server which program to run and what
+parameters to use. When invoked over SSH, git will run a program with the
+parameters as command line arguments.
+
+Protocols
+=========
+
+Basics
+------
+
+Git communicates with a server by piping data between a local program and a
+remote program.
+
+A common way of sending a unit of information is a pkt_line. This is a
+4-byte size, encoded as human-readable hex (so only a small part of the 4
+bytes' range is used), that tells you the size of the payload, followed by
+the payload itself. The size includes the 4 bytes used by the size field.
+For example::
+
+    0009ABCD\n
+
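+For illustration, a pkt_line can be built with a few lines of Python (a
+sketch; Dulwich's own implementation lives in ``dulwich.protocol``)::
+
+    def pkt_line(payload):
+        # 4 hex digits encode the total length, including the 4 size bytes
+        return ('%04x' % (len(payload) + 4)).encode('ascii') + payload
+
+    assert pkt_line(b'ABCD\n') == b'0009ABCD\n'
+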
+Git can also multiplex data using the sideband. In addition to the 4-byte
+size, there is a 1-byte channel number. The channel number is binary, so
+channel ``1`` is sent as ``\x01``.
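+
+A sideband frame can be sketched similarly (hypothetical helper; channel 1
+carries pack data, 2 progress messages and 3 errors)::
+
+    def sideband_pkt(channel, payload):
+        # The size covers the 4 size bytes, the channel byte and the payload.
+        body = bytes(bytearray([channel])) + payload
+        return ('%04x' % (len(body) + 4)).encode('ascii') + body
+
+    assert sideband_pkt(1, b'ABCD\n') == b'000a\x01ABCD\n'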
+
+Typically Git will piggyback a list of capabilities on the first pkt_line it
+sends. It will also look for capabilities in the first pkt_line it receives.
+Git degrades as gracefully as possible when encountering a server or client
+with differing capabilities.
+
+git-upload-pack
+---------------
+
+git-upload-pack is used by git-ls-remote, git-clone, git-fetch and git-pull,
+among others. Typically a client will connect a local git-fetch-pack to
+a remote git-upload-pack.
+
+Capabilities for this protocol include multi_ack, thin-pack, ofs-delta,
+sideband and sideband-64k. A thin pack can reference objects not in the
+current pack.
+
+The server tells the client what refs it has. The client states which of
+those SHA-1s it would like. It then starts to report which SHA-1s it already
+has. The server ACKs these, allowing the client to work out when to stop
+sending SHA-1s. This saves a lot of transfer, because the client can make
+decisions like "well, if it has this SHA, then it has all its parents, so I
+don't need to care about those". When the client stops sending SHA-1s, the
+server can work out an optimal pack and then send it to the client.
+
+git-receive-pack
+----------------
+
+git-receive-pack is used by git push. Typically a client connects a local
+git-send-pack to a remote git-receive-pack.
+
+Capabilities include report-status and delete-ref.
+

+ 12 - 0
docs/tutorial/Makefile

@@ -0,0 +1,12 @@
+RST2HTML = rst2html
+TXT=$(shell ls *.txt)
+
+ALL: index.html
+
+index.html: $(TXT)
+	$(RST2HTML) index.txt index.html
+
+clean:
+	rm -f index.html
+
+.PHONY: clean

+ 13 - 0
docs/tutorial/conclusion.txt

@@ -0,0 +1,13 @@
+.. _tutorial-conclusion:
+
+Conclusion
+==========
+
+This tutorial currently only covers a small (but important) part of Dulwich.
+It still needs to be extended to cover packs, refs, reflogs and network
+communication.
+
+Dulwich abstracts away much of the Git plumbing, so there is more to
+explore.
+
+For now, that's all folks!

+ 26 - 0
docs/tutorial/encoding.txt

@@ -0,0 +1,26 @@
+Encoding
+========
+
+You will notice that all lower-level functions in Dulwich take byte strings
+rather than unicode strings. This is intentional.
+
+Although `C git`_ recommends the use of UTF-8 for encoding, this is not
+strictly enforced and C git treats filenames as sequences of non-NUL bytes.
+There are repositories in the wild that use non-UTF-8 encoding for filenames
+and commit messages.
+
+.. _C git: https://github.com/git/git/blob/master/Documentation/i18n.txt
+
+The library should be able to read *all* existing git repositories,
+regardless of what encoding they use. This is the main reason why Dulwich
+does not convert paths to unicode strings.
+
+A further consideration is that converting back and forth to unicode
+incurs an extra performance penalty. E.g. if you are just iterating over
+file contents, there is no need to decode them at all. Users of the library
+may have specific assumptions they can make about the encoding - e.g. they
+could just decide that all their data is latin-1, or the default Python
+encoding.
+
+Higher level functions, such as the porcelain in dulwich.porcelain, will
+automatically convert unicode strings to UTF-8 bytestrings.

+ 99 - 0
docs/tutorial/file-format.txt

@@ -0,0 +1,99 @@
+Git File format
+===============
+
+For a better understanding of Dulwich, we'll start by explaining most of
+Git's secrets.
+
+Open the ".git" folder of any Git-managed repository. You'll find folders
+like "branches", "hooks"... We're only interested in "objects" here. Open it.
+
+You'll mostly see folders named with 2 hex digits. Git identifies content by
+its SHA-1 digest. The folder's 2 hex digits, plus the 38 hex digits of the
+files inside it, form the 40-character (or 20-byte) ids of the Git objects
+you'll manage in Dulwich.
+
+We'll first study the three main objects:
+
+- The Commit;
+
+- The Tree;
+
+- The Blob.
+
+The Commit
+----------
+
+You're used to generating commits using Git. You have set up your name and
+e-mail, and you know how to see the history using ``git log``.
+
+A commit file looks like this::
+
+  commit <content length><NUL>tree <tree sha>
+  parent <parent sha>
+  [parent <parent sha> if several parents from merges]
+  author <author name> <author e-mail> <timestamp> <timezone>
+  committer <author name> <author e-mail> <timestamp> <timezone>
+
+  <commit message>
+
+But where are the changes you committed? The commit contains a reference to a
+tree.
+
+The Tree
+--------
+
+A tree is a collection of file information, the state of a single directory at
+a given point in time.
+
+A tree file looks like this::
+
+  tree <content length><NUL><file mode> <filename><NUL><item sha>...
+
+And repeats for every file in the tree.
+
+Note that the SHA-1 digest is in binary form here.
+
+The file mode is like the octal argument you could give to the ``chmod``
+command, except it is in extended form to distinguish regular files from
+directories and other types.
+
+We now know how our files are referenced but we haven't found their actual
+content yet. That's where the reference to a blob comes in.
+
+The Blob
+--------
+
+A blob is simply the content of files you are versioning.
+
+A blob file looks like this::
+
+  blob <content length><NUL><content>
+
+If you change a single line, another blob will be generated by Git at commit
+time. This is how Git can quickly check out any version in history.
+
+Conversely, several identical files with different filenames generate
+only one blob. That's largely why renames are so cheap and efficient in Git.
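+
+The id of a blob is just the SHA-1 of its header plus content. As a sketch
+using only the standard library (this is the same blob that appears later in
+the object-store chapter of this tutorial)::
+
+  >>> import hashlib
+  >>> content = b"My file content\n"
+  >>> header = b"blob " + str(len(content)).encode("ascii") + b"\x00"
+  >>> hashlib.sha1(header + content).hexdigest()
+  'c55063a4d5d37aa1af2b2dad3a70aa34dae54dc6'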
+
+Dulwich Objects
+---------------
+
+Dulwich implements these three objects with an API to easily access the
+information you need, while abstracting away more tricks Git uses to
+accelerate operations and reduce space.
+
+More About Git formats
+----------------------
+
+These three objects make up most of the contents of a Git repository and are
+used for the history. They can either appear as simple files on disk (one file
+per object) or in a ``pack`` file, which is a container for a number of these
+objects.
+
+There is also an index of the current state of the working copy in the
+repository as well as files to track the existing branches and tags.
+
+For a more detailed explanation of object formats and SHA-1 digests, see:
+http://www-cs-students.stanford.edu/~blynn/gitmagic/ch08.html
+
+Just note that recent versions of Git compress object files using zlib.

+ 19 - 0
docs/tutorial/index.txt

@@ -0,0 +1,19 @@
+.. _tutorial:
+
+========
+Tutorial
+========
+
+.. toctree::
+   :maxdepth: 2
+
+   introduction
+   encoding 
+   file-format
+   repo
+   object-store
+   remote
+   tag
+   porcelain
+   conclusion
+

+ 20 - 0
docs/tutorial/introduction.txt

@@ -0,0 +1,20 @@
+.. _tutorial-introduction:
+
+Introduction
+============
+
+Like Git itself, Dulwich consists of two main layers: the so-called plumbing
+and the porcelain.
+
+The plumbing is the lower layer; it deals with the Git object database and
+the nitty-gritty internals. The porcelain is roughly what you would expect
+to be exposed to as a user of the ``git`` command-line tool.
+
+Dulwich has a fairly complete plumbing implementation, and a more recently
+added porcelain implementation. The porcelain code lives in
+``dulwich.porcelain``.
+
+
+For the most part, this tutorial introduces you to the internal concepts of
+Git and the main plumbing parts of Dulwich. The last chapter covers
+the porcelain.

+ 187 - 0
docs/tutorial/object-store.txt

@@ -0,0 +1,187 @@
+.. _tutorial-object-store:
+
+The object store
+================
+
+The objects are stored in the ``object store`` of the repository.
+
+  >>> from dulwich.repo import Repo
+  >>> repo = Repo.init("myrepo", mkdir=True)
+
+Initial commit
+--------------
+
+When you use Git, you generally add or modify content. As our repository is
+empty for now, we'll start by adding a new file::
+
+  >>> from dulwich.objects import Blob
+  >>> blob = Blob.from_string(b"My file content\n")
+  >>> print(blob.id.decode('ascii'))
+  c55063a4d5d37aa1af2b2dad3a70aa34dae54dc6
+
+Of course you could create a blob from an existing file using ``from_file``
+instead.
+
+As mentioned earlier, file content is separated from file name. Let's
+give this content a name::
+
+  >>> from dulwich.objects import Tree
+  >>> tree = Tree()
+  >>> tree.add(b"spam", 0o100644, blob.id)
+
+Note that "0o100644" is the octal form for a regular file with common
+permissions. You can hardcode them or you can use the ``stat`` module.
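+
+For instance, the same mode can be derived from the ``stat`` module's
+regular-file flag (a brief check)::
+
+  >>> import stat
+  >>> (stat.S_IFREG | 0o644) == 0o100644
+  True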
+
+The tree state of our repository still needs to be placed in time. That's the
+job of the commit::
+
+  >>> from dulwich.objects import Commit, parse_timezone
+  >>> from time import time
+  >>> commit = Commit()
+  >>> commit.tree = tree.id
+  >>> author = b"Your Name <your.email@example.com>"
+  >>> commit.author = commit.committer = author
+  >>> commit.commit_time = commit.author_time = int(time())
+  >>> tz = parse_timezone(b'-0200')[0]
+  >>> commit.commit_timezone = commit.author_timezone = tz
+  >>> commit.encoding = b"UTF-8"
+  >>> commit.message = b"Initial commit"
+
+Note that the initial commit has no parents.
+
+At this point, the repository is still empty because all operations happen in
+memory. Let's "commit" it.
+
+  >>> object_store = repo.object_store
+  >>> object_store.add_object(blob)
+
+Now the ".git/objects" folder contains a first SHA-1 file. Let's continue
+saving the changes::
+
+  >>> object_store.add_object(tree)
+  >>> object_store.add_object(commit)
+
+Now the physical repository contains three objects but still has no branch.
+Let's create the master branch like Git would::
+
+  >>> repo.refs[b'refs/heads/master'] = commit.id
+
+The master branch now has a commit to start from. When we commit to master, we
+are also moving HEAD, which is Git's currently checked out branch:
+
+  >>> head = repo.refs[b'HEAD']
+  >>> head == commit.id
+  True
+  >>> head == repo.refs[b'refs/heads/master']
+  True
+
+How did that work? As it turns out, HEAD is a special kind of ref called a
+symbolic ref, and it points at master. Most functions on the refs container
+work transparently with symbolic refs, but we can also take a peek inside HEAD:
+
+  >>> import sys
+  >>> print(repo.refs.read_ref(b'HEAD').decode(sys.getfilesystemencoding()))
+  ref: refs/heads/master
+
+Normally, you won't need to use read_ref. If you want to change what ref HEAD
+points to, in order to check out another branch, just use set_symbolic_ref.
+
+Now our repository is officially tracking a branch named "master" referring to a
+single commit.
+
+Playing again with Git
+----------------------
+
+At this point you can come back to the shell, go into the "myrepo" folder and
+type ``git status`` to let Git confirm that this is a regular repository on
+branch "master".
+
+Git will tell you that the file "spam" is deleted, which is normal because
+Git is comparing the repository state with the current working copy. We have
+no working copy at all here, because Dulwich simply doesn't need one!
+
+You can check out the last state using ``git checkout -f``. The force flag
+will prevent Git from complaining that there are uncommitted changes in the
+working copy.
+
+The file ``spam`` appears and, unsurprisingly, contains the same bytes as
+the blob::
+
+  $ cat spam
+  My file content
+
+Changing a File and Committing it
+---------------------------------
+
+Now that we have a first commit, the next one will show a difference.
+
+As seen in the introduction, it's about making a path in a tree point to a
+new blob. The old blob will remain to compute the diff. The tree is altered,
+and the new commit's task is to point to this new version.
+
+Let's first build the blob::
+
+  >>> from dulwich.objects import Blob
+  >>> spam = Blob.from_string(b"My new file content\n")
+  >>> print(spam.id.decode('ascii'))
+  16ee2682887a962f854ebd25a61db16ef4efe49f
+
+An alternative is to alter the previously constructed blob object::
+
+  >>> blob.data = b"My new file content\n"
+  >>> print(blob.id.decode('ascii'))
+  16ee2682887a962f854ebd25a61db16ef4efe49f
+
+In any case, update the blob id known as "spam". You also have the
+opportunity of changing its mode::
+
+  >>> tree[b"spam"] = (0o100644, spam.id)
+
+Now let's record the change::
+
+  >>> from dulwich.objects import Commit
+  >>> from time import time
+  >>> c2 = Commit()
+  >>> c2.tree = tree.id
+  >>> c2.parents = [commit.id]
+  >>> c2.author = c2.committer = b"John Doe <john@example.com>"
+  >>> c2.commit_time = c2.author_time = int(time())
+  >>> c2.commit_timezone = c2.author_timezone = 0
+  >>> c2.encoding = b"UTF-8"
+  >>> c2.message = b'Changing "spam"'
+
+In this new commit we record the changed tree id and, most importantly, the
+previous commit as the parent. Parents are actually a list, because a commit
+may have several parents after merging branches.
+
+Let's put the objects in the object store::
+
+  >>> repo.object_store.add_object(spam)
+  >>> repo.object_store.add_object(tree)
+  >>> repo.object_store.add_object(c2)
+
+You can already ask git to introspect this commit using ``git show`` and the
+value of ``c2.id`` as an argument. You'll see the difference with the
+previous blob recorded as "spam".
+
+The diff between the previous head and the new one can be printed using
+write_tree_diff::
+
+  >>> from dulwich.patch import write_tree_diff
+  >>> import sys
+  >>> write_tree_diff(sys.stdout, repo.object_store, commit.tree, tree.id)
+  diff --git a/spam b/spam
+  index c55063a..16ee268 100644
+  --- a/spam
+  +++ b/spam
+  @@ -1,1 +1,1 @@
+  -My file content
+  +My new file content
+
+You won't see it using git log because the head is still the previous
+commit. It's easy to remedy::
+
+  >>> repo.refs[b'refs/heads/master'] = c2.id
+
+Now all git tools will work as expected.

+ 40 - 0
docs/tutorial/porcelain.txt

@@ -0,0 +1,40 @@
+Porcelain
+=========
+
+The ``porcelain`` is the higher level interface, built on top of the lower
+level implementation covered in previous chapters of this tutorial. The
+``dulwich.porcelain`` module in Dulwich aims to closely resemble
+the Git command-line API that you are familiar with.
+
+Basic concepts
+--------------
+The porcelain operations are implemented as top-level functions in the
+``dulwich.porcelain`` module. Most arguments can either be strings or
+more complex Dulwich objects; e.g. a repository argument will accept either
+a string with a path to the repository or an instance of a ``Repo`` object.
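+
+For example, both of these calls print the log of the repository created in
+the earlier chapters (a sketch, assuming "myrepo" exists)::
+
+  >>> from dulwich import porcelain
+  >>> from dulwich.repo import Repo
+  >>> porcelain.log("myrepo")        # repository given as a path
+  >>> porcelain.log(Repo("myrepo"))  # or as a Repo object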
+
+Initializing a new repository
+-----------------------------
+
+  >>> from dulwich import porcelain
+
+  >>> repo = porcelain.init("myrepo")
+
+Clone a repository
+------------------
+
+  >>> porcelain.clone("git://github.com/jelmer/dulwich", "dulwich-clone")
+
+Commit changes
+--------------
+
+  >>> r = porcelain.init("testrepo")
+  >>> open("testrepo/testfile", "w").write("data")
+  >>> porcelain.add(r, "testfile")
+  >>> porcelain.commit(r, b"A sample commit")
+
+Push changes
+------------
+
+  >>> tr = porcelain.init("targetrepo")
+  >>> r = porcelain.push("testrepo", "targetrepo", "master")

+ 84 - 0
docs/tutorial/remote.txt

@@ -0,0 +1,84 @@
+.. _tutorial-remote:
+
+Most of the tests in this file require a Dulwich server, so let's start one:
+
+    >>> from dulwich.repo import Repo
+    >>> from dulwich.server import DictBackend, TCPGitServer
+    >>> import threading
+    >>> repo = Repo.init(b"remote", mkdir=True)
+    >>> cid = repo.do_commit(b"message", committer=b"Jelmer <jelmer@samba.org>")
+    >>> backend = DictBackend({b'/': repo})
+    >>> dul_server = TCPGitServer(backend, b'localhost', 0)
+    >>> threading.Thread(target=dul_server.serve).start()
+    >>> server_address, server_port=dul_server.socket.getsockname()
+
+Remote repositories
+===================
+
+The interface for remote Git repositories is different from that
+for local repositories.
+
+The Git smart server protocol provides three basic operations:
+
+ * upload-pack - provides a pack with objects requested by the client
+ * receive-pack - imports a pack with objects provided by the client
+ * upload-archive - provides a tarball with the contents of a specific revision
+
+The smart server protocol can be accessed over either plain TCP (git://),
+SSH (git+ssh://) or tunneled over HTTP (http://).
+
+Dulwich provides support for accessing remote repositories in
+``dulwich.client``. To create a new client, you can construct
+one manually::
+
+   >>> from dulwich.client import TCPGitClient
+   >>> client = TCPGitClient(server_address.encode('ascii'), server_port)
+
+Retrieving raw pack files
+-------------------------
+
+The client object can then be used to retrieve a pack. The ``fetch_pack``
+method takes a ``determine_wants`` callback argument, which allows the
+client to determine which objects it wants to end up with::
+
+   >>> def determine_wants(refs):
+   ...    # retrieve all objects
+   ...    return refs.values()
+
+Another required object is a "graph walker", which is used to determine
+which objects the client already has and should not be sent again
+by the server. Here in the tutorial we'll just use a dummy graph walker
+which claims that the client doesn't have any objects::
+
+   >>> class DummyGraphWalker(object):
+   ...     def ack(self, sha): pass
+   ...     def next(self): pass
+   ...     def __next__(self): pass
+
+With the ``determine_wants`` function in place, we can now fetch a pack,
+which we will write to a ``BytesIO`` object::
+
+   >>> from io import BytesIO
+   >>> f = BytesIO()
+   >>> remote_refs = client.fetch_pack(b"/", determine_wants,
+   ...    DummyGraphWalker(), pack_data=f.write)
+
+``f`` will now contain a full pack file::
+
+   >>> print(f.getvalue()[:4].decode('ascii'))
+   PACK
+
+Fetching objects into a local repository
+----------------------------------------
+
+It is also possible to fetch from a remote repository into a local repository,
+in which case Dulwich takes care of providing the right graph walker, and
+importing the received pack file into the local repository::
+
+   >>> from dulwich.repo import Repo
+   >>> local = Repo.init(b"local", mkdir=True)
+   >>> remote_refs = client.fetch(b"/", local)
+
+Let's shut down the server now that all tests have been run::
+
+   >>> dul_server.shutdown()

+ 101 - 0
docs/tutorial/repo.txt

@@ -0,0 +1,101 @@
+.. _tutorial-repo:
+
+The repository
+==============
+
+After this introduction, let's start directly with code::
+
+  >>> from dulwich.repo import Repo
+
+The access to a repository is through the Repo object. You can open an
+existing repository or you can create a new one. There are two types of Git
+repositories:
+
+  Regular Repositories -- They are the ones you create using ``git init``
+  and use daily. They contain a ``.git`` folder.
+
+  Bare Repositories -- There is no ".git" folder. The top-level folder
+  contains itself the "branches", "hooks"... folders. These are used for
+  published repositories (mirrors). They do not have a working tree.
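+
+As a brief aside, a bare repository can be created with ``Repo.init_bare``
+(a sketch; we create the directory first, since ``init_bare`` expects an
+existing path)::
+
+  >>> from os import mkdir
+  >>> mkdir("myrepo.git")
+  >>> Repo.init_bare("myrepo.git")
+  <Repo at 'myrepo.git'>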
+
+Creating a repository
+---------------------
+
+Let's create a folder and turn it into a repository, like ``git init`` would::
+
+  >>> from os import mkdir
+  >>> import sys
+  >>> mkdir("myrepo")
+  >>> repo = Repo.init("myrepo")
+  >>> repo
+  <Repo at 'myrepo'>
+
+You can already look at the structure of the "myrepo/.git" folder, though it
+is mostly empty for now.
+
+Opening an existing repository
+------------------------------
+
+To reopen an existing repository, simply pass its path to the constructor
+of ``Repo``::
+
+    >>> repo = Repo("myrepo")
+    >>> repo
+    <Repo at 'myrepo'>
+
+Opening the index
+-----------------
+
+The index is used as a staging area. Once you do a commit,
+the files tracked in the index will be recorded as the contents of the new
+commit. As mentioned earlier, only non-bare repositories have a working tree,
+so only non-bare repositories will have an index, too. To open the index, simply
+call::
+
+    >>> index = repo.open_index()
+    >>> print(index.path.decode(sys.getfilesystemencoding()))
+    myrepo/.git/index
+
+Since the repository was just created, the index will be empty::
+
+    >>> list(index)
+    []
+
+Staging new files
+-----------------
+
+The repository allows "staging" files. Only files can be staged - directories
+aren't tracked explicitly by git. Let's create a simple text file and stage it::
+
+    >>> f = open('myrepo/foo', 'wb')
+    >>> _ = f.write(b"monty")
+    >>> f.close()
+
+    >>> repo.stage([b"foo"])
+
+It will now show up in the index::
+
+    >>> print(",".join([f.decode(sys.getfilesystemencoding()) for f in repo.open_index()]))
+    foo
+
+
+Creating new commits
+--------------------
+
+Now that we have staged a change, we can commit it. The easiest way to
+do this is by using ``Repo.do_commit``. It is also possible to manipulate
+the lower-level objects involved in this, but we'll leave that for a
+separate chapter of the tutorial.
+
+To create a simple commit on the current branch, it is only necessary
+to specify the message. The committer and author will be retrieved from the
+repository configuration or global configuration if they are not specified::
+
+    >>> commit_id = repo.do_commit(
+    ...     b"The first commit", committer=b"Jelmer Vernooij <jelmer@samba.org>")
+
+``do_commit`` returns the SHA1 of the commit. Since the commit was to the 
+default branch, the repository's head will now be set to that commit::
+
+    >>> repo.head() == commit_id
+    True

+ 57 - 0
docs/tutorial/tag.txt

@@ -0,0 +1,57 @@
+.. _tutorial-tag:
+
+Tagging
+=======
+
+This tutorial will demonstrate how to add a tag to a commit via dulwich.
+
+First let's initialize the repository:
+
+    >>> from dulwich.repo import Repo
+    >>> _repo = Repo("myrepo", mkdir=True)
+
+Next we build the commit object and add it to the object store:
+
+    >>> from dulwich.objects import Blob, Tree, Commit, parse_timezone
+    >>> permissions = 0100644
+    >>> author = "John Smith"
+    >>> blob = Blob.from_string("empty")
+    >>> tree = Tree()
+    >>> tree.add(tag, permissions, blob.id)
+    >>> commit = Commit()
+    >>> commit.tree = tree.id
+    >>> commit.author = commit.committer = author
+    >>> commit.commit_time = commit.author_time = int(time())
+    >>> tz = parse_timezone('-0200')[0]
+    >>> commit.commit_timezone = commit.author_timezone = tz
+    >>> commit.encoding = "UTF-8"
+    >>> commit.message = 'Tagging repo: ' + message
+
+Add objects to the repo store instance:
+
+    >>> object_store = _repo.object_store
+    >>> object_store.add_object(blob)
+    >>> object_store.add_object(tree)
+    >>> object_store.add_object(commit)
+    >>> master_branch = 'master'
+    >>> _repo.refs['refs/heads/' + master_branch] = commit.id
+
+Finally, add the tag top the repo:
+
+    >>> _repo['refs/tags/' + commit] = commit.id
+
+Alternatively, we can use the tag object if we'd like to annotate the tag:
+
+    >>> from dulwich.objects import Blob, Tree, Commit, parse_timezone, Tag
+    >>> tag_message = "Tag Annotation"
+    >>> tag = Tag()
+    >>> tag.tagger = author
+    >>> tag.message = message
+    >>> tag.name = "v0.1"
+    >>> tag.object = (Commit, commit.id)
+    >>> tag.tag_time = commit.author_time
+    >>> tag.tag_timezone = tz
+    >>> object_store.add_object(tag)
+    >>> _repo['refs/tags/' + tag] = tag.id
+
+

+ 5 - 0
dulwich.cfg

@@ -0,0 +1,5 @@
+packages: dulwich
+docformat: restructuredtext
+projectname: Dulwich
+projecturl: https://www.dulwich.io/
+htmloutput: apidocs

+ 32 - 0
dulwich.egg-info/PKG-INFO

@@ -0,0 +1,32 @@
+Metadata-Version: 1.1
+Name: dulwich
+Version: 0.18.1
+Summary: Python Git Library
+Home-page: https://www.dulwich.io/
+Author: UNKNOWN
+Author-email: jelmer@jelmer.uk
+License: Apachev2 or later or GPLv2
+Description: 
+              Python implementation of the Git file formats and protocols,
+              without the need to have git installed.
+        
+              All functionality is available in pure Python. Optional
+              C extensions can be built for improved performance.
+        
+              The project is named after the part of London that Mr. and Mrs. Git live
+              in in the particular Monty Python sketch.
+              
+Keywords: git
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Topic :: Software Development :: Version Control

+ 200 - 0
dulwich.egg-info/SOURCES.txt

@@ -0,0 +1,200 @@
+.testr.conf
+.travis.yml
+AUTHORS
+CONTRIBUTING.md
+COPYING
+MANIFEST.in
+Makefile
+NEWS
+README.md
+README.swift.md
+TODO
+appveyor.yml
+dulwich.cfg
+setup.cfg
+setup.py
+tox.ini
+bin/dul-receive-pack
+bin/dul-upload-pack
+bin/dulwich
+docs/Makefile
+docs/conf.py
+docs/index.txt
+docs/make.bat
+docs/performance.txt
+docs/protocol.txt
+docs/tutorial/Makefile
+docs/tutorial/conclusion.txt
+docs/tutorial/encoding.txt
+docs/tutorial/file-format.txt
+docs/tutorial/index.txt
+docs/tutorial/introduction.txt
+docs/tutorial/object-store.txt
+docs/tutorial/porcelain.txt
+docs/tutorial/remote.txt
+docs/tutorial/repo.txt
+docs/tutorial/tag.txt
+dulwich/__init__.py
+dulwich/_diff_tree.c
+dulwich/_objects.c
+dulwich/_pack.c
+dulwich/archive.py
+dulwich/client.py
+dulwich/config.py
+dulwich/diff_tree.py
+dulwich/errors.py
+dulwich/fastexport.py
+dulwich/file.py
+dulwich/greenthreads.py
+dulwich/hooks.py
+dulwich/ignore.py
+dulwich/index.py
+dulwich/log_utils.py
+dulwich/lru_cache.py
+dulwich/object_store.py
+dulwich/objects.py
+dulwich/objectspec.py
+dulwich/pack.py
+dulwich/patch.py
+dulwich/porcelain.py
+dulwich/protocol.py
+dulwich/reflog.py
+dulwich/refs.py
+dulwich/repo.py
+dulwich/server.py
+dulwich/stdint.h
+dulwich/walk.py
+dulwich/web.py
+dulwich.egg-info/PKG-INFO
+dulwich.egg-info/SOURCES.txt
+dulwich.egg-info/dependency_links.txt
+dulwich.egg-info/top_level.txt
+dulwich/contrib/__init__.py
+dulwich/contrib/paramiko_vendor.py
+dulwich/contrib/release_robot.py
+dulwich/contrib/swift.py
+dulwich/contrib/test_release_robot.py
+dulwich/contrib/test_swift.py
+dulwich/contrib/test_swift_smoke.py
+dulwich/tests/__init__.py
+dulwich/tests/test_archive.py
+dulwich/tests/test_blackbox.py
+dulwich/tests/test_client.py
+dulwich/tests/test_config.py
+dulwich/tests/test_diff_tree.py
+dulwich/tests/test_fastexport.py
+dulwich/tests/test_file.py
+dulwich/tests/test_grafts.py
+dulwich/tests/test_greenthreads.py
+dulwich/tests/test_hooks.py
+dulwich/tests/test_ignore.py
+dulwich/tests/test_index.py
+dulwich/tests/test_lru_cache.py
+dulwich/tests/test_missing_obj_finder.py
+dulwich/tests/test_object_store.py
+dulwich/tests/test_objects.py
+dulwich/tests/test_objectspec.py
+dulwich/tests/test_pack.py
+dulwich/tests/test_patch.py
+dulwich/tests/test_porcelain.py
+dulwich/tests/test_protocol.py
+dulwich/tests/test_reflog.py
+dulwich/tests/test_refs.py
+dulwich/tests/test_repository.py
+dulwich/tests/test_server.py
+dulwich/tests/test_utils.py
+dulwich/tests/test_walk.py
+dulwich/tests/test_web.py
+dulwich/tests/utils.py
+dulwich/tests/compat/__init__.py
+dulwich/tests/compat/server_utils.py
+dulwich/tests/compat/test_client.py
+dulwich/tests/compat/test_pack.py
+dulwich/tests/compat/test_repository.py
+dulwich/tests/compat/test_server.py
+dulwich/tests/compat/test_utils.py
+dulwich/tests/compat/test_web.py
+dulwich/tests/compat/utils.py
+dulwich/tests/data/blobs/11/11111111111111111111111111111111111111
+dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8
+dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349
+dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
+dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
+dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
+dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
+dulwich/tests/data/indexes/index
+dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx
+dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack
+dulwich/tests/data/repos/.gitattributes
+dulwich/tests/data/repos/issue88_expect_ack_nak_client.export
+dulwich/tests/data/repos/issue88_expect_ack_nak_other.export
+dulwich/tests/data/repos/issue88_expect_ack_nak_server.export
+dulwich/tests/data/repos/server_new.export
+dulwich/tests/data/repos/server_old.export
+dulwich/tests/data/repos/a.git/HEAD
+dulwich/tests/data/repos/a.git/packed-refs
+dulwich/tests/data/repos/a.git/objects/28/237f4dc30d0d462658d6b937b08a0f0b6ef55a
+dulwich/tests/data/repos/a.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91
+dulwich/tests/data/repos/a.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec
+dulwich/tests/data/repos/a.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9
+dulwich/tests/data/repos/a.git/objects/7d/9a07d797595ef11344549b8d08198e48c15364
+dulwich/tests/data/repos/a.git/objects/a2/96d0bb611188cabb256919f36bc30117cca005
+dulwich/tests/data/repos/a.git/objects/a9/0fa2d900a17e99b433217e988c4eb4a2e9a097
+dulwich/tests/data/repos/a.git/objects/b0/931cadc54336e78a1d980420e3268903b57a50
+dulwich/tests/data/repos/a.git/objects/ff/d47d45845a8f6576491e1edb97e3fe6a850e7f
+dulwich/tests/data/repos/a.git/refs/heads/master
+dulwich/tests/data/repos/a.git/refs/tags/mytag
+dulwich/tests/data/repos/empty.git/HEAD
+dulwich/tests/data/repos/empty.git/config
+dulwich/tests/data/repos/empty.git/objects/info/.gitignore
+dulwich/tests/data/repos/empty.git/objects/pack/.gitignore
+dulwich/tests/data/repos/empty.git/refs/heads/.gitignore
+dulwich/tests/data/repos/empty.git/refs/tags/.gitignore
+dulwich/tests/data/repos/ooo_merge.git/HEAD
+dulwich/tests/data/repos/ooo_merge.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b
+dulwich/tests/data/repos/ooo_merge.git/objects/38/74e9c60a6d149c44c928140f250d81e6381520
+dulwich/tests/data/repos/ooo_merge.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8
+dulwich/tests/data/repos/ooo_merge.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
+dulwich/tests/data/repos/ooo_merge.git/objects/76/01d7f6231db6a57f7bbb79ee52e4d462fd44d1
+dulwich/tests/data/repos/ooo_merge.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870
+dulwich/tests/data/repos/ooo_merge.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349
+dulwich/tests/data/repos/ooo_merge.git/objects/b2/a2766a2879c209ab1176e7e778b81ae422eeaa
+dulwich/tests/data/repos/ooo_merge.git/objects/f5/07291b64138b875c28e03469025b1ea20bc614
+dulwich/tests/data/repos/ooo_merge.git/objects/f9/e39b120c68182a4ba35349f832d0e4e61f485c
+dulwich/tests/data/repos/ooo_merge.git/objects/fb/5b0425c7ce46959bec94d54b9a157645e114f5
+dulwich/tests/data/repos/ooo_merge.git/refs/heads/master
+dulwich/tests/data/repos/refs.git/HEAD
+dulwich/tests/data/repos/refs.git/packed-refs
+dulwich/tests/data/repos/refs.git/objects/3b/9e5457140e738c2dcd39bf6d7acf88379b90d1
+dulwich/tests/data/repos/refs.git/objects/3e/c9c43c84ff242e3ef4a9fc5bc111fd780a76a8
+dulwich/tests/data/repos/refs.git/objects/42/d06bd4b77fed026b154d16493e5deab78f02ec
+dulwich/tests/data/repos/refs.git/objects/a1/8114c31713746a33a2e70d9914d1ef3e781425
+dulwich/tests/data/repos/refs.git/objects/cd/a609072918d7b70057b6bef9f4c2537843fcfe
+dulwich/tests/data/repos/refs.git/objects/df/6800012397fb85c56e7418dd4eb9405dee075c
+dulwich/tests/data/repos/refs.git/refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa
+dulwich/tests/data/repos/refs.git/refs/heads/loop
+dulwich/tests/data/repos/refs.git/refs/heads/master
+dulwich/tests/data/repos/refs.git/refs/tags/refs-0.2
+dulwich/tests/data/repos/simple_merge.git/HEAD
+dulwich/tests/data/repos/simple_merge.git/objects/0d/89f20333fbb1d2f3a94da77f4981373d8f4310
+dulwich/tests/data/repos/simple_merge.git/objects/1b/6318f651a534b38f9c7aedeebbd56c1e896853
+dulwich/tests/data/repos/simple_merge.git/objects/29/69be3e8ee1c0222396a5611407e4769f14e54b
+dulwich/tests/data/repos/simple_merge.git/objects/4c/ffe90e0a41ad3f5190079d7c8f036bde29cbe6
+dulwich/tests/data/repos/simple_merge.git/objects/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc
+dulwich/tests/data/repos/simple_merge.git/objects/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e
+dulwich/tests/data/repos/simple_merge.git/objects/6f/670c0fb53f9463760b7295fbb814e965fb20c8
+dulwich/tests/data/repos/simple_merge.git/objects/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
+dulwich/tests/data/repos/simple_merge.git/objects/90/182552c4a85a45ec2a835cadc3451bebdfe870
+dulwich/tests/data/repos/simple_merge.git/objects/95/4a536f7819d40e6f637f849ee187dd10066349
+dulwich/tests/data/repos/simple_merge.git/objects/ab/64bbdcc51b170d21588e5c5d391ee5c0c96dfd
+dulwich/tests/data/repos/simple_merge.git/objects/d4/bdad6549dfedf25d3b89d21f506aff575b28a7
+dulwich/tests/data/repos/simple_merge.git/objects/d8/0c186a03f423a81b39df39dc87fd269736ca86
+dulwich/tests/data/repos/simple_merge.git/objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
+dulwich/tests/data/repos/simple_merge.git/refs/heads/master
+dulwich/tests/data/repos/submodule/dotgit
+dulwich/tests/data/tags/71/033db03a03c6a36721efcf1968dd8f8e0cf023
+dulwich/tests/data/trees/70/c190eb48fa8bbb50ddc692a17b44cb781af7f6
+examples/clone.py
+examples/config.py
+examples/diff.py
+examples/latest_change.py

+ 1 - 0
dulwich.egg-info/dependency_links.txt

@@ -0,0 +1 @@
+

+ 1 - 0
dulwich.egg-info/top_level.txt

@@ -0,0 +1 @@
+dulwich

+ 25 - 0
dulwich/__init__.py

@@ -0,0 +1,25 @@
+# __init__.py -- The git module of dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Python implementation of the Git file formats and protocols."""
+
+__version__ = (0, 18, 1)
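
Since the version is exposed as a tuple rather than a string, downstream code can compare releases element-wise. A minimal sketch of such a check (require_version is a hypothetical helper, not part of dulwich):

# Hypothetical helper, not part of dulwich: illustrates consuming the
# __version__ tuple defined above.
import dulwich

def require_version(minimum):
    """Raise ImportError if the installed dulwich is older than `minimum`."""
    if dulwich.__version__ < minimum:  # tuples compare element-wise
        raise ImportError("dulwich >= %s required, found %s" % (
            ".".join(map(str, minimum)),
            ".".join(map(str, dulwich.__version__))))

require_version((0, 18, 1))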

+ 504 - 0
dulwich/_diff_tree.c

@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2010 Google, Inc.
+ *
+ * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+ * General Public License as published by the Free Software Foundation; version 2.0
+ * or (at your option) any later version. You can redistribute it and/or
+ * modify it under the terms of either of these two licenses.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * You should have received a copy of the licenses; if not, see
+ * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+ * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+ * License, Version 2.0.
+ */
+
+#include <Python.h>
+#include <sys/stat.h>
+
+#ifdef _MSC_VER
+typedef unsigned short mode_t;
+#endif
+
+#if PY_MAJOR_VERSION < 3
+typedef long Py_hash_t;
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_FromLong PyLong_FromLong
+#define PyInt_AsLong PyLong_AsLong
+#define PyInt_AS_LONG PyLong_AS_LONG
+#define PyString_AS_STRING PyBytes_AS_STRING
+#define PyString_AsStringAndSize PyBytes_AsStringAndSize
+#define PyString_Check PyBytes_Check
+#define PyString_CheckExact PyBytes_CheckExact
+#define PyString_FromStringAndSize PyBytes_FromStringAndSize
+#define PyString_FromString PyBytes_FromString
+#define PyString_GET_SIZE PyBytes_GET_SIZE
+#define PyString_Size PyBytes_Size
+#define _PyString_Join _PyBytes_Join
+#endif
+
+static PyObject *tree_entry_cls = NULL, *null_entry = NULL,
+	*defaultdict_cls = NULL, *int_cls = NULL;
+static int block_size;
+
+/**
+ * Free an array of PyObject pointers, decrementing any references.
+ */
+static void free_objects(PyObject **objs, Py_ssize_t n)
+{
+	Py_ssize_t i;
+	for (i = 0; i < n; i++)
+		Py_XDECREF(objs[i]);
+	PyMem_Free(objs);
+}
+
+/**
+ * Get the entries of a tree, prepending the given path.
+ *
+ * :param path: The path to prepend, without trailing slashes.
+ * :param path_len: The length of path.
+ * :param tree: The Tree object to iterate.
+ * :param n: Set to the length of result.
+ * :return: A (C) array of PyObject pointers to TreeEntry objects for each path
+ *     in tree.
+ */
+static PyObject **tree_entries(char *path, Py_ssize_t path_len, PyObject *tree,
+		Py_ssize_t *n)
+{
+	PyObject *iteritems, *items, **result = NULL;
+	PyObject *old_entry, *name, *sha;
+	Py_ssize_t i = 0, name_len, new_path_len;
+	char *new_path;
+
+	if (tree == Py_None) {
+		*n = 0;
+		result = PyMem_New(PyObject*, 0);
+		if (!result) {
+			PyErr_NoMemory();
+			return NULL;
+		}
+		return result;
+	}
+
+	iteritems = PyObject_GetAttrString(tree, "iteritems");
+	if (!iteritems)
+		return NULL;
+	items = PyObject_CallFunctionObjArgs(iteritems, Py_True, NULL);
+	Py_DECREF(iteritems);
+	if (items == NULL) {
+		return NULL;
+	}
+	/* The C implementation of iteritems returns a list, so depend on that. */
+	if (!PyList_Check(items)) {
+		PyErr_SetString(PyExc_TypeError,
+			"Tree.iteritems() did not return a list");
+		Py_DECREF(items);
+		return NULL;
+	}
+
+	*n = PyList_Size(items);
+	result = PyMem_New(PyObject*, *n);
+	if (!result) {
+		PyErr_NoMemory();
+		goto error;
+	}
+	for (i = 0; i < *n; i++) {
+		old_entry = PyList_GetItem(items, i);
+		if (!old_entry)
+			goto error;
+		sha = PyTuple_GetItem(old_entry, 2);
+		if (!sha)
+			goto error;
+		name = PyTuple_GET_ITEM(old_entry, 0);
+		name_len = PyString_Size(name);
+		if (PyErr_Occurred())
+			goto error;
+
+		new_path_len = name_len;
+		if (path_len)
+			new_path_len += path_len + 1;
+		new_path = PyMem_Malloc(new_path_len);
+		if (!new_path) {
+			PyErr_NoMemory();
+			goto error;
+		}
+		if (path_len) {
+			memcpy(new_path, path, path_len);
+			new_path[path_len] = '/';
+			memcpy(new_path + path_len + 1, PyString_AS_STRING(name), name_len);
+		} else {
+			memcpy(new_path, PyString_AS_STRING(name), name_len);
+		}
+
+#if PY_MAJOR_VERSION >= 3
+		result[i] = PyObject_CallFunction(tree_entry_cls, "y#OO", new_path,
+			new_path_len, PyTuple_GET_ITEM(old_entry, 1), sha);
+#else
+		result[i] = PyObject_CallFunction(tree_entry_cls, "s#OO", new_path,
+			new_path_len, PyTuple_GET_ITEM(old_entry, 1), sha);
+#endif
+		PyMem_Free(new_path);
+		if (!result[i]) {
+			goto error;
+		}
+	}
+	Py_DECREF(items);
+	return result;
+
+error:
+	if (result)
+		free_objects(result, i);
+	Py_DECREF(items);
+	return NULL;
+}
+
+/**
+ * Use strcmp to compare the paths of two TreeEntry objects.
+ */
+static int entry_path_cmp(PyObject *entry1, PyObject *entry2)
+{
+	PyObject *path1 = NULL, *path2 = NULL;
+	int result = 0;
+
+	path1 = PyObject_GetAttrString(entry1, "path");
+	if (!path1)
+		goto done;
+
+	if (!PyString_Check(path1)) {
+		PyErr_SetString(PyExc_TypeError, "path is not a (byte)string");
+		goto done;
+	}
+
+	path2 = PyObject_GetAttrString(entry2, "path");
+	if (!path2)
+		goto done;
+
+	if (!PyString_Check(path2)) {
+		PyErr_SetString(PyExc_TypeError, "path is not a (byte)string");
+		goto done;
+	}
+
+	result = strcmp(PyString_AS_STRING(path1), PyString_AS_STRING(path2));
+
+done:
+	Py_XDECREF(path1);
+	Py_XDECREF(path2);
+	return result;
+}
+
+static PyObject *py_merge_entries(PyObject *self, PyObject *args)
+{
+	PyObject *tree1, *tree2, **entries1 = NULL, **entries2 = NULL;
+	PyObject *e1, *e2, *pair, *result = NULL;
+	Py_ssize_t n1 = 0, n2 = 0, i1 = 0, i2 = 0;
+	int path_len;
+	char *path_str;
+	int cmp;
+
+#if PY_MAJOR_VERSION >= 3
+	if (!PyArg_ParseTuple(args, "y#OO", &path_str, &path_len, &tree1, &tree2))
+#else
+	if (!PyArg_ParseTuple(args, "s#OO", &path_str, &path_len, &tree1, &tree2))
+#endif
+		return NULL;
+
+	entries1 = tree_entries(path_str, path_len, tree1, &n1);
+	if (!entries1)
+		goto error;
+
+	entries2 = tree_entries(path_str, path_len, tree2, &n2);
+	if (!entries2)
+		goto error;
+
+	result = PyList_New(0);
+	if (!result)
+		goto error;
+
+	while (i1 < n1 && i2 < n2) {
+		cmp = entry_path_cmp(entries1[i1], entries2[i2]);
+		if (PyErr_Occurred())
+			goto error;
+		if (!cmp) {
+			e1 = entries1[i1++];
+			e2 = entries2[i2++];
+		} else if (cmp < 0) {
+			e1 = entries1[i1++];
+			e2 = null_entry;
+		} else {
+			e1 = null_entry;
+			e2 = entries2[i2++];
+		}
+		pair = PyTuple_Pack(2, e1, e2);
+		if (!pair)
+			goto error;
+		PyList_Append(result, pair);
+		Py_DECREF(pair);
+	}
+
+	while (i1 < n1) {
+		pair = PyTuple_Pack(2, entries1[i1++], null_entry);
+		if (!pair)
+			goto error;
+		PyList_Append(result, pair);
+		Py_DECREF(pair);
+	}
+	while (i2 < n2) {
+		pair = PyTuple_Pack(2, null_entry, entries2[i2++]);
+		if (!pair)
+			goto error;
+		PyList_Append(result, pair);
+		Py_DECREF(pair);
+	}
+	goto done;
+
+error:
+	Py_XDECREF(result);
+	result = NULL;
+
+done:
+	if (entries1)
+		free_objects(entries1, n1);
+	if (entries2)
+		free_objects(entries2, n2);
+	return result;
+}
+
+static PyObject *py_is_tree(PyObject *self, PyObject *args)
+{
+	PyObject *entry, *mode, *result;
+	long lmode;
+
+	if (!PyArg_ParseTuple(args, "O", &entry))
+		return NULL;
+
+	mode = PyObject_GetAttrString(entry, "mode");
+	if (!mode)
+		return NULL;
+
+	if (mode == Py_None) {
+		result = Py_False;
+		Py_INCREF(result);
+	} else {
+		lmode = PyInt_AsLong(mode);
+		if (lmode == -1 && PyErr_Occurred()) {
+			Py_DECREF(mode);
+			return NULL;
+		}
+		result = PyBool_FromLong(S_ISDIR((mode_t)lmode));
+	}
+	Py_DECREF(mode);
+	return result;
+}
+
+static Py_hash_t add_hash(PyObject *get, PyObject *set, char *str, int n)
+{
+	PyObject *str_obj = NULL, *hash_obj = NULL, *value = NULL,
+		*set_value = NULL;
+	Py_hash_t hash;
+
+	/* It would be nice to hash without copying str into a PyString, but that
+	 * isn't exposed by the API. */
+	str_obj = PyString_FromStringAndSize(str, n);
+	if (!str_obj)
+		goto error;
+	hash = PyObject_Hash(str_obj);
+	if (hash == -1)
+		goto error;
+	hash_obj = PyInt_FromLong(hash);
+	if (!hash_obj)
+		goto error;
+
+	value = PyObject_CallFunctionObjArgs(get, hash_obj, NULL);
+	if (!value)
+		goto error;
+	set_value = PyObject_CallFunction(set, "(Ol)", hash_obj,
+		PyInt_AS_LONG(value) + n);
+	if (!set_value)
+		goto error;
+
+	Py_DECREF(str_obj);
+	Py_DECREF(hash_obj);
+	Py_DECREF(value);
+	Py_DECREF(set_value);
+	return 0;
+
+error:
+	Py_XDECREF(str_obj);
+	Py_XDECREF(hash_obj);
+	Py_XDECREF(value);
+	Py_XDECREF(set_value);
+	return -1;
+}
+
+static PyObject *py_count_blocks(PyObject *self, PyObject *args)
+{
+	PyObject *obj, *chunks = NULL, *chunk, *counts = NULL, *get = NULL,
+		*set = NULL;
+	char *chunk_str, *block = NULL;
+	Py_ssize_t num_chunks, chunk_len;
+	int i, j, n = 0;
+	char c;
+
+	if (!PyArg_ParseTuple(args, "O", &obj))
+		goto error;
+
+	counts = PyObject_CallFunctionObjArgs(defaultdict_cls, int_cls, NULL);
+	if (!counts)
+		goto error;
+	get = PyObject_GetAttrString(counts, "__getitem__");
+	set = PyObject_GetAttrString(counts, "__setitem__");
+
+	chunks = PyObject_CallMethod(obj, "as_raw_chunks", NULL);
+	if (!chunks)
+		goto error;
+	if (!PyList_Check(chunks)) {
+		PyErr_SetString(PyExc_TypeError,
+			"as_raw_chunks() did not return a list");
+		goto error;
+	}
+	num_chunks = PyList_GET_SIZE(chunks);
+	block = PyMem_New(char, block_size);
+	if (!block) {
+		PyErr_NoMemory();
+		goto error;
+	}
+
+	for (i = 0; i < num_chunks; i++) {
+		chunk = PyList_GET_ITEM(chunks, i);
+		if (!PyString_Check(chunk)) {
+			PyErr_SetString(PyExc_TypeError, "chunk is not a string");
+			goto error;
+		}
+		if (PyString_AsStringAndSize(chunk, &chunk_str, &chunk_len) == -1)
+			goto error;
+
+		for (j = 0; j < chunk_len; j++) {
+			c = chunk_str[j];
+			block[n++] = c;
+			if (c == '\n' || n == block_size) {
+				if (add_hash(get, set, block, n) == -1)
+					goto error;
+				n = 0;
+			}
+		}
+	}
+	if (n && add_hash(get, set, block, n) == -1)
+		goto error;
+
+	Py_DECREF(chunks);
+	Py_DECREF(get);
+	Py_DECREF(set);
+	PyMem_Free(block);
+	return counts;
+
+error:
+	Py_XDECREF(chunks);
+	Py_XDECREF(get);
+	Py_XDECREF(set);
+	Py_XDECREF(counts);
+	PyMem_Free(block);
+	return NULL;
+}
+
+static PyMethodDef py_diff_tree_methods[] = {
+	{ "_is_tree", (PyCFunction)py_is_tree, METH_VARARGS, NULL },
+	{ "_merge_entries", (PyCFunction)py_merge_entries, METH_VARARGS, NULL },
+	{ "_count_blocks", (PyCFunction)py_count_blocks, METH_VARARGS, NULL },
+	{ NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+moduleinit(void)
+{
+	PyObject *m, *objects_mod = NULL, *diff_tree_mod = NULL;
+	PyObject *block_size_obj = NULL;
+
+#if PY_MAJOR_VERSION >= 3
+	static struct PyModuleDef moduledef = {
+		PyModuleDef_HEAD_INIT,
+		"_diff_tree",         /* m_name */
+		NULL,                 /* m_doc */
+		-1,                   /* m_size */
+		py_diff_tree_methods, /* m_methods */
+		NULL,                 /* m_reload */
+		NULL,                 /* m_traverse */
+		NULL,                 /* m_clear*/
+		NULL,                 /* m_free */
+	};
+	m = PyModule_Create(&moduledef);
+#else
+	m = Py_InitModule("_diff_tree", py_diff_tree_methods);
+#endif
+	if (!m)
+		goto error;
+
+	objects_mod = PyImport_ImportModule("dulwich.objects");
+	if (!objects_mod)
+		goto error;
+
+	tree_entry_cls = PyObject_GetAttrString(objects_mod, "TreeEntry");
+	Py_DECREF(objects_mod);
+	if (!tree_entry_cls)
+		goto error;
+
+	diff_tree_mod = PyImport_ImportModule("dulwich.diff_tree");
+	if (!diff_tree_mod)
+		goto error;
+
+	null_entry = PyObject_GetAttrString(diff_tree_mod, "_NULL_ENTRY");
+	if (!null_entry)
+		goto error;
+
+	block_size_obj = PyObject_GetAttrString(diff_tree_mod, "_BLOCK_SIZE");
+	if (!block_size_obj)
+		goto error;
+	block_size = (int)PyInt_AsLong(block_size_obj);
+
+	if (PyErr_Occurred())
+		goto error;
+
+	defaultdict_cls = PyObject_GetAttrString(diff_tree_mod, "defaultdict");
+	if (!defaultdict_cls)
+		goto error;
+
+	/* This is kind of hacky, but I don't know of a better way to get the
+	 * PyObject* version of int. */
+	int_cls = PyDict_GetItemString(PyEval_GetBuiltins(), "int");
+	if (!int_cls) {
+		PyErr_SetString(PyExc_NameError, "int");
+		goto error;
+	}
+	/* PyDict_GetItemString returns a borrowed reference; take a strong
+	 * reference since int_cls is stored in a global. */
+	Py_INCREF(int_cls);
+
+	Py_DECREF(diff_tree_mod);
+
+	return m;
+
+error:
+	Py_XDECREF(objects_mod);
+	Py_XDECREF(diff_tree_mod);
+	Py_XDECREF(null_entry);
+	Py_XDECREF(block_size_obj);
+	Py_XDECREF(defaultdict_cls);
+	Py_XDECREF(int_cls);
+	return NULL;
+}
+
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC
+PyInit__diff_tree(void)
+{
+	return moduleinit();
+}
+#else
+PyMODINIT_FUNC
+init_diff_tree(void)
+{
+	moduleinit();
+}
+#endif
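
The extension above accelerates dulwich.diff_tree. Its core `_merge_entries` routine is a two-pointer merge over two path-sorted entry lists: entries with equal paths are paired, and a path present on only one side is paired with a null entry. A rough pure-Python sketch of that merge step (dulwich.diff_tree ships the real fallback, which works on TreeEntry namedtuples; plain (path, mode, sha) tuples are used here for brevity):

def merge_sorted_entries(entries1, entries2, null_entry=None):
    """Pair up two path-sorted lists of (path, mode, sha) tuples.

    Yields (entry1, entry2) pairs; a side with no matching path is
    padded with `null_entry`, mirroring the C loop above.
    """
    i1 = i2 = 0
    while i1 < len(entries1) and i2 < len(entries2):
        path1, path2 = entries1[i1][0], entries2[i2][0]
        if path1 == path2:
            yield entries1[i1], entries2[i2]
            i1 += 1
            i2 += 1
        elif path1 < path2:
            yield entries1[i1], null_entry
            i1 += 1
        else:
            yield null_entry, entries2[i2]
            i2 += 1
    for entry in entries1[i1:]:
        yield entry, null_entry
    for entry in entries2[i2:]:
        yield null_entry, entry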

+ 329 - 0
dulwich/_objects.c

@@ -0,0 +1,329 @@
+/*
+ * Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+ *
+ * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+ * General Public License as published by the Free Software Foundation; version 2.0
+ * or (at your option) any later version. You can redistribute it and/or
+ * modify it under the terms of either of these two licenses.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * You should have received a copy of the licenses; if not, see
+ * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+ * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+ * License, Version 2.0.
+ */
+
+#include <Python.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_Check(obj) 0
+#define PyInt_CheckExact(obj) 0
+#define PyInt_AsLong PyLong_AsLong
+#define PyString_AS_STRING PyBytes_AS_STRING
+#define PyString_Check PyBytes_Check
+#define PyString_FromStringAndSize PyBytes_FromStringAndSize
+#endif
+
+#if defined(__MINGW32_VERSION) || defined(__APPLE__)
+size_t rep_strnlen(char *text, size_t maxlen);
+size_t rep_strnlen(char *text, size_t maxlen)
+{
+	const char *last = memchr(text, '\0', maxlen);
+	return last ? (size_t) (last - text) : maxlen;
+}
+#define strnlen rep_strnlen
+#endif
+
+#define bytehex(x) (((x)<0xa)?('0'+(x)):('a'-0xa+(x)))
+
+static PyObject *tree_entry_cls;
+static PyObject *object_format_exception_cls;
+
+static PyObject *sha_to_pyhex(const unsigned char *sha)
+{
+	char hexsha[41];
+	int i;
+	for (i = 0; i < 20; i++) {
+		hexsha[i*2] = bytehex((sha[i] & 0xF0) >> 4);
+		hexsha[i*2+1] = bytehex(sha[i] & 0x0F);
+	}
+
+	return PyString_FromStringAndSize(hexsha, 40);
+}
+
+static PyObject *py_parse_tree(PyObject *self, PyObject *args, PyObject *kw)
+{
+	char *text, *start, *end;
+	int len, strict;
+	size_t namelen;
+	PyObject *ret, *item, *name, *sha, *py_strict = NULL;
+	static char *kwlist[] = {"text", "strict", NULL};
+
+#if PY_MAJOR_VERSION >= 3
+	if (!PyArg_ParseTupleAndKeywords(args, kw, "y#|O", kwlist,
+	                                 &text, &len, &py_strict))
+#else
+	if (!PyArg_ParseTupleAndKeywords(args, kw, "s#|O", kwlist,
+	                                 &text, &len, &py_strict))
+#endif
+		return NULL;
+	strict = py_strict ?  PyObject_IsTrue(py_strict) : 0;
+	/* TODO: currently this returns a list; if memory usage is a concern,
+	 * consider rewriting as a custom iterator object */
+	ret = PyList_New(0);
+	if (ret == NULL) {
+		return NULL;
+	}
+	start = text;
+	end = text + len;
+	while (text < end) {
+		long mode;
+		if (strict && text[0] == '0') {
+			PyErr_SetString(object_format_exception_cls,
+			                "Illegal leading zero on mode");
+			Py_DECREF(ret);
+			return NULL;
+		}
+		mode = strtol(text, &text, 8);
+		if (*text != ' ') {
+			PyErr_SetString(PyExc_ValueError, "Expected space");
+			Py_DECREF(ret);
+			return NULL;
+		}
+		text++;
+		namelen = strnlen(text, len - (text - start));
+		name = PyString_FromStringAndSize(text, namelen);
+		if (name == NULL) {
+			Py_DECREF(ret);
+			return NULL;
+		}
+		if (text + namelen + 20 >= end) {
+			PyErr_SetString(PyExc_ValueError, "SHA truncated");
+			Py_DECREF(ret);
+			Py_DECREF(name);
+			return NULL;
+		}
+		sha = sha_to_pyhex((unsigned char *)text+namelen+1);
+		if (sha == NULL) {
+			Py_DECREF(ret);
+			Py_DECREF(name);
+			return NULL;
+		}
+		item = Py_BuildValue("(NlN)", name, mode, sha);
+		if (item == NULL) {
+			Py_DECREF(ret);
+			Py_DECREF(sha);
+			Py_DECREF(name);
+			return NULL;
+		}
+		if (PyList_Append(ret, item) == -1) {
+			Py_DECREF(ret);
+			Py_DECREF(item);
+			return NULL;
+		}
+		Py_DECREF(item);
+		text += namelen+21;
+	}
+	return ret;
+}
+
+struct tree_item {
+	const char *name;
+	int mode;
+	PyObject *tuple;
+};
+
+int cmp_tree_item(const void *_a, const void *_b)
+{
+	const struct tree_item *a = _a, *b = _b;
+	const char *remain_a, *remain_b;
+	int ret;
+	size_t common;
+	if (strlen(a->name) > strlen(b->name)) {
+		common = strlen(b->name);
+		remain_a = a->name + common;
+		remain_b = (S_ISDIR(b->mode)?"/":"");
+	} else if (strlen(b->name) > strlen(a->name)) {
+		common = strlen(a->name);
+		remain_a = (S_ISDIR(a->mode)?"/":"");
+		remain_b = b->name + common;
+	} else { /* strlen(a->name) == strlen(b->name) */
+		common = 0;
+		remain_a = a->name;
+		remain_b = b->name;
+	}
+	ret = strncmp(a->name, b->name, common);
+	if (ret != 0)
+		return ret;
+	return strcmp(remain_a, remain_b);
+}
+
+int cmp_tree_item_name_order(const void *_a, const void *_b) {
+	const struct tree_item *a = _a, *b = _b;
+	return strcmp(a->name, b->name);
+}
+
+static PyObject *py_sorted_tree_items(PyObject *self, PyObject *args)
+{
+	struct tree_item *qsort_entries = NULL;
+	int name_order, n = 0, i;
+	PyObject *entries, *py_name_order, *ret, *key, *value, *py_mode, *py_sha;
+	Py_ssize_t pos = 0, num_entries;
+	int (*cmp)(const void *, const void *);
+
+	if (!PyArg_ParseTuple(args, "OO", &entries, &py_name_order))
+		goto error;
+
+	if (!PyDict_Check(entries)) {
+		PyErr_SetString(PyExc_TypeError, "Argument not a dictionary");
+		goto error;
+	}
+
+	name_order = PyObject_IsTrue(py_name_order);
+	if (name_order == -1)
+		goto error;
+	cmp = name_order ? cmp_tree_item_name_order : cmp_tree_item;
+
+	num_entries = PyDict_Size(entries);
+	if (PyErr_Occurred())
+		goto error;
+	qsort_entries = PyMem_New(struct tree_item, num_entries);
+	if (!qsort_entries) {
+		PyErr_NoMemory();
+		goto error;
+	}
+
+	while (PyDict_Next(entries, &pos, &key, &value)) {
+		if (!PyString_Check(key)) {
+			PyErr_SetString(PyExc_TypeError, "Name is not a string");
+			goto error;
+		}
+
+		if (PyTuple_Size(value) != 2) {
+			PyErr_SetString(PyExc_ValueError, "Tuple has invalid size");
+			goto error;
+		}
+
+		py_mode = PyTuple_GET_ITEM(value, 0);
+		if (!PyInt_Check(py_mode) && !PyLong_Check(py_mode)) {
+			PyErr_SetString(PyExc_TypeError, "Mode is not an integral type");
+			goto error;
+		}
+
+		py_sha = PyTuple_GET_ITEM(value, 1);
+		if (!PyString_Check(py_sha)) {
+			PyErr_SetString(PyExc_TypeError, "SHA is not a string");
+			goto error;
+		}
+		qsort_entries[n].name = PyString_AS_STRING(key);
+		qsort_entries[n].mode = PyInt_AsLong(py_mode);
+
+		qsort_entries[n].tuple = PyObject_CallFunctionObjArgs(
+		                tree_entry_cls, key, py_mode, py_sha, NULL);
+		if (qsort_entries[n].tuple == NULL)
+			goto error;
+		n++;
+	}
+
+	qsort(qsort_entries, num_entries, sizeof(struct tree_item), cmp);
+
+	ret = PyList_New(num_entries);
+	if (ret == NULL) {
+		PyErr_NoMemory();
+		goto error;
+	}
+
+	for (i = 0; i < num_entries; i++) {
+		PyList_SET_ITEM(ret, i, qsort_entries[i].tuple);
+	}
+	PyMem_Free(qsort_entries);
+	return ret;
+
+error:
+	for (i = 0; i < n; i++) {
+		Py_XDECREF(qsort_entries[i].tuple);
+	}
+	PyMem_Free(qsort_entries);
+	return NULL;
+}
+
+static PyMethodDef py_objects_methods[] = {
+	{ "parse_tree", (PyCFunction)py_parse_tree, METH_VARARGS | METH_KEYWORDS,
+	  NULL },
+	{ "sorted_tree_items", py_sorted_tree_items, METH_VARARGS, NULL },
+	{ NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+moduleinit(void)
+{
+	PyObject *m, *objects_mod, *errors_mod;
+
+#if PY_MAJOR_VERSION >= 3
+	static struct PyModuleDef moduledef = {
+		PyModuleDef_HEAD_INIT,
+		"_objects",         /* m_name */
+		NULL,               /* m_doc */
+		-1,                 /* m_size */
+		py_objects_methods, /* m_methods */
+		NULL,               /* m_reload */
+		NULL,               /* m_traverse */
+		NULL,               /* m_clear*/
+		NULL,               /* m_free */
+	};
+	m = PyModule_Create(&moduledef);
+#else
+	m = Py_InitModule3("_objects", py_objects_methods, NULL);
+#endif
+	if (m == NULL) {
+		return NULL;
+	}
+
+	errors_mod = PyImport_ImportModule("dulwich.errors");
+	if (errors_mod == NULL) {
+		return NULL;
+	}
+
+	object_format_exception_cls = PyObject_GetAttrString(
+		errors_mod, "ObjectFormatException");
+	Py_DECREF(errors_mod);
+	if (object_format_exception_cls == NULL) {
+		return NULL;
+	}
+
+	/* This is a circular import but should be safe since this module is
+	 * imported at the very bottom of objects.py. */
+	objects_mod = PyImport_ImportModule("dulwich.objects");
+	if (objects_mod == NULL) {
+		return NULL;
+	}
+
+	tree_entry_cls = PyObject_GetAttrString(objects_mod, "TreeEntry");
+	Py_DECREF(objects_mod);
+	if (tree_entry_cls == NULL) {
+		return NULL;
+	}
+
+	return m;
+}
+
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC
+PyInit__objects(void)
+{
+	return moduleinit();
+}
+#else
+PyMODINIT_FUNC
+init_objects(void)
+{
+	moduleinit();
+}
+#endif
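
For reference, the raw tree format parsed by `py_parse_tree` above is a sequence of entries, each consisting of an octal mode, a space, a NUL-terminated name, and a raw 20-byte SHA-1. A minimal Python 3 sketch of the same parsing loop (dulwich.objects contains the real pure-Python fallback; the strict leading-zero check is omitted here):

import binascii

def parse_tree(data):
    """Yield (name, mode, hexsha) tuples from raw git tree bytes.

    Mirrors the loop in _objects.c above: an octal mode, a space,
    a NUL-terminated name, then a raw 20-byte SHA-1.
    """
    pos = 0
    while pos < len(data):
        space = data.index(b' ', pos)
        mode = int(data[pos:space], 8)
        nul = data.index(b'\0', space)
        name = data[space + 1:nul]
        sha = data[nul + 1:nul + 21]
        if len(sha) != 20:
            raise ValueError("SHA truncated")
        yield name, mode, binascii.hexlify(sha)
        pos = nul + 21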

+ 313 - 0
dulwich/_pack.c

@@ -0,0 +1,313 @@
+/* 
+ * Copyright (C) 2009 Jelmer Vernooij <jelmer@samba.org>
+ *
+ * Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+ * General Public License as published by the Free Software Foundation; version 2.0
+ * or (at your option) any later version. You can redistribute it and/or
+ * modify it under the terms of either of these two licenses.
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * You should have received a copy of the licenses; if not, see
+ * <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+ * and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+ * License, Version 2.0.
+ */
+
+#include <Python.h>
+#include <stdint.h>
+
+#if PY_MAJOR_VERSION >= 3
+#define PyInt_FromLong PyLong_FromLong
+#define PyString_AS_STRING PyBytes_AS_STRING
+#define PyString_Check PyBytes_Check
+#define PyString_CheckExact PyBytes_CheckExact
+#define PyString_FromStringAndSize PyBytes_FromStringAndSize
+#define PyString_FromString PyBytes_FromString
+#define PyString_GET_SIZE PyBytes_GET_SIZE
+#define PyString_Size PyBytes_Size
+#define _PyString_Join _PyBytes_Join
+#endif
+
+static PyObject *PyExc_ApplyDeltaError = NULL;
+
+static int py_is_sha(PyObject *sha)
+{
+	if (!PyString_CheckExact(sha))
+		return 0;
+
+	if (PyString_Size(sha) != 20)
+		return 0;
+
+	return 1;
+}
+
+
+static size_t get_delta_header_size(uint8_t *delta, size_t *index, size_t length)
+{
+	size_t size = 0;
+	size_t i = 0;
+	while ((*index) < length) {
+		uint8_t cmd = delta[*index];
+		(*index)++;
+		size |= (cmd & ~0x80) << i;
+		i += 7;
+		if (!(cmd & 0x80))
+			break;
+	}
+	return size;
+}
+
+static PyObject *py_chunked_as_string(PyObject *py_buf)
+{
+	if (PyList_Check(py_buf)) {
+		PyObject *sep = PyString_FromString("");
+		if (sep == NULL) {
+			PyErr_NoMemory();
+			return NULL;
+		}
+		py_buf = _PyString_Join(sep, py_buf);
+		Py_DECREF(sep);
+		if (py_buf == NULL) {
+			PyErr_NoMemory();
+			return NULL;
+		}
+	} else if (PyString_Check(py_buf)) {
+		Py_INCREF(py_buf);
+	} else {
+		PyErr_SetString(PyExc_TypeError,
+			"src_buf is not a string or a list of chunks");
+		return NULL;
+	}
+	return py_buf;
+}
+
+static PyObject *py_apply_delta(PyObject *self, PyObject *args)
+{
+	uint8_t *src_buf, *delta;
+	size_t src_buf_len, delta_len;
+	size_t src_size, dest_size;
+	size_t outindex = 0;
+	size_t index;
+	uint8_t *out;
+	PyObject *ret, *py_src_buf, *py_delta, *ret_list;
+
+	if (!PyArg_ParseTuple(args, "OO", &py_src_buf, &py_delta))
+		return NULL;
+
+	py_src_buf = py_chunked_as_string(py_src_buf);
+	if (py_src_buf == NULL)
+		return NULL;
+
+	py_delta = py_chunked_as_string(py_delta);
+	if (py_delta == NULL) {
+		Py_DECREF(py_src_buf);
+		return NULL;
+	}
+
+	src_buf = (uint8_t *)PyString_AS_STRING(py_src_buf);
+	src_buf_len = (size_t)PyString_GET_SIZE(py_src_buf);
+
+	delta = (uint8_t *)PyString_AS_STRING(py_delta);
+	delta_len = (size_t)PyString_GET_SIZE(py_delta);
+
+	index = 0;
+	src_size = get_delta_header_size(delta, &index, delta_len);
+	if (src_size != src_buf_len) {
+		PyErr_Format(PyExc_ApplyDeltaError,
+					 "Unexpected source buffer size: %lu vs %lu",
+					 (unsigned long)src_size, (unsigned long)src_buf_len);
+		Py_DECREF(py_src_buf);
+		Py_DECREF(py_delta);
+		return NULL;
+	}
+	dest_size = get_delta_header_size(delta, &index, delta_len);
+	ret = PyString_FromStringAndSize(NULL, dest_size);
+	if (ret == NULL) {
+		PyErr_NoMemory();
+		Py_DECREF(py_src_buf);
+		Py_DECREF(py_delta);
+		return NULL;
+	}
+	out = (uint8_t *)PyString_AS_STRING(ret);
+	while (index < delta_len) {
+		uint8_t cmd = delta[index];
+		index++;
+		if (cmd & 0x80) {
+			size_t cp_off = 0, cp_size = 0;
+			int i;
+			for (i = 0; i < 4; i++) {
+				if (cmd & (1 << i)) {
+					uint8_t x = delta[index];
+					index++;
+					cp_off |= x << (i * 8);
+				}
+			}
+			for (i = 0; i < 3; i++) {
+				if (cmd & (1 << (4+i))) {
+					uint8_t x = delta[index];
+					index++;
+					cp_size |= x << (i * 8);
+				}
+			}
+			if (cp_size == 0)
+				cp_size = 0x10000;
+			if (cp_off + cp_size < cp_size ||
+				cp_off + cp_size > src_size ||
+				cp_size > dest_size)
+				break;
+			memcpy(out+outindex, src_buf+cp_off, cp_size);
+			outindex += cp_size;
+			dest_size -= cp_size;
+		} else if (cmd != 0) {
+			if (cmd > dest_size)
+				break;
+			memcpy(out+outindex, delta+index, cmd);
+			outindex += cmd;
+			index += cmd;
+			dest_size -= cmd;
+		} else {
+			PyErr_SetString(PyExc_ApplyDeltaError, "Invalid opcode 0");
+			Py_DECREF(ret);
+			Py_DECREF(py_delta);
+			Py_DECREF(py_src_buf);
+			return NULL;
+		}
+	}
+	Py_DECREF(py_src_buf);
+	Py_DECREF(py_delta);
+
+	if (index != delta_len) {
+		PyErr_SetString(PyExc_ApplyDeltaError, "delta not empty");
+		Py_DECREF(ret);
+		return NULL;
+	}
+
+	if (dest_size != 0) {
+		PyErr_SetString(PyExc_ApplyDeltaError, "dest size incorrect");
+		Py_DECREF(ret);
+		return NULL;
+	}
+
+	ret_list = Py_BuildValue("[N]", ret);
+	if (ret_list == NULL) {
+		Py_DECREF(ret);
+		return NULL;
+	}
+	return ret_list;
+}
+
+static PyObject *py_bisect_find_sha(PyObject *self, PyObject *args)
+{
+	PyObject *unpack_name;
+	char *sha;
+	int sha_len;
+	int start, end;
+#if PY_MAJOR_VERSION >= 3
+	if (!PyArg_ParseTuple(args, "iiy#O", &start, &end,
+			      &sha, &sha_len, &unpack_name))
+#else
+	if (!PyArg_ParseTuple(args, "iis#O", &start, &end,
+			      &sha, &sha_len, &unpack_name))
+#endif
+		return NULL;
+
+	if (sha_len != 20) {
+		PyErr_SetString(PyExc_ValueError, "Sha is not 20 bytes long");
+		return NULL;
+	}
+	if (start > end) {
+		PyErr_SetString(PyExc_AssertionError, "start > end");
+		return NULL;
+	}
+
+	while (start <= end) {
+		PyObject *file_sha;
+		int i = (start + end)/2;
+		int cmp;
+		file_sha = PyObject_CallFunction(unpack_name, "i", i);
+		if (file_sha == NULL) {
+			return NULL;
+		}
+		if (!py_is_sha(file_sha)) {
+			PyErr_SetString(PyExc_TypeError, "unpack_name returned non-sha object");
+			Py_DECREF(file_sha);
+			return NULL;
+		}
+		cmp = memcmp(PyString_AS_STRING(file_sha), sha, 20);
+		Py_DECREF(file_sha);
+		if (cmp < 0)
+			start = i + 1;
+		else if (cmp > 0)
+			end = i - 1;
+		else {
+			return PyInt_FromLong(i);
+		}
+	}
+	Py_RETURN_NONE;
+}
+
+
+static PyMethodDef py_pack_methods[] = {
+	{ "apply_delta", (PyCFunction)py_apply_delta, METH_VARARGS, NULL },
+	{ "bisect_find_sha", (PyCFunction)py_bisect_find_sha, METH_VARARGS, NULL },
+	{ NULL, NULL, 0, NULL }
+};
+
+static PyObject *
+moduleinit(void)
+{
+	PyObject *m;
+	PyObject *errors_module;
+
+#if PY_MAJOR_VERSION >= 3
+	static struct PyModuleDef moduledef = {
+	  PyModuleDef_HEAD_INIT,
+	  "_pack",         /* m_name */
+	  NULL,            /* m_doc */
+	  -1,              /* m_size */
+	  py_pack_methods, /* m_methods */
+	  NULL,            /* m_reload */
+	  NULL,            /* m_traverse */
+	  NULL,            /* m_clear*/
+	  NULL,            /* m_free */
+	};
+#endif
+
+	errors_module = PyImport_ImportModule("dulwich.errors");
+	if (errors_module == NULL)
+		return NULL;
+
+	PyExc_ApplyDeltaError = PyObject_GetAttrString(errors_module, "ApplyDeltaError");
+	Py_DECREF(errors_module);
+	if (PyExc_ApplyDeltaError == NULL)
+		return NULL;
+
+#if PY_MAJOR_VERSION >= 3
+	m = PyModule_Create(&moduledef);
+#else
+	m = Py_InitModule3("_pack", py_pack_methods, NULL);
+#endif
+	if (m == NULL)
+		return NULL;
+
+	return m;
+}
+
+#if PY_MAJOR_VERSION >= 3
+PyMODINIT_FUNC
+PyInit__pack(void)
+{
+	return moduleinit();
+}
+#else
+PyMODINIT_FUNC
+init_pack(void)
+{
+	moduleinit();
+}
+#endif
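
The `apply_delta` above implements Git's binary delta format: the delta starts with two little-endian base-128 varints giving the source and target sizes, followed by opcodes that either copy a range from the source buffer (high bit set; the low bits select which offset and size bytes follow) or insert up to 127 literal bytes (cmd itself is the byte count). A condensed Python 3 sketch of the same logic, with the bounds checking simplified (dulwich.pack carries the real pure-Python fallback):

def _decode_size(delta, index):
    """Read one little-endian base-128 varint; return (value, new_index)."""
    size = shift = 0
    while True:
        byte = delta[index]
        index += 1
        size |= (byte & 0x7f) << shift
        shift += 7
        if not byte & 0x80:
            return size, index

def apply_delta(src, delta):
    """Apply a raw Git delta to `src` and return the reconstructed bytes."""
    src_size, i = _decode_size(delta, 0)
    if src_size != len(src):
        raise ValueError("unexpected source buffer size")
    dest_size, i = _decode_size(delta, i)
    out = bytearray()
    while i < len(delta):
        cmd = delta[i]
        i += 1
        if cmd & 0x80:  # copy opcode: low bits select offset/size bytes
            offset = size = 0
            for bit in range(4):
                if cmd & (1 << bit):
                    offset |= delta[i] << (bit * 8)
                    i += 1
            for bit in range(3):
                if cmd & (1 << (4 + bit)):
                    size |= delta[i] << (bit * 8)
                    i += 1
            size = size or 0x10000  # a size of 0 encodes 0x10000
            out += src[offset:offset + size]
        elif cmd:  # insert opcode: next `cmd` bytes are literal data
            out += delta[i:i + cmd]
            i += cmd
        else:
            raise ValueError("invalid opcode 0")
    if len(out) != dest_size:
        raise ValueError("dest size incorrect")
    return bytes(out)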

+ 118 - 0
dulwich/archive.py

@@ -0,0 +1,118 @@
+# archive.py -- Creating a tarball from a Git tree
+# Copyright (C) 2015 Jonas Haag <jonas@lophus.org>
+# Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Generates tarballs for Git trees.
+
+"""
+
+import posixpath
+import stat
+import tarfile
+from io import BytesIO
+from contextlib import closing
+
+
+class ChunkedBytesIO(object):
+    """Turn a list of bytestrings into a file-like object.
+
+    This is similar to creating a `BytesIO` from a concatenation of the
+    bytestring list, but saves memory by NOT creating one giant bytestring
+    first::
+
+        BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(
+            list_of_bytestrings)
+    """
+    def __init__(self, contents):
+        self.contents = contents
+        self.pos = (0, 0)
+
+    def read(self, maxbytes=None):
+        if maxbytes is None or maxbytes < 0:
+            maxbytes = float('inf')
+
+        buf = []
+        chunk, cursor = self.pos
+
+        while chunk < len(self.contents):
+            if maxbytes < len(self.contents[chunk]) - cursor:
+                buf.append(self.contents[chunk][cursor:cursor+maxbytes])
+                cursor += maxbytes
+                self.pos = (chunk, cursor)
+                break
+            else:
+                buf.append(self.contents[chunk][cursor:])
+                maxbytes -= len(self.contents[chunk]) - cursor
+                chunk += 1
+                cursor = 0
+                self.pos = (chunk, cursor)
+        return b''.join(buf)
+
+
+def tar_stream(store, tree, mtime, format=''):
+    """Generate a tar stream for the contents of a Git tree.
+
+    Returns a generator that lazily assembles a tar archive (compressed
+    according to `format`), yielding it in pieces (bytestrings). To obtain the
+    complete binary archive, simply concatenate these chunks.
+
+    :param store: Object store to retrieve objects from
+    :param tree: Tree object for the tree root
+    :param mtime: UNIX timestamp that is assigned as the modification time for
+        all files
+    :param format: Optional compression format for tarball
+    :return: Bytestrings
+    """
+    buf = BytesIO()
+    with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
+        for entry_abspath, entry in _walk_tree(store, tree):
+            try:
+                blob = store[entry.sha]
+            except KeyError:
+                # Entry probably refers to a submodule, which we don't yet
+                # support.
+                continue
+            data = ChunkedBytesIO(blob.chunked)
+
+            info = tarfile.TarInfo()
+            # tarfile only works with ascii.
+            info.name = entry_abspath.decode('ascii')
+            info.size = blob.raw_length()
+            info.mode = entry.mode
+            info.mtime = mtime
+
+            tar.addfile(info, data)
+            yield buf.getvalue()
+            buf.truncate(0)
+            buf.seek(0)
+    yield buf.getvalue()
+
+
+def _walk_tree(store, tree, root=b''):
+    """Recursively walk a dulwich Tree, yielding tuples of
+    (absolute path, TreeEntry) along the way.
+    """
+    for entry in tree.iteritems():
+        entry_abspath = posixpath.join(root, entry.path)
+        if stat.S_ISDIR(entry.mode):
+            for _ in _walk_tree(store, store[entry.sha], entry_abspath):
+                yield _
+        else:
+            yield (entry_abspath, entry)
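
A short usage sketch for the tar_stream generator above; the repository path and the gzip choice are illustrative, and a repository whose HEAD resolves to a commit is assumed:

import time

from dulwich.repo import Repo
from dulwich.archive import tar_stream

repo = Repo("/path/to/repo")   # illustrative path
commit = repo[repo.head()]     # resolve HEAD to a Commit object
tree = repo[commit.tree]       # its root Tree
with open("snapshot.tar.gz", "wb") as f:
    # format="gz" selects gzip compression via tarfile's "w:gz" mode
    for chunk in tar_stream(repo.object_store, tree, int(time.time()),
                            format="gz"):
        f.write(chunk)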

+ 1360 - 0
dulwich/client.py

@@ -0,0 +1,1360 @@
+# client.py -- Implementation of the client side git protocols
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Client side support for the Git protocol.
+
+The Dulwich client supports the following capabilities:
+
+ * thin-pack
+ * multi_ack_detailed
+ * multi_ack
+ * side-band-64k
+ * ofs-delta
+ * quiet
+ * report-status
+ * delete-refs
+
+Known capabilities that are not supported:
+
+ * shallow
+ * no-progress
+ * include-tag
+"""
+
+from contextlib import closing
+from io import BytesIO, BufferedReader
+import dulwich
+import select
+import socket
+import subprocess
+import sys
+
+try:
+    from urllib import quote as urlquote
+    from urllib import unquote as urlunquote
+except ImportError:
+    from urllib.parse import quote as urlquote
+    from urllib.parse import unquote as urlunquote
+
+try:
+    import urllib2
+    import urlparse
+except ImportError:
+    import urllib.request as urllib2
+    import urllib.parse as urlparse
+
+from dulwich.errors import (
+    GitProtocolError,
+    NotGitRepository,
+    SendPackError,
+    UpdateRefsError,
+    )
+from dulwich.protocol import (
+    _RBUFSIZE,
+    capability_agent,
+    CAPABILITY_DELETE_REFS,
+    CAPABILITY_MULTI_ACK,
+    CAPABILITY_MULTI_ACK_DETAILED,
+    CAPABILITY_OFS_DELTA,
+    CAPABILITY_QUIET,
+    CAPABILITY_REPORT_STATUS,
+    CAPABILITY_SIDE_BAND_64K,
+    CAPABILITY_THIN_PACK,
+    CAPABILITIES_REF,
+    COMMAND_DONE,
+    COMMAND_HAVE,
+    COMMAND_WANT,
+    SIDE_BAND_CHANNEL_DATA,
+    SIDE_BAND_CHANNEL_PROGRESS,
+    SIDE_BAND_CHANNEL_FATAL,
+    PktLineParser,
+    Protocol,
+    ProtocolFile,
+    TCP_GIT_PORT,
+    ZERO_SHA,
+    extract_capabilities,
+    )
+from dulwich.pack import (
+    write_pack_objects,
+    )
+from dulwich.refs import (
+    read_info_refs,
+    )
+
+
+def _fileno_can_read(fileno):
+    """Check if a file descriptor is readable."""
+    return len(select.select([fileno], [], [], 0)[0]) > 0
+
+
+def _win32_peek_avail(handle):
+    """Wrapper around PeekNamedPipe to check how many bytes are available."""
+    from ctypes import byref, wintypes, windll, GetLastError
+    c_avail = wintypes.DWORD()
+    c_message = wintypes.DWORD()
+    success = windll.kernel32.PeekNamedPipe(
+        handle, None, 0, None, byref(c_avail),
+        byref(c_message))
+    if not success:
+        raise OSError(GetLastError())
+    return c_avail.value
+
+
+COMMON_CAPABILITIES = [CAPABILITY_OFS_DELTA, CAPABILITY_SIDE_BAND_64K]
+FETCH_CAPABILITIES = ([CAPABILITY_THIN_PACK, CAPABILITY_MULTI_ACK,
+                       CAPABILITY_MULTI_ACK_DETAILED] +
+                      COMMON_CAPABILITIES)
+SEND_CAPABILITIES = [CAPABILITY_REPORT_STATUS] + COMMON_CAPABILITIES
+
+
+class ReportStatusParser(object):
+    """Handle status as reported by servers with 'report-status' capability.
+    """
+
+    def __init__(self):
+        self._done = False
+        self._pack_status = None
+        self._ref_status_ok = True
+        self._ref_statuses = []
+
+    def check(self):
+        """Check if there were any errors and, if so, raise exceptions.
+
+        :raise SendPackError: Raised when the server could not unpack
+        :raise UpdateRefsError: Raised when refs could not be updated
+        """
+        if self._pack_status not in (b'unpack ok', None):
+            raise SendPackError(self._pack_status)
+        if not self._ref_status_ok:
+            ref_status = {}
+            ok = set()
+            for status in self._ref_statuses:
+                if b' ' not in status:
+                    # malformed response, move on to the next one
+                    continue
+                status, ref = status.split(b' ', 1)
+
+                if status == b'ng':
+                    if b' ' in ref:
+                        ref, status = ref.split(b' ', 1)
+                else:
+                    ok.add(ref)
+                ref_status[ref] = status
+            # TODO(jelmer): don't assume encoding of refs is ascii.
+            raise UpdateRefsError(', '.join([
+                refname.decode('ascii') for refname in ref_status
+                if refname not in ok]) +
+                ' failed to update', ref_status=ref_status)
+
+    def handle_packet(self, pkt):
+        """Handle a packet.
+
+        :raise GitProtocolError: Raised when packets are received after a
+            flush packet.
+        """
+        if self._done:
+            raise GitProtocolError("received more data after status report")
+        if pkt is None:
+            self._done = True
+            return
+        if self._pack_status is None:
+            self._pack_status = pkt.strip()
+        else:
+            ref_status = pkt.strip()
+            self._ref_statuses.append(ref_status)
+            if not ref_status.startswith(b'ok '):
+                self._ref_status_ok = False
+
+
+def read_pkt_refs(proto):
+    server_capabilities = None
+    refs = {}
+    # Receive refs from server
+    for pkt in proto.read_pkt_seq():
+        (sha, ref) = pkt.rstrip(b'\n').split(None, 1)
+        if sha == b'ERR':
+            raise GitProtocolError(ref)
+        if server_capabilities is None:
+            (ref, server_capabilities) = extract_capabilities(ref)
+        refs[ref] = sha
+
+    if len(refs) == 0:
+        return None, set([])
+    if refs == {CAPABILITIES_REF: ZERO_SHA}:
+        refs = {}
+    return refs, set(server_capabilities)
+
+
+# TODO(durin42): this doesn't correctly degrade if the server doesn't
+# support some capabilities. This should work properly with servers
+# that don't support multi_ack.
+class GitClient(object):
+    """Git smart server client.
+
+    """
+
+    def __init__(self, thin_packs=True, report_activity=None, quiet=False):
+        """Create a new GitClient instance.
+
+        :param thin_packs: Whether or not thin packs should be retrieved
+        :param report_activity: Optional callback for reporting transport
+            activity.
+        """
+        self._report_activity = report_activity
+        self._report_status_parser = None
+        self._fetch_capabilities = set(FETCH_CAPABILITIES)
+        self._fetch_capabilities.add(capability_agent())
+        self._send_capabilities = set(SEND_CAPABILITIES)
+        self._send_capabilities.add(capability_agent())
+        if quiet:
+            self._send_capabilities.add(CAPABILITY_QUIET)
+        if not thin_packs:
+            self._fetch_capabilities.remove(CAPABILITY_THIN_PACK)
+
+    def get_url(self, path):
+        """Retrieves full url to given path.
+
+        :param path: Repository path (as string)
+        :return: Url to path (as string)
+        """
+        raise NotImplementedError(self.get_url)
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        """Create an instance of this client from a urlparse.parsed object.
+
+        :param parsedurl: Result of urlparse.urlparse()
+        :return: A `GitClient` object
+        """
+        raise NotImplementedError(cls.from_parsedurl)
+
+    def send_pack(self, path, update_refs, generate_pack_contents,
+                  progress=None, write_pack=write_pack_objects):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
+        :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
+
+        :raises SendPackError: if server rejects the pack data
+        :raises UpdateRefsError: if the server supports report-status
+                                 and rejects ref updates
+        :return: new_refs dictionary containing the changes that were made
+            {refname: new_ref}, including deleted refs.
+        """
+        raise NotImplementedError(self.send_pack)
+
+    def fetch(self, path, target, determine_wants=None, progress=None):
+        """Fetch into a target repository.
+
+        :param path: Path to fetch from (as bytestring)
+        :param target: Target repository to fetch into
+        :param determine_wants: Optional function to determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch. Defaults to all shas.
+        :param progress: Optional progress function
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        if determine_wants is None:
+            determine_wants = target.object_store.determine_wants_all
+        if CAPABILITY_THIN_PACK in self._fetch_capabilities:
+            # TODO(jelmer): Avoid reading entire file into memory and
+            # only processing it after the whole file has been fetched.
+            f = BytesIO()
+
+            def commit():
+                if f.tell():
+                    f.seek(0)
+                    target.object_store.add_thin_pack(f.read, None)
+
+            def abort():
+                pass
+        else:
+            f, commit, abort = target.object_store.add_pack()
+        try:
+            result = self.fetch_pack(
+                path, determine_wants, target.get_graph_walker(), f.write,
+                progress)
+        except BaseException:
+            abort()
+            raise
+        else:
+            commit()
+        return result
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
+                   progress=None):
+        """Retrieve a pack from a git smart server.
+
+        :param path: Remote path to fetch from
+        :param determine_wants: Function determine what refs
+            to fetch. Receives dictionary of name->sha, should return
+            list of shas to fetch.
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        raise NotImplementedError(self.fetch_pack)
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server.
+
+        :param path: Path to the repo to fetch from. (as bytestring)
+        """
+        raise NotImplementedError(self.get_refs)
+
+    def _parse_status_report(self, proto):
+        unpack = proto.read_pkt_line().strip()
+        if unpack != b'unpack ok':
+            st = True
+            # flush remaining error data
+            while st is not None:
+                st = proto.read_pkt_line()
+            raise SendPackError(unpack)
+        statuses = []
+        errs = False
+        ref_status = proto.read_pkt_line()
+        while ref_status:
+            ref_status = ref_status.strip()
+            statuses.append(ref_status)
+            if not ref_status.startswith(b'ok '):
+                errs = True
+            ref_status = proto.read_pkt_line()
+
+        if errs:
+            ref_status = {}
+            ok = set()
+            for status in statuses:
+                if b' ' not in status:
+                    # malformed response, move on to the next one
+                    continue
+                status, ref = status.split(b' ', 1)
+
+                if status == b'ng':
+                    if b' ' in ref:
+                        ref, status = ref.split(b' ', 1)
+                else:
+                    ok.add(ref)
+                ref_status[ref] = status
+            # TODO(jelmer): don't assume encoding of refs is ascii.
+            raise UpdateRefsError(', '.join([
+                refname.decode('ascii') for refname in ref_status
+                if refname not in ok]) +
+                ' failed to update', ref_status=ref_status)
+
+    def _read_side_band64k_data(self, proto, channel_callbacks):
+        """Read per-channel data.
+
+        This requires the side-band-64k capability.
+
+        :param proto: Protocol object to read from
+        :param channel_callbacks: Dictionary mapping channels to packet
+            handlers to use. A handler of None discards that channel's data.
+        """
+        for pkt in proto.read_pkt_seq():
+            channel = ord(pkt[:1])
+            pkt = pkt[1:]
+            try:
+                cb = channel_callbacks[channel]
+            except KeyError:
+                raise AssertionError('Invalid sideband channel %d' % channel)
+            else:
+                if cb is not None:
+                    cb(pkt)
+
+    def _handle_receive_pack_head(self, proto, capabilities, old_refs,
+                                  new_refs):
+        """Handle the head of a 'git-receive-pack' request.
+
+        :param proto: Protocol object to read from
+        :param capabilities: List of negotiated capabilities
+        :param old_refs: Old refs, as received from the server
+        :param new_refs: Refs to change
+        :return: (have, want) tuple
+        """
+        want = []
+        have = [x for x in old_refs.values() if not x == ZERO_SHA]
+        sent_capabilities = False
+
+        for refname in new_refs:
+            if not isinstance(refname, bytes):
+                raise TypeError('refname is not a bytestring: %r' % refname)
+            old_sha1 = old_refs.get(refname, ZERO_SHA)
+            if not isinstance(old_sha1, bytes):
+                raise TypeError('old sha1 for %s is not a bytestring: %r' %
+                                (refname, old_sha1))
+            new_sha1 = new_refs.get(refname, ZERO_SHA)
+            if not isinstance(new_sha1, bytes):
+                raise TypeError('new sha1 for %s is not a bytestring: %r' %
+                                (refname, new_sha1))
+
+            if old_sha1 != new_sha1:
+                if sent_capabilities:
+                    proto.write_pkt_line(old_sha1 + b' ' + new_sha1 + b' ' +
+                                         refname)
+                else:
+                    proto.write_pkt_line(
+                        old_sha1 + b' ' + new_sha1 + b' ' + refname + b'\0' +
+                        b' '.join(capabilities))
+                    sent_capabilities = True
+            if new_sha1 not in have and new_sha1 != ZERO_SHA:
+                want.append(new_sha1)
+        proto.write_pkt_line(None)
+        return (have, want)
+
+    def _handle_receive_pack_tail(self, proto, capabilities, progress=None):
+        """Handle the tail of a 'git-receive-pack' request.
+
+        :param proto: Protocol object to read from
+        :param capabilities: List of negotiated capabilities
+        :param progress: Optional progress reporting function
+        """
+        if b"side-band-64k" in capabilities:
+            if progress is None:
+                def progress(x):
+                    pass
+            channel_callbacks = {2: progress}
+            if CAPABILITY_REPORT_STATUS in capabilities:
+                channel_callbacks[1] = PktLineParser(
+                    self._report_status_parser.handle_packet).parse
+            self._read_side_band64k_data(proto, channel_callbacks)
+        else:
+            if CAPABILITY_REPORT_STATUS in capabilities:
+                for pkt in proto.read_pkt_seq():
+                    self._report_status_parser.handle_packet(pkt)
+        if self._report_status_parser is not None:
+            self._report_status_parser.check()
+
+    def _handle_upload_pack_head(self, proto, capabilities, graph_walker,
+                                 wants, can_read):
+        """Handle the head of a 'git-upload-pack' request.
+
+        :param proto: Protocol object to read from
+        :param capabilities: List of negotiated capabilities
+        :param graph_walker: GraphWalker instance to call .ack() on
+        :param wants: List of commits to fetch
+        :param can_read: function that returns a boolean that indicates
+            whether there is extra graph data to read on proto
+        """
+        assert isinstance(wants, list) and isinstance(wants[0], bytes)
+        proto.write_pkt_line(COMMAND_WANT + b' ' + wants[0] + b' ' +
+                             b' '.join(capabilities) + b'\n')
+        for want in wants[1:]:
+            proto.write_pkt_line(COMMAND_WANT + b' ' + want + b'\n')
+        proto.write_pkt_line(None)
+        have = next(graph_walker)
+        while have:
+            proto.write_pkt_line(COMMAND_HAVE + b' ' + have + b'\n')
+            if can_read():
+                pkt = proto.read_pkt_line()
+                parts = pkt.rstrip(b'\n').split(b' ')
+                if parts[0] == b'ACK':
+                    graph_walker.ack(parts[1])
+                    if parts[2] in (b'continue', b'common'):
+                        pass
+                    elif parts[2] == b'ready':
+                        break
+                    else:
+                        raise AssertionError(
+                            "%s not in ('continue', 'ready', 'common)" %
+                            parts[2])
+            have = next(graph_walker)
+        proto.write_pkt_line(COMMAND_DONE + b'\n')
+
+    def _handle_upload_pack_tail(self, proto, capabilities, graph_walker,
+                                 pack_data, progress=None, rbufsize=_RBUFSIZE):
+        """Handle the tail of a 'git-upload-pack' request.
+
+        :param proto: Protocol object to read from
+        :param capabilities: List of negotiated capabilities
+        :param graph_walker: GraphWalker instance to call .ack() on
+        :param pack_data: Function to call with pack data
+        :param progress: Optional progress reporting function
+        :param rbufsize: Read buffer size
+        """
+        pkt = proto.read_pkt_line()
+        while pkt:
+            parts = pkt.rstrip(b'\n').split(b' ')
+            if parts[0] == b'ACK':
+                graph_walker.ack(parts[1])
+            if len(parts) < 3 or parts[2] not in (
+                    b'ready', b'continue', b'common'):
+                break
+            pkt = proto.read_pkt_line()
+        if CAPABILITY_SIDE_BAND_64K in capabilities:
+            if progress is None:
+                # Just ignore progress data
+
+                def progress(x):
+                    pass
+            self._read_side_band64k_data(proto, {
+                SIDE_BAND_CHANNEL_DATA: pack_data,
+                SIDE_BAND_CHANNEL_PROGRESS: progress}
+            )
+        else:
+            while True:
+                data = proto.read(rbufsize)
+                if data == b"":
+                    break
+                pack_data(data)
+
+
+class TraditionalGitClient(GitClient):
+    """Traditional Git client."""
+
+    DEFAULT_ENCODING = 'utf-8'
+
+    def __init__(self, path_encoding=DEFAULT_ENCODING, **kwargs):
+        self._remote_path_encoding = path_encoding
+        super(TraditionalGitClient, self).__init__(**kwargs)
+
+    def _connect(self, cmd, path):
+        """Create a connection to the server.
+
+        This method is abstract - concrete implementations should
+        implement their own variant which connects to the server and
+        returns an initialized Protocol object with the service ready
+        for use and a can_read function which may be used to see if
+        reads would block.
+
+        :param cmd: The git service name to which we should connect.
+        :param path: The path we should pass to the service. (as bytestring)
+        """
+        raise NotImplementedError()
+
+    def send_pack(self, path, update_refs, generate_pack_contents,
+                  progress=None, write_pack=write_pack_objects):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
+        :param progress: Optional callback called with progress updates
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
+
+        :raises SendPackError: if server rejects the pack data
+        :raises UpdateRefsError: if the server supports report-status
+                                 and rejects ref updates
+        :return: new_refs dictionary containing the changes that were made
+            {refname: new_ref}, including deleted refs.
+        """
+        proto, unused_can_read = self._connect(b'receive-pack', path)
+        with proto:
+            old_refs, server_capabilities = read_pkt_refs(proto)
+            negotiated_capabilities = (
+                self._send_capabilities & server_capabilities)
+
+            if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
+                self._report_status_parser = ReportStatusParser()
+            report_status_parser = self._report_status_parser
+
+            try:
+                new_refs = orig_new_refs = update_refs(dict(old_refs))
+            except:
+                proto.write_pkt_line(None)
+                raise
+
+            if CAPABILITY_DELETE_REFS not in server_capabilities:
+                # Server does not support deletions. Fail later.
+                new_refs = dict(orig_new_refs)
+                for ref, sha in orig_new_refs.items():
+                    if sha == ZERO_SHA:
+                        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
+                            report_status_parser._ref_statuses.append(
+                                b'ng ' + ref +
+                                b' remote does not support deleting refs')
+                            report_status_parser._ref_status_ok = False
+                        del new_refs[ref]
+
+            if new_refs is None:
+                proto.write_pkt_line(None)
+                return old_refs
+
+            if len(new_refs) == 0 and len(orig_new_refs):
+                # NOOP - Original new refs filtered out by policy
+                proto.write_pkt_line(None)
+                if report_status_parser is not None:
+                    report_status_parser.check()
+                return old_refs
+
+            (have, want) = self._handle_receive_pack_head(
+                proto, negotiated_capabilities, old_refs, new_refs)
+            if (not want and
+                    set(new_refs.items()).issubset(set(old_refs.items()))):
+                return new_refs
+            objects = generate_pack_contents(have, want)
+
+            dowrite = len(objects) > 0
+            dowrite = dowrite or any(old_refs.get(ref) != sha
+                                     for (ref, sha) in new_refs.items()
+                                     if sha != ZERO_SHA)
+            if dowrite:
+                write_pack(proto.write_file(), objects)
+
+            self._handle_receive_pack_tail(
+                proto, negotiated_capabilities, progress)
+            return new_refs
+
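For illustration, an `update_refs` callback satisfying the contract documented above might look like the following; the ref names and `new_sha` are hypothetical, and `ZERO_SHA` is the module constant used to request a deletion::

    def update_refs(refs):
        # refs is a dict of the remote's current refs (name -> sha)
        refs[b'refs/heads/master'] = new_sha        # hypothetical new sha
        refs[b'refs/heads/old-topic'] = ZERO_SHA    # request deletion
        return refs
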
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
+                   progress=None):
+        """Retrieve a pack from a git smart server.
+
+        :param path: Remote path to fetch from
+        :param determine_wants: Function that determines which refs
+            to fetch. Receives a dictionary of name->sha, should return
+            a list of shas to fetch.
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        proto, can_read = self._connect(b'upload-pack', path)
+        with proto:
+            refs, server_capabilities = read_pkt_refs(proto)
+            negotiated_capabilities = (
+                self._fetch_capabilities & server_capabilities)
+
+            if refs is None:
+                proto.write_pkt_line(None)
+                return refs
+
+            try:
+                wants = determine_wants(refs)
+            except:
+                proto.write_pkt_line(None)
+                raise
+            if wants is not None:
+                wants = [cid for cid in wants if cid != ZERO_SHA]
+            if not wants:
+                proto.write_pkt_line(None)
+                return refs
+            self._handle_upload_pack_head(
+                proto, negotiated_capabilities, graph_walker, wants, can_read)
+            self._handle_upload_pack_tail(
+                proto, negotiated_capabilities, graph_walker, pack_data,
+                progress)
+            return refs
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+        # stock `git ls-remote` uses upload-pack
+        proto, _ = self._connect(b'upload-pack', path)
+        with proto:
+            refs, _ = read_pkt_refs(proto)
+            proto.write_pkt_line(None)
+            return refs
+
+    def archive(self, path, committish, write_data, progress=None,
+                write_error=None):
+        proto, can_read = self._connect(b'upload-archive', path)
+        with proto:
+            proto.write_pkt_line(b"argument " + committish)
+            proto.write_pkt_line(None)
+            pkt = proto.read_pkt_line()
+            if pkt == b"NACK\n":
+                return
+            elif pkt == b"ACK\n":
+                pass
+            elif pkt.startswith(b"ERR "):
+                raise GitProtocolError(pkt[4:].rstrip(b"\n"))
+            else:
+                raise AssertionError("invalid response %r" % pkt)
+            ret = proto.read_pkt_line()
+            if ret is not None:
+                raise AssertionError("expected pkt tail")
+            self._read_side_band64k_data(proto, {
+                SIDE_BAND_CHANNEL_DATA: write_data,
+                SIDE_BAND_CHANNEL_PROGRESS: progress,
+                SIDE_BAND_CHANNEL_FATAL: write_error})
+
+
+class TCPGitClient(TraditionalGitClient):
+    """A Git Client that works over TCP directly (i.e. git://)."""
+
+    def __init__(self, host, port=None, **kwargs):
+        if port is None:
+            port = TCP_GIT_PORT
+        self._host = host
+        self._port = port
+        super(TCPGitClient, self).__init__(**kwargs)
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        return cls(parsedurl.hostname, port=parsedurl.port, **kwargs)
+
+    def get_url(self, path):
+        netloc = self._host
+        if self._port is not None and self._port != TCP_GIT_PORT:
+            netloc += ":%d" % self._port
+        return urlparse.urlunsplit(("git", netloc, path, '', ''))
+
+    def _connect(self, cmd, path):
+        if not isinstance(cmd, bytes):
+            raise TypeError(cmd)
+        if not isinstance(path, bytes):
+            path = path.encode(self._remote_path_encoding)
+        sockaddrs = socket.getaddrinfo(
+            self._host, self._port, socket.AF_UNSPEC, socket.SOCK_STREAM)
+        s = None
+        err = socket.error("no address found for %s" % self._host)
+        for (family, socktype, proto, canonname, sockaddr) in sockaddrs:
+            s = socket.socket(family, socktype, proto)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
+            try:
+                s.connect(sockaddr)
+                break
+            except socket.error as e:
+                # Python 3 unbinds the 'as' target when the except block
+                # ends, so keep the last error for the raise below.
+                err = e
+                if s is not None:
+                    s.close()
+                s = None
+        if s is None:
+            raise err
+        # -1 means system default buffering
+        rfile = s.makefile('rb', -1)
+        # 0 means unbuffered
+        wfile = s.makefile('wb', 0)
+
+        def close():
+            rfile.close()
+            wfile.close()
+            s.close()
+
+        proto = Protocol(rfile.read, wfile.write, close,
+                         report_activity=self._report_activity)
+        if path.startswith(b"/~"):
+            path = path[1:]
+        # TODO(jelmer): Alternative to ascii?
+        proto.send_cmd(
+            b'git-' + cmd, path, b'host=' + self._host.encode('ascii'))
+        return proto, lambda: _fileno_can_read(s)
+
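Hypothetical usage of `TCPGitClient` (requires a reachable git:// daemon; the host and path are placeholders)::

    client = TCPGitClient('git.example.org')
    refs = client.get_refs(b'/project.git')  # dict of ref name -> sha
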
+
+class SubprocessWrapper(object):
+    """A socket-like object that talks to a subprocess via pipes."""
+
+    def __init__(self, proc):
+        self.proc = proc
+        if sys.version_info[0] == 2:
+            self.read = proc.stdout.read
+        else:
+            self.read = BufferedReader(proc.stdout).read
+        self.write = proc.stdin.write
+
+    def can_read(self):
+        if sys.platform == 'win32':
+            from msvcrt import get_osfhandle
+            handle = get_osfhandle(self.proc.stdout.fileno())
+            return _win32_peek_avail(handle) != 0
+        else:
+            return _fileno_can_read(self.proc.stdout.fileno())
+
+    def close(self):
+        self.proc.stdin.close()
+        self.proc.stdout.close()
+        if self.proc.stderr:
+            self.proc.stderr.close()
+        self.proc.wait()
+
+
+def find_git_command():
+    """Find command to run for system Git (usually C Git).
+    """
+    if sys.platform == 'win32':  # support .exe, .bat and .cmd
+        try:  # to avoid overhead
+            import win32api
+        except ImportError:  # run through cmd.exe with some overhead
+            return ['cmd', '/c', 'git']
+        else:
+            status, git = win32api.FindExecutable('git')
+            return [git]
+    else:
+        return ['git']
+
+
+class SubprocessGitClient(TraditionalGitClient):
+    """Git client that talks to a server using a subprocess."""
+
+    def __init__(self, **kwargs):
+        self._connection = None
+        self._stderr = kwargs.pop('stderr', None)
+        super(SubprocessGitClient, self).__init__(**kwargs)
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        return cls(**kwargs)
+
+    git_command = None
+
+    def _connect(self, service, path):
+        if not isinstance(service, bytes):
+            raise TypeError(service)
+        if isinstance(path, bytes):
+            path = path.decode(self._remote_path_encoding)
+        git_command = self.git_command
+        if git_command is None:
+            git_command = find_git_command()
+        argv = git_command + [service.decode('ascii'), path]
+        p = SubprocessWrapper(
+            subprocess.Popen(argv, bufsize=0, stdin=subprocess.PIPE,
+                             stdout=subprocess.PIPE,
+                             stderr=self._stderr))
+        return Protocol(p.read, p.write, p.close,
+                        report_activity=self._report_activity), p.can_read
+
+
+class LocalGitClient(GitClient):
+    """Git Client that just uses a local Repo."""
+
+    def __init__(self, thin_packs=True, report_activity=None):
+        """Create a new LocalGitClient instance.
+
+        :param thin_packs: Whether or not thin packs should be retrieved
+        :param report_activity: Optional callback for reporting transport
+            activity.
+        """
+        self._report_activity = report_activity
+        # Ignore the thin_packs argument
+
+    def get_url(self, path):
+        return urlparse.urlunsplit(('file', '', path, '', ''))
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        return cls(**kwargs)
+
+    @classmethod
+    def _open_repo(cls, path):
+        from dulwich.repo import Repo
+        if not isinstance(path, str):
+            path = path.decode(sys.getfilesystemencoding())
+        return closing(Repo(path))
+
+    def send_pack(self, path, update_refs, generate_pack_contents,
+                  progress=None, write_pack=write_pack_objects):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
+        :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
+
+        :raises SendPackError: if server rejects the pack data
+        :raises UpdateRefsError: if the server supports report-status
+                                 and rejects ref updates
+        :return: new_refs dictionary containing the changes that were made
+            {refname: new_ref}, including deleted refs.
+        """
+        if not progress:
+            def progress(x):
+                pass
+
+        with self._open_repo(path) as target:
+            old_refs = target.get_refs()
+            new_refs = update_refs(dict(old_refs))
+
+            have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
+            want = []
+            for refname, new_sha1 in new_refs.items():
+                if (new_sha1 not in have and
+                        new_sha1 not in want and
+                        new_sha1 != ZERO_SHA):
+                    want.append(new_sha1)
+
+            if (not want and
+                    set(new_refs.items()).issubset(set(old_refs.items()))):
+                return new_refs
+
+            target.object_store.add_objects(generate_pack_contents(have, want))
+
+            for refname, new_sha1 in new_refs.items():
+                old_sha1 = old_refs.get(refname, ZERO_SHA)
+                if new_sha1 != ZERO_SHA:
+                    if not target.refs.set_if_equals(
+                            refname, old_sha1, new_sha1):
+                        progress('unable to set %s to %s' %
+                                 (refname, new_sha1))
+                else:
+                    if not target.refs.remove_if_equals(refname, old_sha1):
+                        progress('unable to remove %s' % refname)
+
+        return new_refs
+
+    def fetch(self, path, target, determine_wants=None, progress=None):
+        """Fetch into a target repository.
+
+        :param path: Path to fetch from (as bytestring)
+        :param target: Target repository to fetch into
+        :param determine_wants: Optional function that determines which refs
+            to fetch. Receives a dictionary of name->sha, should return
+            a list of shas to fetch. Defaults to all shas.
+        :param progress: Optional progress function
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        with self._open_repo(path) as r:
+            return r.fetch(target, determine_wants=determine_wants,
+                           progress=progress)
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
+                   progress=None):
+        """Retrieve a pack from a git smart server.
+
+        :param path: Remote path to fetch from
+        :param determine_wants: Function that determines which refs
+            to fetch. Receives a dictionary of name->sha, should return
+            a list of shas to fetch.
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        with self._open_repo(path) as r:
+            objects_iter = r.fetch_objects(
+                determine_wants, graph_walker, progress)
+
+            # Did the process short-circuit (e.g. in a stateless RPC call)?
+            # Note that the client still expects a 0-object pack in most cases.
+            if objects_iter is None:
+                return
+            write_pack_objects(ProtocolFile(None, pack_data), objects_iter)
+            return r.get_refs()
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+
+        with self._open_repo(path) as target:
+            return target.get_refs()
+
+
+# What Git client to use for local access
+default_local_git_client_cls = LocalGitClient
+
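A sketch of copying objects between two on-disk repositories with the `LocalGitClient.fetch` method defined above; both paths are placeholders::

    from dulwich.repo import Repo

    client = LocalGitClient()
    with Repo('/path/to/target') as target:
        remote_refs = client.fetch('/path/to/source', target)
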
+
+class SSHVendor(object):
+    """A client side SSH implementation."""
+
+    def connect_ssh(self, host, command, username=None, port=None):
+        # This function was deprecated in 0.9.1
+        import warnings
+        warnings.warn(
+            "SSHVendor.connect_ssh has been renamed to SSHVendor.run_command",
+            DeprecationWarning)
+        return self.run_command(host, command, username=username, port=port)
+
+    def run_command(self, host, command, username=None, port=None):
+        """Connect to an SSH server.
+
+        Run a command remotely and return a file-like object for interaction
+        with the remote command.
+
+        :param host: Host name
+        :param command: Command to run (as argv array)
+        :param username: Optional name of the user to log in as
+        :param port: Optional SSH port to use
+        """
+        raise NotImplementedError(self.run_command)
+
+
+class SubprocessSSHVendor(SSHVendor):
+    """SSH vendor that shells out to the local 'ssh' command."""
+
+    def run_command(self, host, command, username=None, port=None):
+        # FIXME: This has no way to deal with passwords..
+        args = ['ssh', '-x']
+        if port is not None:
+            args.extend(['-p', str(port)])
+        if username is not None:
+            host = '%s@%s' % (username, host)
+        args.append(host)
+        proc = subprocess.Popen(args + [command], bufsize=0,
+                                stdin=subprocess.PIPE,
+                                stdout=subprocess.PIPE)
+        return SubprocessWrapper(proc)
+
+
+def ParamikoSSHVendor(**kwargs):
+    import warnings
+    warnings.warn(
+        "ParamikoSSHVendor has been moved to dulwich.contrib.paramiko_vendor.",
+        DeprecationWarning)
+    from dulwich.contrib.paramiko_vendor import ParamikoSSHVendor
+    return ParamikoSSHVendor(**kwargs)
+
+
+# Can be overridden by users
+get_ssh_vendor = SubprocessSSHVendor
+
+
+class SSHGitClient(TraditionalGitClient):
+
+    def __init__(self, host, port=None, username=None, vendor=None, **kwargs):
+        self.host = host
+        self.port = port
+        self.username = username
+        super(SSHGitClient, self).__init__(**kwargs)
+        self.alternative_paths = {}
+        if vendor is not None:
+            self.ssh_vendor = vendor
+        else:
+            self.ssh_vendor = get_ssh_vendor()
+
+    def get_url(self, path):
+        netloc = self.host
+        if self.port is not None:
+            netloc += ":%d" % self.port
+
+        if self.username is not None:
+            netloc = urlquote(self.username, '@/:') + "@" + netloc
+
+        return urlparse.urlunsplit(('ssh', netloc, path, '', ''))
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        return cls(host=parsedurl.hostname, port=parsedurl.port,
+                   username=parsedurl.username, **kwargs)
+
+    def _get_cmd_path(self, cmd):
+        cmd = self.alternative_paths.get(cmd, b'git-' + cmd)
+        assert isinstance(cmd, bytes)
+        return cmd
+
+    def _connect(self, cmd, path):
+        if not isinstance(cmd, bytes):
+            raise TypeError(cmd)
+        if isinstance(path, bytes):
+            path = path.decode(self._remote_path_encoding)
+        if path.startswith("/~"):
+            path = path[1:]
+        argv = (self._get_cmd_path(cmd).decode(self._remote_path_encoding) +
+                " '" + path + "'")
+        con = self.ssh_vendor.run_command(
+            self.host, argv, port=self.port, username=self.username)
+        return (Protocol(con.read, con.write, con.close,
+                         report_activity=self._report_activity),
+                con.can_read)
+
+
+def default_user_agent_string():
+    return "dulwich/%s" % ".".join([str(x) for x in dulwich.__version__])
+
+
+def default_urllib2_opener(config):
+    if config is not None:
+        proxy_server = config.get("http", "proxy")
+    else:
+        proxy_server = None
+    handlers = []
+    if proxy_server is not None:
+        handlers.append(urllib2.ProxyHandler({"http": proxy_server}))
+    opener = urllib2.build_opener(*handlers)
+    if config is not None:
+        user_agent = config.get("http", "useragent")
+    else:
+        user_agent = None
+    if user_agent is None:
+        user_agent = default_user_agent_string()
+    opener.addheaders = [('User-agent', user_agent)]
+    return opener
+
+
+class HttpGitClient(GitClient):
+
+    def __init__(self, base_url, dumb=None, opener=None, config=None,
+                 username=None, password=None, **kwargs):
+        self._base_url = base_url.rstrip("/") + "/"
+        self._username = username
+        self._password = password
+        self.dumb = dumb
+        if opener is None:
+            self.opener = default_urllib2_opener(config)
+        else:
+            self.opener = opener
+        if username is not None:
+            pass_man = urllib2.HTTPPasswordMgrWithDefaultRealm()
+            pass_man.add_password(None, base_url, username, password)
+            self.opener.add_handler(urllib2.HTTPBasicAuthHandler(pass_man))
+        GitClient.__init__(self, **kwargs)
+
+    def get_url(self, path):
+        return self._get_url(path).rstrip("/")
+
+    @classmethod
+    def from_parsedurl(cls, parsedurl, **kwargs):
+        auth, host = urllib2.splituser(parsedurl.netloc)
+        password = parsedurl.password
+        if password is not None:
+            password = urlunquote(password)
+        username = parsedurl.username
+        if username is not None:
+            username = urlunquote(username)
+        # TODO(jelmer): This also strips the username
+        parsedurl = parsedurl._replace(netloc=host)
+        return cls(urlparse.urlunparse(parsedurl),
+                   password=password, username=username, **kwargs)
+
+    def __repr__(self):
+        return "%s(%r, dumb=%r)" % (
+            type(self).__name__, self._base_url, self.dumb)
+
+    def _get_url(self, path):
+        if not isinstance(path, str):
+            # TODO(jelmer): this is unrelated to the local filesystem;
+            # This is not necessarily the right encoding to decode the path
+            # with.
+            path = path.decode(sys.getfilesystemencoding())
+        return urlparse.urljoin(self._base_url, path).rstrip("/") + "/"
+
+    def _http_request(self, url, headers={}, data=None):
+        req = urllib2.Request(url, headers=headers, data=data)
+        try:
+            resp = self.opener.open(req)
+        except urllib2.HTTPError as e:
+            if e.code == 404:
+                raise NotGitRepository()
+            if e.code != 200:
+                raise GitProtocolError("unexpected http response %d" % e.code)
+        return resp
+
+    def _discover_references(self, service, url):
+        assert url[-1] == "/"
+        url = urlparse.urljoin(url, "info/refs")
+        headers = {}
+        if self.dumb is not False:
+            url += "?service=%s" % service.decode('ascii')
+            headers["Content-Type"] = "application/x-%s-request" % (
+                service.decode('ascii'))
+        resp = self._http_request(url, headers)
+        try:
+            content_type = resp.info().gettype()
+        except AttributeError:
+            content_type = resp.info().get_content_type()
+        try:
+            self.dumb = (not content_type.startswith("application/x-git-"))
+            if not self.dumb:
+                proto = Protocol(resp.read, None)
+                # The first line should mention the service
+                try:
+                    [pkt] = list(proto.read_pkt_seq())
+                except ValueError:
+                    raise GitProtocolError(
+                        "unexpected number of packets received")
+                if pkt.rstrip(b'\n') != (b'# service=' + service):
+                    raise GitProtocolError(
+                        "unexpected first line %r from smart server" % pkt)
+                return read_pkt_refs(proto)
+            else:
+                return read_info_refs(resp), set()
+        finally:
+            resp.close()
+
+    def _smart_request(self, service, url, data):
+        assert url[-1] == "/"
+        url = urlparse.urljoin(url, service)
+        headers = {
+            "Content-Type": "application/x-%s-request" % service
+        }
+        resp = self._http_request(url, headers, data)
+        try:
+            content_type = resp.info().gettype()
+        except AttributeError:
+            content_type = resp.info().get_content_type()
+        if content_type != (
+                "application/x-%s-result" % service):
+            raise GitProtocolError("Invalid content-type from server: %s"
+                                   % content_type)
+        return resp
+
+    def send_pack(self, path, update_refs, generate_pack_contents,
+                  progress=None, write_pack=write_pack_objects):
+        """Upload a pack to a remote repository.
+
+        :param path: Repository path (as bytestring)
+        :param update_refs: Function to determine changes to remote refs.
+            Receive dict with existing remote refs, returns dict with
+            changed refs (name -> sha, where sha=ZERO_SHA for deletions)
+        :param generate_pack_contents: Function that can return a sequence of
+            the shas of the objects to upload.
+        :param progress: Optional progress function
+        :param write_pack: Function called with (file, iterable of objects) to
+            write the objects returned by generate_pack_contents to the server.
+
+        :raises SendPackError: if server rejects the pack data
+        :raises UpdateRefsError: if the server supports report-status
+                                 and rejects ref updates
+        :return: new_refs dictionary containing the changes that were made
+            {refname: new_ref}, including deleted refs.
+        """
+        url = self._get_url(path)
+        old_refs, server_capabilities = self._discover_references(
+            b"git-receive-pack", url)
+        negotiated_capabilities = self._send_capabilities & server_capabilities
+
+        if CAPABILITY_REPORT_STATUS in negotiated_capabilities:
+            self._report_status_parser = ReportStatusParser()
+
+        new_refs = update_refs(dict(old_refs))
+        if new_refs is None:
+            # Determine wants function is aborting the push.
+            return old_refs
+        if self.dumb:
+            raise NotImplementedError(self.send_pack)
+        req_data = BytesIO()
+        req_proto = Protocol(None, req_data.write)
+        (have, want) = self._handle_receive_pack_head(
+            req_proto, negotiated_capabilities, old_refs, new_refs)
+        if not want and set(new_refs.items()).issubset(set(old_refs.items())):
+            return new_refs
+        objects = generate_pack_contents(have, want)
+        if len(objects) > 0:
+            write_pack(req_proto.write_file(), objects)
+        resp = self._smart_request("git-receive-pack", url,
+                                   data=req_data.getvalue())
+        try:
+            resp_proto = Protocol(resp.read, None)
+            self._handle_receive_pack_tail(
+                resp_proto, negotiated_capabilities, progress)
+            return new_refs
+        finally:
+            resp.close()
+
+    def fetch_pack(self, path, determine_wants, graph_walker, pack_data,
+                   progress=None):
+        """Retrieve a pack from a git smart server.
+
+        :param path: Remote path to fetch from
+        :param determine_wants: Callback that returns list of commits to fetch
+        :param graph_walker: Object with next() and ack().
+        :param pack_data: Callback called for each bit of data in the pack
+        :param progress: Callback for progress reports (strings)
+        :return: Dictionary with all remote refs (not just those fetched)
+        """
+        url = self._get_url(path)
+        refs, server_capabilities = self._discover_references(
+            b"git-upload-pack", url)
+        negotiated_capabilities = (
+            self._fetch_capabilities & server_capabilities)
+        wants = determine_wants(refs)
+        if wants is not None:
+            wants = [cid for cid in wants if cid != ZERO_SHA]
+        if not wants:
+            return refs
+        if self.dumb:
+            raise NotImplementedError(self.fetch_pack)
+        req_data = BytesIO()
+        req_proto = Protocol(None, req_data.write)
+        self._handle_upload_pack_head(
+                req_proto, negotiated_capabilities, graph_walker, wants,
+                lambda: False)
+        resp = self._smart_request(
+            "git-upload-pack", url, data=req_data.getvalue())
+        try:
+            resp_proto = Protocol(resp.read, None)
+            self._handle_upload_pack_tail(
+                resp_proto, negotiated_capabilities, graph_walker, pack_data,
+                progress)
+            return refs
+        finally:
+            resp.close()
+
+    def get_refs(self, path):
+        """Retrieve the current refs from a git smart server."""
+        url = self._get_url(path)
+        refs, _ = self._discover_references(
+            b"git-upload-pack", url)
+        return refs
+
+
+def get_transport_and_path_from_url(url, config=None, **kwargs):
+    """Obtain a git client from a URL.
+
+    :param url: URL to open (a unicode string)
+    :param config: Optional config object
+    :param thin_packs: Whether or not thin packs should be retrieved
+    :param report_activity: Optional callback for reporting transport
+        activity.
+    :return: Tuple with client instance and relative path.
+    """
+    parsed = urlparse.urlparse(url)
+    if parsed.scheme == 'git':
+        return (TCPGitClient.from_parsedurl(parsed, **kwargs),
+                parsed.path)
+    elif parsed.scheme in ('git+ssh', 'ssh'):
+        return SSHGitClient.from_parsedurl(parsed, **kwargs), parsed.path
+    elif parsed.scheme in ('http', 'https'):
+        return HttpGitClient.from_parsedurl(
+            parsed, config=config, **kwargs), parsed.path
+    elif parsed.scheme == 'file':
+        return default_local_git_client_cls.from_parsedurl(
+            parsed, **kwargs), parsed.path
+
+    raise ValueError("unknown scheme '%s'" % parsed.scheme)
+
+
+def get_transport_and_path(location, **kwargs):
+    """Obtain a git client from a URL.
+
+    :param location: URL or path (a string)
+    :param config: Optional config object
+    :param thin_packs: Whether or not thin packs should be retrieved
+    :param report_activity: Optional callback for reporting transport
+        activity.
+    :return: Tuple with client instance and relative path.
+    """
+    # First, try to parse it as a URL
+    try:
+        return get_transport_and_path_from_url(location, **kwargs)
+    except ValueError:
+        pass
+
+    if (sys.platform == 'win32' and
+            location[0].isalpha() and location[1:3] == ':\\'):
+        # Windows local path
+        return default_local_git_client_cls(**kwargs), location
+
+    if ':' in location and '@' not in location:
+        # SSH with no user@, zero or one leading slash.
+        (hostname, path) = location.split(':', 1)
+        return SSHGitClient(hostname, **kwargs), path
+    elif ':' in location:
+        # SSH with user@host:foo.
+        user_host, path = location.split(':', 1)
+        if '@' in user_host:
+            user, host = user_host.rsplit('@', 1)
+        else:
+            user = None
+            host = user_host
+        return SSHGitClient(host, username=user, **kwargs), path
+
+    # Otherwise, assume it's a local path.
+    return default_local_git_client_cls(**kwargs), location

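To illustrate the dispatch rules implemented by the two functions above (all locations are placeholders)::

    client, path = get_transport_and_path('git://git.example.org/repo.git')
    # -> TCPGitClient, '/repo.git'
    client, path = get_transport_and_path('user@host.example.org:repo.git')
    # -> SSHGitClient (username='user'), 'repo.git'
    client, path = get_transport_and_path('/srv/git/repo.git')
    # -> LocalGitClient, '/srv/git/repo.git'
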
+ 445 - 0
dulwich/config.py

@@ -0,0 +1,445 @@
+# config.py - Reading and writing Git config files
+# Copyright (C) 2011-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Reading and writing Git configuration files.
+
+TODO:
+ * preserve formatting when updating configuration files
+ * treat subsection names as case-insensitive for [branch.foo] style
+   subsections
+"""
+
+import errno
+import os
+
+from collections import (
+    OrderedDict,
+    MutableMapping,
+    )
+
+
+from dulwich.file import GitFile
+
+
+class Config(object):
+    """A Git configuration."""
+
+    def get(self, section, name):
+        """Retrieve the contents of a configuration setting.
+
+        :param section: Tuple with section name and optional subsection name
+        :param name: Name of the setting
+        :return: Contents of the setting
+        :raise KeyError: if the value is not set
+        """
+        raise NotImplementedError(self.get)
+
+    def get_boolean(self, section, name, default=None):
+        """Retrieve a configuration setting as boolean.
+
+        :param section: Tuple with section name and optional subsection name
+        :param name: Name of the setting
+        :param default: Value to return if the setting is not set
+        :return: Contents of the setting as a boolean
+        :raise ValueError: if the value is not a valid boolean string
+        """
+        try:
+            value = self.get(section, name)
+        except KeyError:
+            return default
+        if value.lower() == b"true":
+            return True
+        elif value.lower() == b"false":
+            return False
+        raise ValueError("not a valid boolean string: %r" % value)
+
+    def set(self, section, name, value):
+        """Set a configuration value.
+
+        :param section: Tuple with section name and optional subsection name
+        :param name: Name of the configuration value
+        :param value: Value of the setting
+        """
+        raise NotImplementedError(self.set)
+
+    def iteritems(self, section):
+        """Iterate over the configuration pairs for a specific section.
+
+        :param section: Tuple with section name and optional subsection name
+        :return: Iterator over (name, value) pairs
+        """
+        raise NotImplementedError(self.iteritems)
+
+    def itersections(self):
+        """Iterate over the sections.
+
+        :return: Iterator over section tuples
+        """
+        raise NotImplementedError(self.itersections)
+
+    def has_section(self, name):
+        """Check if a specified section exists.
+
+        :param name: Name of section to check for
+        :return: boolean indicating whether the section exists
+        """
+        return (name in self.itersections())
+
+
+class ConfigDict(Config, MutableMapping):
+    """Git configuration stored in a dictionary."""
+
+    def __init__(self, values=None):
+        """Create a new ConfigDict."""
+        if values is None:
+            values = OrderedDict()
+        self._values = values
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._values)
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, self.__class__) and
+            other._values == self._values)
+
+    def __getitem__(self, key):
+        return self._values.__getitem__(key)
+
+    def __setitem__(self, key, value):
+        return self._values.__setitem__(key, value)
+
+    def __delitem__(self, key):
+        return self._values.__delitem__(key)
+
+    def __iter__(self):
+        return self._values.__iter__()
+
+    def __len__(self):
+        return self._values.__len__()
+
+    @classmethod
+    def _parse_setting(cls, name):
+        parts = name.split(".")
+        if len(parts) == 3:
+            return (parts[0], parts[1], parts[2])
+        else:
+            return (parts[0], None, parts[1])
+
+    def get(self, section, name):
+        if not isinstance(section, tuple):
+            section = (section, )
+        if len(section) > 1:
+            try:
+                return self._values[section][name]
+            except KeyError:
+                pass
+        return self._values[(section[0],)][name]
+
+    def set(self, section, name, value):
+        if not isinstance(section, tuple):
+            section = (section, )
+        if not isinstance(name, bytes):
+            raise TypeError(name)
+        if type(value) not in (bool, bytes):
+            raise TypeError(value)
+        self._values.setdefault(section, OrderedDict())[name] = value
+
+    def iteritems(self, section):
+        return self._values.get(section, OrderedDict()).items()
+
+    def itersections(self):
+        return self._values.keys()
+
+
+def _format_string(value):
+    if (value.startswith(b" ") or
+            value.startswith(b"\t") or
+            value.endswith(b" ") or
+            b'#' in value or
+            value.endswith(b"\t")):
+        return b'"' + _escape_value(value) + b'"'
+    else:
+        return _escape_value(value)
+
+
+_ESCAPE_TABLE = {
+    ord(b"\\"): ord(b"\\"),
+    ord(b"\""): ord(b"\""),
+    ord(b"n"): ord(b"\n"),
+    ord(b"t"): ord(b"\t"),
+    ord(b"b"): ord(b"\b"),
+    }
+_COMMENT_CHARS = [ord(b"#"), ord(b";")]
+_WHITESPACE_CHARS = [ord(b"\t"), ord(b" ")]
+
+
+def _parse_string(value):
+    value = bytearray(value.strip())
+    ret = bytearray()
+    whitespace = bytearray()
+    in_quotes = False
+    i = 0
+    while i < len(value):
+        c = value[i]
+        if c == ord(b"\\"):
+            i += 1
+            try:
+                v = _ESCAPE_TABLE[value[i]]
+            except IndexError:
+                raise ValueError(
+                    "escape character in %r at %d before end of string" %
+                    (value, i))
+            except KeyError:
+                raise ValueError(
+                    "escape character followed by unknown character "
+                    "%s at %d in %r" % (value[i], i, value))
+            if whitespace:
+                ret.extend(whitespace)
+                whitespace = bytearray()
+            ret.append(v)
+        elif c == ord(b"\""):
+            in_quotes = (not in_quotes)
+        elif c in _COMMENT_CHARS and not in_quotes:
+            # the rest of the line is a comment
+            break
+        elif c in _WHITESPACE_CHARS:
+            whitespace.append(c)
+        else:
+            if whitespace:
+                ret.extend(whitespace)
+                whitespace = bytearray()
+            ret.append(c)
+        i += 1
+
+    if in_quotes:
+        raise ValueError("missing end quote")
+
+    return bytes(ret)
+
+
+def _escape_value(value):
+    """Escape a value."""
+    value = value.replace(b"\\", b"\\\\")
+    value = value.replace(b"\n", b"\\n")
+    value = value.replace(b"\t", b"\\t")
+    value = value.replace(b"\"", b"\\\"")
+    return value
+
+
+def _check_variable_name(name):
+    for i in range(len(name)):
+        c = name[i:i+1]
+        if not c.isalnum() and c != b'-':
+            return False
+    return True
+
+
+def _check_section_name(name):
+    for i in range(len(name)):
+        c = name[i:i+1]
+        if not c.isalnum() and c not in (b'-', b'.'):
+            return False
+    return True
+
+
+def _strip_comments(line):
+    line = line.split(b"#")[0]
+    line = line.split(b";")[0]
+    return line
+
+
+class ConfigFile(ConfigDict):
+    """A Git configuration file, like .git/config or ~/.gitconfig.
+    """
+
+    @classmethod
+    def from_file(cls, f):
+        """Read configuration from a file-like object."""
+        ret = cls()
+        section = None
+        setting = None
+        for lineno, line in enumerate(f.readlines()):
+            line = line.lstrip()
+            if setting is None:
+                # Parse section header ("[bla]")
+                if len(line) > 0 and line[:1] == b"[":
+                    line = _strip_comments(line).rstrip()
+                    last = line.index(b"]")
+                    if last == -1:
+                        raise ValueError("expected trailing ]")
+                    pts = line[1:last].split(b" ", 1)
+                    line = line[last+1:]
+                    pts[0] = pts[0].lower()
+                    if len(pts) == 2:
+                        if pts[1][:1] != b"\"" or pts[1][-1:] != b"\"":
+                            raise ValueError(
+                                "Invalid subsection %r" % pts[1])
+                        else:
+                            pts[1] = pts[1][1:-1]
+                        if not _check_section_name(pts[0]):
+                            raise ValueError("invalid section name %r" %
+                                             pts[0])
+                        section = (pts[0], pts[1])
+                    else:
+                        if not _check_section_name(pts[0]):
+                            raise ValueError(
+                                "invalid section name %r" % pts[0])
+                        pts = pts[0].split(b".", 1)
+                        if len(pts) == 2:
+                            section = (pts[0], pts[1])
+                        else:
+                            section = (pts[0], )
+                    ret._values[section] = OrderedDict()
+                if _strip_comments(line).strip() == b"":
+                    continue
+                if section is None:
+                    raise ValueError("setting %r without section" % line)
+                try:
+                    setting, value = line.split(b"=", 1)
+                except ValueError:
+                    setting = line
+                    value = b"true"
+                setting = setting.strip().lower()
+                if not _check_variable_name(setting):
+                    raise ValueError("invalid variable name %s" % setting)
+                if value.endswith(b"\\\n"):
+                    continuation = value[:-2]
+                else:
+                    continuation = None
+                    value = _parse_string(value)
+                    ret._values[section][setting] = value
+                    setting = None
+            else:  # continuation line
+                if line.endswith(b"\\\n"):
+                    continuation += line[:-2]
+                else:
+                    continuation += line
+                    value = _parse_string(continuation)
+                    ret._values[section][setting] = value
+                    continuation = None
+                    setting = None
+        return ret
+
+    @classmethod
+    def from_path(cls, path):
+        """Read configuration from a file on disk."""
+        with GitFile(path, 'rb') as f:
+            ret = cls.from_file(f)
+            ret.path = path
+            return ret
+
+    def write_to_path(self, path=None):
+        """Write configuration to a file on disk."""
+        if path is None:
+            path = self.path
+        with GitFile(path, 'wb') as f:
+            self.write_to_file(f)
+
+    def write_to_file(self, f):
+        """Write configuration to a file-like object."""
+        for section, values in self._values.items():
+            try:
+                section_name, subsection_name = section
+            except ValueError:
+                (section_name, ) = section
+                subsection_name = None
+            if subsection_name is None:
+                f.write(b"[" + section_name + b"]\n")
+            else:
+                f.write(b"[" + section_name +
+                        b" \"" + subsection_name + b"\"]\n")
+            for key, value in values.items():
+                if value is True:
+                    value = b"true"
+                elif value is False:
+                    value = b"false"
+                else:
+                    value = _format_string(value)
+                f.write(b"\t" + key + b" = " + value + b"\n")
+
+
+class StackedConfig(Config):
+    """Configuration which reads from multiple config files."""
+
+    def __init__(self, backends, writable=None):
+        self.backends = backends
+        self.writable = writable
+
+    def __repr__(self):
+        return "<%s for %r>" % (self.__class__.__name__, self.backends)
+
+    @classmethod
+    def default_backends(cls):
+        """Retrieve the default configuration.
+
+        See git-config(1) for details on the files searched.
+        """
+        paths = []
+        paths.append(os.path.expanduser("~/.gitconfig"))
+
+        xdg_config_home = os.environ.get(
+            "XDG_CONFIG_HOME", os.path.expanduser("~/.config/"),
+        )
+        paths.append(os.path.join(xdg_config_home, "git", "config"))
+
+        if "GIT_CONFIG_NOSYSTEM" not in os.environ:
+            paths.append("/etc/gitconfig")
+
+        backends = []
+        for path in paths:
+            try:
+                cf = ConfigFile.from_path(path)
+            except (IOError, OSError) as e:
+                if e.errno != errno.ENOENT:
+                    raise
+                else:
+                    continue
+            backends.append(cf)
+        return backends
+
+    def get(self, section, name):
+        for backend in self.backends:
+            try:
+                return backend.get(section, name)
+            except KeyError:
+                pass
+        raise KeyError(name)
+
+    def set(self, section, name, value):
+        if self.writable is None:
+            raise NotImplementedError(self.set)
+        return self.writable.set(section, name, value)
+
+
+def parse_submodules(config):
+    """Parse a gitmodules GitConfig file, returning submodules.
+
+    :param config: A `ConfigFile`
+    :return: iterator over tuples (submodule path, url, name),
+        where name is the quoted part of the section's name.
+    """
+    for section in config.keys():
+        section_kind, section_name = section
+        if section_kind == b'submodule':
+            sm_path = config.get(section, b'path')
+            sm_url = config.get(section, b'url')
+            yield (sm_path, sm_url, section_name)

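A quick illustration of the `ConfigFile` API defined above; note that sections are tuples and values are bytes::

    from io import BytesIO
    from dulwich.config import ConfigFile

    f = BytesIO(b'[core]\n\tfilemode = true\n'
                b'[remote "origin"]\n\turl = git://example.org/repo.git\n')
    cf = ConfigFile.from_file(f)
    cf.get((b'core',), b'filemode')          # b'true'
    cf.get((b'remote', b'origin'), b'url')   # b'git://example.org/repo.git'
    cf.set((b'core',), b'bare', b'false')
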
+ 30 - 0
dulwich/contrib/__init__.py

@@ -0,0 +1,30 @@
+# __init__.py -- Contrib module for Dulwich
+# Copyright (C) 2014 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+def test_suite():
+    import unittest
+    names = [
+        'release_robot',
+        'swift',
+        ]
+    module_names = ['dulwich.contrib.test_' + name for name in names]
+    loader = unittest.TestLoader()
+    return loader.loadTestsFromNames(module_names)

+ 138 - 0
dulwich/contrib/paramiko_vendor.py

@@ -0,0 +1,138 @@
+# paramiko_vendor.py -- paramiko implementation of the SSHVendor interface
+# Copyright (C) 2013 Aaron O'Mullan <aaron.omullan@friendco.de>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Paramiko SSH support for Dulwich.
+
+To use this implementation as the SSH implementation in Dulwich, override
+the dulwich.client.get_ssh_vendor attribute:
+
+  >>> from dulwich import client as _mod_client
+  >>> from dulwich.contrib.paramiko_vendor import ParamikoSSHVendor
+  >>> _mod_client.get_ssh_vendor = ParamikoSSHVendor
+
+This implementation is experimental and does not have any tests.
+"""
+
+import paramiko
+import paramiko.client
+import threading
+
+
+class _ParamikoWrapper(object):
+    STDERR_READ_N = 2048  # 2k
+
+    def __init__(self, client, channel, progress_stderr=None):
+        self.client = client
+        self.channel = channel
+        self.progress_stderr = progress_stderr
+        # stderr is always monitored (drained into self.stderr), even when
+        # no progress callback was given
+        self.should_monitor = True
+        self.monitor_thread = None
+        self.stderr = b''
+
+        # Channel must block
+        self.channel.setblocking(True)
+
+        # Start
+        if self.should_monitor:
+            self.monitor_thread = threading.Thread(
+                target=self.monitor_stderr)
+            self.monitor_thread.start()
+
+    def monitor_stderr(self):
+        while self.should_monitor:
+            # Block and read
+            data = self.read_stderr(self.STDERR_READ_N)
+
+            # Socket closed
+            if not data:
+                self.should_monitor = False
+                break
+
+            # Emit data
+            if self.progress_stderr:
+                self.progress_stderr(data)
+
+            # Append to buffer
+            self.stderr += data
+
+    def stop_monitoring(self):
+        # Stop StdErr thread
+        if self.should_monitor:
+            self.should_monitor = False
+            self.monitor_thread.join()
+
+            # Get left over data
+            data = self.channel.in_stderr_buffer.empty()
+            self.stderr += data
+
+    def can_read(self):
+        return self.channel.recv_ready()
+
+    def write(self, data):
+        return self.channel.sendall(data)
+
+    def read_stderr(self, n):
+        return self.channel.recv_stderr(n)
+
+    def read(self, n=None):
+        data = self.channel.recv(n)
+        data_len = len(data)
+
+        # Closed socket
+        if not data:
+            return
+
+        # Read more if needed
+        if n and data_len < n:
+            diff_len = n - data_len
+            return data + self.read(diff_len)
+        return data
+
+    def close(self):
+        self.channel.close()
+        self.stop_monitoring()
+
+
+class ParamikoSSHVendor(object):
+
+    def __init__(self):
+        self.ssh_kwargs = {}
+
+    def run_command(self, host, command, username=None, port=None,
+                    progress_stderr=None):
+        # Paramiko needs an explicit port. None is not valid
+        if port is None:
+            port = 22
+
+        client = paramiko.SSHClient()
+
+        policy = paramiko.client.MissingHostKeyPolicy()
+        client.set_missing_host_key_policy(policy)
+        client.connect(host, username=username, port=port,
+                       **self.ssh_kwargs)
+
+        # Open SSH session
+        channel = client.get_transport().open_session()
+
+        # Run commands
+        channel.exec_command(command)
+
+        return _ParamikoWrapper(
+            client, channel, progress_stderr=progress_stderr)

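Hypothetical direct use of the vendor (normally dulwich invokes it internally once `get_ssh_vendor` is overridden); host, user and command are placeholders, and a reachable SSH server is required::

    vendor = ParamikoSSHVendor()
    con = vendor.run_command('git.example.org',
                             "git-upload-pack '/repo.git'", username='git')
    if con.can_read():
        data = con.read(4096)
    con.close()
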
+ 143 - 0
dulwich/contrib/release_robot.py

@@ -0,0 +1,143 @@
+# release_robot.py
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Determine last version string from tags.
+
+Alternate to `Versioneer <https://pypi.python.org/pypi/versioneer/>`_ using
+`Dulwich <https://pypi.python.org/pypi/dulwich>`_ to sort tags by time from
+newest to oldest.
+
+Copy the following into the package ``__init__.py`` module::
+
+    from dulwich.contrib.release_robot import get_current_version
+    __version__ = get_current_version()
+
+This example assumes the tags have a leading "v" like "v0.3", and that the
+``.git`` folder is in a project folder that contains the package folder.
+
+EG::
+
+    * project
+    |
+    * .git
+    |
+    +-* package
+      |
+      * __init__.py  <-- put __version__ here
+
+
+"""
+
+import datetime
+import re
+import sys
+import time
+
+from dulwich.repo import Repo
+
+# CONSTANTS
+PROJDIR = '.'
+PATTERN = r'[ a-zA-Z_\-]*([\d\.]+[\-\w\.]*)'
+
+
+def get_recent_tags(projdir=PROJDIR):
+    """Get list of tags in order from newest to oldest and their datetimes.
+
+    :param projdir: path to ``.git``
+    :returns: list of tags sorted by commit time from newest to oldest
+
+    Each tag in the list contains the tag name, commit time, commit id, author
+    and any tag meta. If a tag isn't annotated, then its tag meta is ``None``.
+    Otherwise the tag meta is a tuple containing the tag time, tag id and tag
+    name. Time is in UTC.
+    """
+    with Repo(projdir) as project:  # dulwich repository object
+        refs = project.get_refs()  # dictionary of refs and their SHA-1 values
+        tags = {}  # empty dictionary to hold tags, commits and datetimes
+        # iterate over refs in repository
+        for key, value in refs.items():
+            key = key.decode('utf-8')  # compatible with Python-3
+            obj = project.get_object(value)  # dulwich object from SHA-1
+            # don't just check if the object is a "tag", because it could be
+            # a "commit"; instead check whether "tags" is in the ref name
+            if u'tags' not in key:
+                # skip ref if not a tag
+                continue
+            # strip the leading text from refs to get "tag name"
+            _, tag = key.rsplit(u'/', 1)
+            # check if tag object is "commit" or "tag" pointing to a "commit"
+            try:
+                commit = obj.object  # a tuple (commit class, commit id)
+            except AttributeError:
+                commit = obj
+                tag_meta = None
+            else:
+                tag_meta = (
+                    datetime.datetime(*time.gmtime(obj.tag_time)[:6]),
+                    obj.id.decode('utf-8'),
+                    obj.name.decode('utf-8')
+                )  # compatible with Python-3
+                commit = project.get_object(commit[1])  # commit object
+            # get tag commit datetime, but dulwich returns seconds since
+            # beginning of epoch, so use Python time module to convert it to
+            # timetuple then convert to datetime
+            tags[tag] = [
+                datetime.datetime(*time.gmtime(commit.commit_time)[:6]),
+                commit.id.decode('utf-8'),
+                commit.author.decode('utf-8'),
+                tag_meta
+            ]  # compatible with Python-3
+
+    # return list of tags sorted by their datetimes from newest to oldest
+    return sorted(tags.items(), key=lambda tag: tag[1][0], reverse=True)
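+
+# Usage sketch (output shape only; the tag name, id and author shown
+# here are illustrative, not from a real repository):
+#
+#     >>> get_recent_tags('/path/to/project')  # doctest: +SKIP
+#     [(u'v0.2', [datetime.datetime(...), u'<commit id>', u'<author>',
+#                 None]), ...]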
+
+
+def get_current_version(projdir=PROJDIR, pattern=PATTERN, logger=None):
+    """Return the most recent tag, using an options regular expression pattern.
+
+    The default pattern will strip any characters preceding the first semantic
+    version. *E.g.*: "Release-0.2.1-rc.1" will become "0.2.1-rc.1". If no match
+    is found, then the most recent tag is returned without modification.
+
+    :param projdir: path to ``.git``
+    :param pattern: regular expression pattern with group that matches version
+    :param logger: a Python logging instance to capture exception
+    :returns: tag matching first group in regular expression pattern
+    """
+    tags = get_recent_tags(projdir)
+    try:
+        tag = tags[0][0]
+    except IndexError:
+        return
+    matches = re.match(pattern, tag)
+    try:
+        current_version = matches.group(1)
+    except (IndexError, AttributeError) as err:
+        if logger:
+            logger.exception(err)
+        return tag
+    return current_version
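+
+# Example of the default PATTERN stripping a leading prefix (the tag
+# name is hypothetical):
+#
+#     >>> import re
+#     >>> re.match(PATTERN, 'Release-0.2.1-rc.1').group(1)
+#     '0.2.1-rc.1'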
+
+
+if __name__ == '__main__':
+    if len(sys.argv) > 1:
+        _PROJDIR = sys.argv[1]
+    else:
+        _PROJDIR = PROJDIR
+    print(get_current_version(projdir=_PROJDIR))

+ 1052 - 0
dulwich/contrib/swift.py

@@ -0,0 +1,1052 @@
+# swift.py -- Repo implementation atop OpenStack SWIFT
+# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+#
+# Author: Fabien Boucher <fabien.boucher@enovance.com>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Repo implementation atop OpenStack SWIFT."""
+
+# TODO: Refactor to share more code with dulwich/repo.py.
+# TODO(fbo): Second attempt to _send() must be notified via real log
+# TODO(fbo): More logs for operations
+
+import os
+import stat
+import zlib
+import tempfile
+import posixpath
+
+try:
+    import urlparse
+except ImportError:
+    import urllib.parse as urlparse
+
+from io import BytesIO
+try:
+    from ConfigParser import ConfigParser
+except ImportError:
+    from configparser import ConfigParser
+from geventhttpclient import HTTPClient
+
+from dulwich.greenthreads import (
+    GreenThreadsMissingObjectFinder,
+    GreenThreadsObjectStoreIterator,
+    )
+
+from dulwich.lru_cache import LRUSizeCache
+from dulwich.objects import (
+    Blob,
+    Commit,
+    Tree,
+    Tag,
+    S_ISGITLINK,
+    )
+from dulwich.object_store import (
+    PackBasedObjectStore,
+    PACKDIR,
+    INFODIR,
+    )
+from dulwich.pack import (
+    PackData,
+    Pack,
+    PackIndexer,
+    PackStreamCopier,
+    write_pack_header,
+    compute_file_sha,
+    iter_sha1,
+    write_pack_index_v2,
+    load_pack_index_file,
+    read_pack_header,
+    _compute_object_size,
+    unpack_object,
+    write_pack_object,
+    )
+from dulwich.protocol import TCP_GIT_PORT
+from dulwich.refs import (
+    InfoRefsContainer,
+    read_info_refs,
+    write_info_refs,
+    )
+from dulwich.repo import (
+    BaseRepo,
+    OBJECTDIR,
+    )
+from dulwich.server import (
+    Backend,
+    TCPGitServer,
+    )
+
+try:
+    from simplejson import loads as json_loads
+    from simplejson import dumps as json_dumps
+except ImportError:
+    from json import loads as json_loads
+    from json import dumps as json_dumps
+
+import sys
+
+
+"""
+# Configuration file sample
+[swift]
+# Authentication URL (Keystone or Swift)
+auth_url = http://127.0.0.1:5000/v2.0
+# Authentication version to use
+auth_ver = 2
+# The tenant and username separated by a semicolon
+username = admin;admin
+# The user password
+password = pass
+# The Object storage region to use (auth v2) (Default RegionOne)
+region_name = RegionOne
+# The Object storage endpoint URL to use (auth v2) (Default internalURL)
+endpoint_type = internalURL
+# Concurrency to use for parallel tasks (Default 10)
+concurrency = 10
+# Size of the HTTP pool (Default 10)
+http_pool_length = 10
+# Timeout delay for HTTP connections (Default 20)
+http_timeout = 20
+# Chunk size to read from pack (Bytes) (Default 12228)
+chunk_length = 12228
+# Cache size (MBytes) (Default 20)
+cache_length = 20
+"""
+
+
+class PackInfoObjectStoreIterator(GreenThreadsObjectStoreIterator):
+
+    def __len__(self):
+        while len(self.finder.objects_to_send):
+            for _ in range(0, len(self.finder.objects_to_send)):
+                sha = self.finder.next()
+                self._shas.append(sha)
+        return len(self._shas)
+
+
+class PackInfoMissingObjectFinder(GreenThreadsMissingObjectFinder):
+
+    def next(self):
+        while True:
+            if not self.objects_to_send:
+                return None
+            (sha, name, leaf) = self.objects_to_send.pop()
+            if sha not in self.sha_done:
+                break
+        if not leaf:
+            info = self.object_store.pack_info_get(sha)
+            if info[0] == Commit.type_num:
+                self.add_todo([(info[2], "", False)])
+            elif info[0] == Tree.type_num:
+                self.add_todo([tuple(i) for i in info[1]])
+            elif info[0] == Tag.type_num:
+                self.add_todo([(info[1], None, False)])
+            if sha in self._tagged:
+                self.add_todo([(self._tagged[sha], None, True)])
+        self.sha_done.add(sha)
+        self.progress("counting objects: %d\r" % len(self.sha_done))
+        return (sha, name)
+
+
+def load_conf(path=None, file=None):
+    """Load configuration in global var CONF
+
+    :param path: The path to the configuration file
+    :param file: If provided, read from this file-like object instead
+    """
+    conf = ConfigParser()
+    if file:
+        try:
+            conf.read_file(file, path)
+        except AttributeError:
+            # read_file only exists in Python3
+            conf.readfp(file)
+        return conf
+    confpath = None
+    if not path:
+        try:
+            confpath = os.environ['DULWICH_SWIFT_CFG']
+        except KeyError:
+            raise Exception("You need to specify a configuration file")
+    else:
+        confpath = path
+    if not os.path.isfile(confpath):
+        raise Exception("Unable to read configuration file %s" % confpath)
+    conf.read(confpath)
+    return conf
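+
+# A minimal sketch of loading a configuration (the path is a
+# placeholder; the DULWICH_SWIFT_CFG environment variable is consulted
+# when no path is given):
+#
+#     conf = load_conf('/etc/dulwich-swift.cfg')
+#     auth_ver = conf.get('swift', 'auth_ver')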
+
+
+def swift_load_pack_index(scon, filename):
+    """Read a pack index file from Swift
+
+    :param scon: a `SwiftConnector` instance
+    :param filename: Path to the index file object in Swift
+    :return: a `PackIndex` instance
+    """
+    f = scon.get_object(filename)
+    try:
+        return load_pack_index_file(filename, f)
+    finally:
+        f.close()
+
+
+def pack_info_create(pack_data, pack_index):
+    pack = Pack.from_objects(pack_data, pack_index)
+    info = {}
+    for obj in pack.iterobjects():
+        # Commit
+        if obj.type_num == Commit.type_num:
+            info[obj.id] = (obj.type_num, obj.parents, obj.tree)
+        # Tree
+        elif obj.type_num == Tree.type_num:
+            shas = [(s, n, not stat.S_ISDIR(m)) for
+                    n, m, s in obj.items() if not S_ISGITLINK(m)]
+            info[obj.id] = (obj.type_num, shas)
+        # Blob
+        elif obj.type_num == Blob.type_num:
+            info[obj.id] = None
+        # Tag
+        elif obj.type_num == Tag.type_num:
+            info[obj.id] = (obj.type_num, obj.object[1])
+    return zlib.compress(json_dumps(info))
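+
+# Shape of the serialized info, before zlib compression, as built above
+# (type numbers: 1 commit, 2 tree, 3 blob, 4 tag):
+#
+#     {commit_id: (1, parents, tree_id),
+#      tree_id: (2, [(sha, name, not_a_directory), ...]),
+#      blob_id: None,
+#      tag_id: (4, tagged_object_id)}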
+
+
+def load_pack_info(filename, scon=None, file=None):
+    if not file:
+        f = scon.get_object(filename)
+    else:
+        f = file
+    if not f:
+        return None
+    try:
+        return json_loads(zlib.decompress(f.read()))
+    finally:
+        f.close()
+
+
+class SwiftException(Exception):
+    pass
+
+
+class SwiftConnector(object):
+    """A Connector to swift that manage authentication and errors catching
+    """
+
+    def __init__(self, root, conf):
+        """ Initialize a SwiftConnector
+
+        :param root: The swift container that will act as Git bare repository
+        :param conf: A ConfigParser Object
+        """
+        self.conf = conf
+        self.auth_ver = self.conf.get("swift", "auth_ver")
+        if self.auth_ver not in ["1", "2"]:
+            raise NotImplementedError(
+                "Wrong authentication version use either 1 or 2")
+        self.auth_url = self.conf.get("swift", "auth_url")
+        self.user = self.conf.get("swift", "username")
+        self.password = self.conf.get("swift", "password")
+        self.concurrency = self.conf.getint('swift', 'concurrency') or 10
+        self.http_timeout = self.conf.getint('swift', 'http_timeout') or 20
+        self.http_pool_length = \
+            self.conf.getint('swift', 'http_pool_length') or 10
+        self.region_name = self.conf.get("swift", "region_name") or "RegionOne"
+        self.endpoint_type = \
+            self.conf.get("swift", "endpoint_type") or "internalURL"
+        self.cache_length = self.conf.getint("swift", "cache_length") or 20
+        self.chunk_length = self.conf.getint("swift", "chunk_length") or 12228
+        self.root = root
+        block_size = 1024 * 12  # 12KB
+        if self.auth_ver == "1":
+            self.storage_url, self.token = self.swift_auth_v1()
+        else:
+            self.storage_url, self.token = self.swift_auth_v2()
+
+        token_header = {'X-Auth-Token': str(self.token)}
+        self.httpclient = \
+            HTTPClient.from_url(str(self.storage_url),
+                                concurrency=self.http_pool_length,
+                                block_size=block_size,
+                                connection_timeout=self.http_timeout,
+                                network_timeout=self.http_timeout,
+                                headers=token_header)
+        self.base_path = str(posixpath.join(
+                urlparse.urlparse(self.storage_url).path, self.root))
+
+    def swift_auth_v1(self):
+        self.user = self.user.replace(";", ":")
+        auth_httpclient = HTTPClient.from_url(
+            self.auth_url,
+            connection_timeout=self.http_timeout,
+            network_timeout=self.http_timeout,
+            )
+        headers = {'X-Auth-User': self.user,
+                   'X-Auth-Key': self.password}
+        path = urlparse.urlparse(self.auth_url).path
+
+        ret = auth_httpclient.request('GET', path, headers=headers)
+
+        # TODO: handle redirections (e.g. 301)
+
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('AUTH v1.0 request failed on ' +
+                                 '%s with error code %s (%s)'
+                                 % (str(auth_httpclient.get_base_url()) +
+                                    path, ret.status_code,
+                                    str(ret.items())))
+        storage_url = ret['X-Storage-Url']
+        token = ret['X-Auth-Token']
+        return storage_url, token
+
+    def swift_auth_v2(self):
+        self.tenant, self.user = self.user.split(';')
+        auth_dict = {}
+        auth_dict['auth'] = {'passwordCredentials':
+                             {
+                                 'username': self.user,
+                                 'password': self.password,
+                             },
+                             'tenantName': self.tenant}
+        auth_json = json_dumps(auth_dict)
+        headers = {'Content-Type': 'application/json'}
+        auth_httpclient = HTTPClient.from_url(
+            self.auth_url,
+            connection_timeout=self.http_timeout,
+            network_timeout=self.http_timeout,
+            )
+        path = urlparse.urlparse(self.auth_url).path
+        if not path.endswith('tokens'):
+            path = posixpath.join(path, 'tokens')
+        ret = auth_httpclient.request('POST', path,
+                                      body=auth_json,
+                                      headers=headers)
+
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('AUTH v2.0 request failed on ' +
+                                 '%s with error code %s (%s)'
+                                 % (str(auth_httpclient.get_base_url()) +
+                                    path, ret.status_code,
+                                    str(ret.items())))
+        auth_ret_json = json_loads(ret.read())
+        token = auth_ret_json['access']['token']['id']
+        catalogs = auth_ret_json['access']['serviceCatalog']
+        object_store = [o_store for o_store in catalogs if
+                        o_store['type'] == 'object-store'][0]
+        endpoints = object_store['endpoints']
+        endpoint = [endp for endp in endpoints if
+                    endp["region"] == self.region_name][0]
+        return endpoint[self.endpoint_type], token
+
+    def test_root_exists(self):
+        """Check that Swift container exist
+
+        :return: True if exist or None it not
+        """
+        ret = self.httpclient.request('HEAD', self.base_path)
+        if ret.status_code == 404:
+            return None
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('HEAD request failed with error code %s'
+                                 % ret.status_code)
+        return True
+
+    def create_root(self):
+        """Create the Swift container
+
+        :raise: `SwiftException` if unable to create
+        """
+        if not self.test_root_exists():
+            ret = self.httpclient.request('PUT', self.base_path)
+            if ret.status_code < 200 or ret.status_code >= 300:
+                raise SwiftException('PUT request failed with error code %s'
+                                     % ret.status_code)
+
+    def get_container_objects(self):
+        """Retrieve objects list in a container
+
+        :return: A list of dict that describe objects
+                 or None if container does not exist
+        """
+        qs = '?format=json'
+        path = self.base_path + qs
+        ret = self.httpclient.request('GET', path)
+        if ret.status_code == 404:
+            return None
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('GET request failed with error code %s'
+                                 % ret.status_code)
+        content = ret.read()
+        return json_loads(content)
+
+    def get_object_stat(self, name):
+        """Retrieve object stat
+
+        :param name: The object name
+        :return: A dict describing the object,
+                 or None if the object does not exist
+        """
+        path = self.base_path + '/' + name
+        ret = self.httpclient.request('HEAD', path)
+        if ret.status_code == 404:
+            return None
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('HEAD request failed with error code %s'
+                                 % ret.status_code)
+        resp_headers = {}
+        for header, value in ret.items():
+            resp_headers[header.lower()] = value
+        return resp_headers
+
+    def put_object(self, name, content):
+        """Put an object
+
+        :param name: The object name
+        :param content: A file object
+        :raise: `SwiftException` if unable to create
+        """
+        content.seek(0)
+        data = content.read()
+        path = self.base_path + '/' + name
+        headers = {'Content-Length': str(len(data))}
+
+        def _send():
+            ret = self.httpclient.request('PUT', path,
+                                          body=data,
+                                          headers=headers)
+            return ret
+
+        try:
+            # Sometimes we get a broken pipe; retry once as a workaround
+            ret = _send()
+        except Exception:
+            # The second attempt usually works
+            ret = _send()
+
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('PUT request failed with error code %s'
+                                 % ret.status_code)
+
+    def get_object(self, name, range=None):
+        """Retrieve an object
+
+        :param name: The object name
+        :param range: A string range like "0-10" to
+                      retrieve specified bytes in object content
+        :return: A file like instance
+                 or bytestring if range is specified
+        """
+        headers = {}
+        if range:
+            headers['Range'] = 'bytes=%s' % range
+        path = self.base_path + '/' + name
+        ret = self.httpclient.request('GET', path, headers=headers)
+        if ret.status_code == 404:
+            return None
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('GET request failed with error code %s'
+                                 % ret.status_code)
+        content = ret.read()
+
+        if range:
+            return content
+        return BytesIO(content)
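+
+    # Range semantics sketch (the object names are placeholders):
+    #
+    #     scon.get_object('info/refs')                # BytesIO, full content
+    #     scon.get_object('pack-<sha>.pack', '0-11')  # bytes 0..11 inclusive
+    #     scon.get_object('pack-<sha>.pack', '-20')   # last 20 bytes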
+
+    def del_object(self, name):
+        """Delete an object
+
+        :param name: The object name
+        :raise: `SwiftException` if unable to delete
+        """
+        path = self.base_path + '/' + name
+        ret = self.httpclient.request('DELETE', path)
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('DELETE request failed with error code %s'
+                                 % ret.status_code)
+
+    def del_root(self):
+        """Delete the root container by removing container content
+
+        :raise: `SwiftException` if unable to delete
+        """
+        for obj in self.get_container_objects():
+            self.del_object(obj['name'])
+        ret = self.httpclient.request('DELETE', self.base_path)
+        if ret.status_code < 200 or ret.status_code >= 300:
+            raise SwiftException('DELETE request failed with error code %s'
+                                 % ret.status_code)
+
+
+class SwiftPackReader(object):
+    """A SwiftPackReader that mimic read and sync method
+
+    The reader allows to read a specified amount of bytes from
+    a given offset of a Swift object. A read offset is kept internaly.
+    The reader will read from Swift a specified amount of data to complete
+    its internal buffer. chunk_length specifiy the amount of data
+    to read from Swift.
+    """
+
+    def __init__(self, scon, filename, pack_length):
+        """Initialize a SwiftPackReader
+
+        :param scon: a `SwiftConnector` instance
+        :param filename: the pack filename
+        :param pack_length: The size of the pack object
+        """
+        self.scon = scon
+        self.filename = filename
+        self.pack_length = pack_length
+        self.offset = 0
+        self.base_offset = 0
+        self.buff = b''
+        self.buff_length = self.scon.chunk_length
+
+    def _read(self, more=False):
+        if more:
+            self.buff_length = self.buff_length * 2
+        start = self.base_offset
+        end = min(self.base_offset + self.buff_length, self.pack_length)
+        ret = self.scon.get_object(
+            self.filename, range="%s-%s" % (start, end))
+        self.buff = ret
+
+    def read(self, length):
+        """Read a specified amount of Bytes form the pack object
+
+        :param length: amount of bytes to read
+        :return: bytestring
+        """
+        end = self.offset+length
+        if self.base_offset + end > self.pack_length:
+            data = self.buff[self.offset:]
+            self.offset = end
+            return data
+        if end > len(self.buff):
+            # Need to read more from swift
+            self._read(more=True)
+            return self.read(length)
+        data = self.buff[self.offset:end]
+        self.offset = end
+        return data
+
+    def seek(self, offset):
+        """Seek to a specified offset
+
+        :param offset: the offset to seek to
+        """
+        self.base_offset = offset
+        self._read()
+        self.offset = 0
+
+    def read_checksum(self):
+        """Read the checksum from the pack
+
+        :return: the checksum bytestring
+        """
+        return self.scon.get_object(self.filename, range="-20")
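+
+    # Usage sketch (assumes an authenticated `SwiftConnector` named
+    # ``scon`` and an existing pack object; illustrative only):
+    #
+    #     reader = SwiftPackReader(scon, 'pack-<sha>.pack', pack_length)
+    #     reader.seek(12)           # skip the 12-byte pack header
+    #     data = reader.read(4096)  # served from the buffer when possible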
+
+
+class SwiftPackData(PackData):
+    """The data contained in a packfile.
+
+    We use the SwiftPackReader to read bytes from packs stored in Swift
+    using the Range header feature of Swift.
+    """
+
+    def __init__(self, scon, filename):
+        """ Initialize a SwiftPackReader
+
+        :param scon: a `SwiftConnector` instance
+        :param filename: the pack filename
+        """
+        self.scon = scon
+        self._filename = filename
+        self._header_size = 12
+        headers = self.scon.get_object_stat(self._filename)
+        self.pack_length = int(headers['content-length'])
+        pack_reader = SwiftPackReader(self.scon, self._filename,
+                                      self.pack_length)
+        (version, self._num_objects) = read_pack_header(pack_reader.read)
+        self._offset_cache = LRUSizeCache(1024*1024*self.scon.cache_length,
+                                          compute_size=_compute_object_size)
+        self.pack = None
+
+    def get_object_at(self, offset):
+        if offset in self._offset_cache:
+            return self._offset_cache[offset]
+        assert offset >= self._header_size
+        pack_reader = SwiftPackReader(self.scon, self._filename,
+                                      self.pack_length)
+        pack_reader.seek(offset)
+        unpacked, _ = unpack_object(pack_reader.read)
+        return (unpacked.pack_type_num, unpacked._obj())
+
+    def get_stored_checksum(self):
+        pack_reader = SwiftPackReader(self.scon, self._filename,
+                                      self.pack_length)
+        return pack_reader.read_checksum()
+
+    def close(self):
+        pass
+
+
+class SwiftPack(Pack):
+    """A Git pack object.
+
+    Same implementation as pack.Pack except that _idx_load and
+    _data_load are bound to the Swift versions of load_pack_index and
+    PackData.
+    """
+
+    def __init__(self, *args, **kwargs):
+        self.scon = kwargs['scon']
+        del kwargs['scon']
+        super(SwiftPack, self).__init__(*args, **kwargs)
+        self._pack_info_path = self._basename + '.info'
+        self._pack_info = None
+        self._pack_info_load = lambda: load_pack_info(self._pack_info_path,
+                                                      self.scon)
+        self._idx_load = lambda: swift_load_pack_index(self.scon,
+                                                       self._idx_path)
+        self._data_load = lambda: SwiftPackData(self.scon, self._data_path)
+
+    @property
+    def pack_info(self):
+        """The pack data object being used."""
+        if self._pack_info is None:
+            self._pack_info = self._pack_info_load()
+        return self._pack_info
+
+
+class SwiftObjectStore(PackBasedObjectStore):
+    """A Swift Object Store
+
+    Allows managing a bare Git repository on OpenStack Swift.
+    This object store only supports pack files and not loose objects.
+    """
+    def __init__(self, scon):
+        """Open a Swift object store.
+
+        :param scon: A `SwiftConnector` instance
+        """
+        super(SwiftObjectStore, self).__init__()
+        self.scon = scon
+        self.root = self.scon.root
+        self.pack_dir = posixpath.join(OBJECTDIR, PACKDIR)
+        self._alternates = None
+
+    @property
+    def packs(self):
+        """List with pack objects."""
+        if not self._pack_cache:
+            self._update_pack_cache()
+        return self._pack_cache.values()
+
+    def _update_pack_cache(self):
+        for pack in self._load_packs():
+            self._pack_cache[pack._basename] = pack
+
+    def _iter_loose_objects(self):
+        """Loose objects are not supported by this repository
+        """
+        return []
+
+    def iter_shas(self, finder):
+        """An iterator over pack's ObjectStore.
+
+        :return: a `ObjectStoreIterator` or `GreenThreadsObjectStoreIterator`
+                 instance if gevent is enabled
+        """
+        shas = iter(finder.next, None)
+        return PackInfoObjectStoreIterator(
+            self, shas, finder, self.scon.concurrency)
+
+    def find_missing_objects(self, *args, **kwargs):
+        kwargs['concurrency'] = self.scon.concurrency
+        return PackInfoMissingObjectFinder(self, *args, **kwargs)
+
+    def _load_packs(self):
+        """Load all packs from Swift
+
+        :return: a list of `SwiftPack` instances
+        """
+        objects = self.scon.get_container_objects()
+        pack_files = [o['name'].replace(".pack", "")
+                      for o in objects if o['name'].endswith(".pack")]
+        return [SwiftPack(pack, scon=self.scon) for pack in pack_files]
+
+    def pack_info_get(self, sha):
+        for pack in self.packs:
+            if sha in pack:
+                return pack.pack_info[sha]
+
+    def _collect_ancestors(self, heads, common=frozenset()):
+        def _find_parents(commit):
+            for pack in self.packs:
+                if commit in pack:
+                    try:
+                        parents = pack.pack_info[commit][1]
+                    except KeyError:
+                        # Seems to have no parents
+                        return []
+                    return parents
+
+        bases = set()
+        commits = set()
+        queue = []
+        queue.extend(heads)
+        while queue:
+            e = queue.pop(0)
+            if e in common:
+                bases.add(e)
+            elif e not in commits:
+                commits.add(e)
+                parents = _find_parents(e)
+                queue.extend(parents)
+        return (commits, bases)
+
+    def add_pack(self):
+        """Add a new pack to this object store.
+
+        :return: Fileobject to write to and a commit function to
+            call when the pack is finished.
+        """
+        f = BytesIO()
+
+        def commit():
+            f.seek(0)
+            pack = PackData(file=f, filename="")
+            entries = pack.sorted_entries()
+            if len(entries):
+                basename = posixpath.join(self.pack_dir,
+                                          "pack-%s" %
+                                          iter_sha1(entry[0] for
+                                                    entry in entries))
+                index = BytesIO()
+                write_pack_index_v2(index, entries, pack.get_stored_checksum())
+                self.scon.put_object(basename + ".pack", f)
+                f.close()
+                self.scon.put_object(basename + ".idx", index)
+                index.close()
+                final_pack = SwiftPack(basename, scon=self.scon)
+                final_pack.check_length_and_checksum()
+                self._add_known_pack(basename, final_pack)
+                return final_pack
+            else:
+                return None
+
+        def abort():
+            pass
+        return f, commit, abort
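+
+    # Usage sketch for the returned triple (``pack_bytes`` is assumed to
+    # be a complete pack stream):
+    #
+    #     f, commit, abort = store.add_pack()
+    #     f.write(pack_bytes)
+    #     final_pack = commit()  # uploads .pack/.idx; None if pack is empty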
+
+    def add_object(self, obj):
+        self.add_objects([(obj, None), ])
+
+    def _pack_cache_stale(self):
+        return False
+
+    def _get_loose_object(self, sha):
+        return None
+
+    def add_thin_pack(self, read_all, read_some):
+        """Read a thin pack
+
+        Read it from a stream and complete it in a temporary file.
+        Then the pack and the corresponding index file are uploaded to Swift.
+        """
+        fd, path = tempfile.mkstemp(prefix='tmp_pack_')
+        f = os.fdopen(fd, 'w+b')
+        try:
+            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
+            copier.verify()
+            return self._complete_thin_pack(f, path, copier, indexer)
+        finally:
+            f.close()
+            os.unlink(path)
+
+    def _complete_thin_pack(self, f, path, copier, indexer):
+        entries = list(indexer)
+
+        # Update the header with the new number of objects.
+        f.seek(0)
+        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+        # Must flush before reading (http://bugs.python.org/issue3207)
+        f.flush()
+
+        # Rescan the rest of the pack, computing the SHA with the new header.
+        new_sha = compute_file_sha(f, end_ofs=-20)
+
+        # Must reposition before writing (http://bugs.python.org/issue3207)
+        f.seek(0, os.SEEK_CUR)
+
+        # Complete the pack.
+        for ext_sha in indexer.ext_refs():
+            assert len(ext_sha) == 20
+            type_num, data = self.get_raw(ext_sha)
+            offset = f.tell()
+            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+            entries.append((ext_sha, offset, crc32))
+        pack_sha = new_sha.digest()
+        f.write(pack_sha)
+        f.flush()
+
+        # Move the pack in.
+        entries.sort()
+        pack_base_name = posixpath.join(
+            self.pack_dir,
+            'pack-' + iter_sha1(e[0] for e in entries).decode(
+                sys.getfilesystemencoding()))
+        self.scon.put_object(pack_base_name + '.pack', f)
+
+        # Write the index.
+        filename = pack_base_name + '.idx'
+        index_file = BytesIO()
+        write_pack_index_v2(index_file, entries, pack_sha)
+        self.scon.put_object(filename, index_file)
+
+        # Write pack info.
+        f.seek(0)
+        pack_data = PackData(filename="", file=f)
+        index_file.seek(0)
+        pack_index = load_pack_index_file('', index_file)
+        serialized_pack_info = pack_info_create(pack_data, pack_index)
+        f.close()
+        index_file.close()
+        pack_info_file = BytesIO(serialized_pack_info)
+        filename = pack_base_name + '.info'
+        self.scon.put_object(filename, pack_info_file)
+        pack_info_file.close()
+
+        # Add the pack to the store and return it.
+        final_pack = SwiftPack(pack_base_name, scon=self.scon)
+        final_pack.check_length_and_checksum()
+        self._add_known_pack(pack_base_name, final_pack)
+        return final_pack
+
+
+class SwiftInfoRefsContainer(InfoRefsContainer):
+    """Manage references in info/refs object.
+    """
+
+    def __init__(self, scon, store):
+        self.scon = scon
+        self.filename = 'info/refs'
+        self.store = store
+        f = self.scon.get_object(self.filename)
+        if not f:
+            f = BytesIO(b'')
+        super(SwiftInfoRefsContainer, self).__init__(f)
+
+    def _load_check_ref(self, name, old_ref):
+        self._check_refname(name)
+        f = self.scon.get_object(self.filename)
+        if not f:
+            return {}
+        refs = read_info_refs(f)
+        if old_ref is not None:
+            if refs[name] != old_ref:
+                return False
+        return refs
+
+    def _write_refs(self, refs):
+        f = BytesIO()
+        f.writelines(write_info_refs(refs, self.store))
+        self.scon.put_object(self.filename, f)
+
+    def set_if_equals(self, name, old_ref, new_ref):
+        """Set a refname to new_ref only if it currently equals old_ref.
+        """
+        if name == 'HEAD':
+            return True
+        refs = self._load_check_ref(name, old_ref)
+        if not isinstance(refs, dict):
+            return False
+        refs[name] = new_ref
+        self._write_refs(refs)
+        self._refs[name] = new_ref
+        return True
+
+    def remove_if_equals(self, name, old_ref):
+        """Remove a refname only if it currently equals old_ref.
+        """
+        if name == 'HEAD':
+            return True
+        refs = self._load_check_ref(name, old_ref)
+        if not isinstance(refs, dict):
+            return False
+        del refs[name]
+        self._write_refs(refs)
+        del self._refs[name]
+        return True
+
+    def allkeys(self):
+        try:
+            self._refs['HEAD'] = self._refs['refs/heads/master']
+        except KeyError:
+            pass
+        return self._refs.keys()
+
+
+class SwiftRepo(BaseRepo):
+
+    def __init__(self, root, conf):
+        """Init a Git bare Repository on top of a Swift container.
+
+        References are managed in info/refs objects by
+        `SwiftInfoRefsContainer`. The root attribute is the Swift
+        container that contain the Git bare repository.
+
+        :param root: The container which contains the bare repo
+        :param conf: A ConfigParser object
+        """
+        self.root = root.lstrip('/')
+        self.conf = conf
+        self.scon = SwiftConnector(self.root, self.conf)
+        objects = self.scon.get_container_objects()
+        if not objects:
+            raise Exception('There is no Git repository here: %s' % self.root)
+        objects = [o['name'].split('/')[0] for o in objects]
+        if OBJECTDIR not in objects:
+            raise Exception('This repository (%s) is not bare.' % self.root)
+        self.bare = True
+        self._controldir = self.root
+        object_store = SwiftObjectStore(self.scon)
+        refs = SwiftInfoRefsContainer(self.scon, object_store)
+        BaseRepo.__init__(self, object_store, refs)
+
+    def _determine_file_mode(self):
+        """Probe the file-system to determine whether permissions can be trusted.
+
+        :return: True if permissions can be trusted, False otherwise.
+        """
+        return False
+
+    def _put_named_file(self, filename, contents):
+        """Put an object in a Swift container
+
+        :param filename: the path to the object to put on Swift
+        :param contents: the content as bytestring
+        """
+        f = BytesIO()
+        f.write(contents)
+        self.scon.put_object(filename, f)
+        f.close()
+
+    @classmethod
+    def init_bare(cls, scon, conf):
+        """Create a new bare repository.
+
+        :param scon: a `SwiftConnector` instance
+        :param conf: a ConfigParser object
+        :return: a `SwiftRepo` instance
+        """
+        scon.create_root()
+        for obj in [posixpath.join(OBJECTDIR, PACKDIR),
+                    posixpath.join(INFODIR, 'refs')]:
+            scon.put_object(obj, BytesIO(b''))
+        ret = cls(scon.root, conf)
+        ret._init_files(True)
+        return ret
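+
+    # A minimal sketch of creating a bare repository on Swift (assumes a
+    # valid configuration file; see the sample near the top of this
+    # module):
+    #
+    #     conf = load_conf('/etc/dulwich-swift.cfg')
+    #     scon = SwiftConnector('myrepo', conf)
+    #     repo = SwiftRepo.init_bare(scon, conf)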
+
+
+class SwiftSystemBackend(Backend):
+
+    def __init__(self, logger, conf):
+        self.conf = conf
+        self.logger = logger
+
+    def open_repository(self, path):
+        self.logger.info('opening repository at %s', path)
+        return SwiftRepo(path, self.conf)
+
+
+def cmd_daemon(args):
+    """Entry point for starting a TCP git server."""
+    import optparse
+    parser = optparse.OptionParser()
+    parser.add_option("-l", "--listen_address", dest="listen_address",
+                      default="127.0.0.1",
+                      help="Binding IP address.")
+    parser.add_option("-p", "--port", dest="port", type=int,
+                      default=TCP_GIT_PORT,
+                      help="Binding TCP port.")
+    parser.add_option("-c", "--swift_config", dest="swift_config",
+                      default="",
+                      help="Path to the configuration file for Swift backend.")
+    options, args = parser.parse_args(args)
+
+    try:
+        import gevent
+        import geventhttpclient  # noqa: F401
+    except ImportError:
+        print("gevent and geventhttpclient libraries are mandatory "
+              " for use the Swift backend.")
+        sys.exit(1)
+    import gevent.monkey
+    gevent.monkey.patch_socket()
+    from dulwich.contrib.swift import load_conf
+    from dulwich import log_utils
+    logger = log_utils.getLogger(__name__)
+    conf = load_conf(options.swift_config)
+    backend = SwiftSystemBackend(logger, conf)
+
+    log_utils.default_logging_config()
+    server = TCPGitServer(backend, options.listen_address,
+                          port=options.port)
+    server.serve_forever()
+
+
+def cmd_init(args):
+    import optparse
+    parser = optparse.OptionParser()
+    parser.add_option("-c", "--swift_config", dest="swift_config",
+                      default="",
+                      help="Path to the configuration file for Swift backend.")
+    options, args = parser.parse_args(args)
+
+    conf = load_conf(options.swift_config)
+    if args == []:
+        parser.error("missing repository name")
+    repo = args[0]
+    scon = SwiftConnector(repo, conf)
+    SwiftRepo.init_bare(scon, conf)
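+
+# Example invocations when running this module directly (the
+# configuration path is a placeholder):
+#
+#     python -m dulwich.contrib.swift init -c /etc/dulwich-swift.cfg myrepo
+#     python -m dulwich.contrib.swift daemon -c /etc/dulwich-swift.cfg \
+#         -l 0.0.0.0 -p 9418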
+
+
+def main(argv=sys.argv):
+    commands = {
+        "init": cmd_init,
+        "daemon": cmd_daemon,
+    }
+
+    # use the ``argv`` parameter so callers can pass a custom argument list
+    if len(argv) < 2:
+        print("Usage: %s <%s> [OPTIONS...]" % (
+                argv[0], "|".join(commands.keys())))
+        sys.exit(1)
+
+    cmd = argv[1]
+    if cmd not in commands:
+        print("No such subcommand: %s" % cmd)
+        sys.exit(1)
+    commands[cmd](argv[2:])
+
+
+if __name__ == '__main__':
+    main()

+ 127 - 0
dulwich/contrib/test_release_robot.py

@@ -0,0 +1,127 @@
+# test_release_robot.py
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for release_robot."""
+
+import datetime
+import os
+import re
+import shutil
+import tempfile
+import time
+import unittest
+
+from dulwich.contrib import release_robot
+from dulwich.repo import Repo
+from dulwich.tests.utils import make_commit, make_tag
+
+BASEDIR = os.path.abspath(os.path.dirname(__file__))  # this directory
+
+
+def gmtime_to_datetime(gmt):
+    return datetime.datetime(*time.gmtime(gmt)[:6])
+
+
+class TagPatternTests(unittest.TestCase):
+    """test tag patterns"""
+
+    def test_tag_pattern(self):
+        """test tag patterns"""
+        test_cases = {
+            '0.3': '0.3', 'v0.3': '0.3', 'release0.3': '0.3',
+            'Release-0.3': '0.3', 'v0.3rc1': '0.3rc1', 'v0.3-rc1': '0.3-rc1',
+            'v0.3-rc.1': '0.3-rc.1', 'version 0.3': '0.3',
+            'version_0.3_rc_1': '0.3_rc_1', 'v1': '1', '0.3rc1': '0.3rc1'
+        }
+        for testcase, version in test_cases.items():
+            matches = re.match(release_robot.PATTERN, testcase)
+            self.assertEqual(matches.group(1), version)
+
+
+class GetRecentTagsTest(unittest.TestCase):
+    """test get recent tags"""
+
+    # Git repo for dulwich project
+    test_repo = os.path.join(BASEDIR, 'dulwich_test_repo.zip')
+    committer = b"Mark Mikofski <mark.mikofski@sunpowercorp.com>"
+    test_tags = [b'v0.1a', b'v0.1']
+    tag_test_data = {
+        test_tags[0]: [1484788003, b'0' * 40, None],
+        test_tags[1]: [1484788314, b'1' * 40, (1484788401, b'2' * 40)]
+    }
+
+    @classmethod
+    def setUpClass(cls):
+        cls.projdir = tempfile.mkdtemp()  # temporary project directory
+        cls.repo = Repo.init(cls.projdir)  # test repo
+        obj_store = cls.repo.object_store  # test repo object store
+        # commit 1 ('2017-01-19T01:06:43')
+        cls.c1 = make_commit(
+            id=cls.tag_test_data[cls.test_tags[0]][1],
+            commit_time=cls.tag_test_data[cls.test_tags[0]][0],
+            message=b'unannotated tag',
+            author=cls.committer
+        )
+        obj_store.add_object(cls.c1)
+        # tag 1: unannotated
+        cls.t1 = cls.test_tags[0]
+        cls.repo[b'refs/tags/' + cls.t1] = cls.c1.id  # add unannotated tag
+        # commit 2 ('2017-01-19T01:11:54')
+        cls.c2 = make_commit(
+            id=cls.tag_test_data[cls.test_tags[1]][1],
+            commit_time=cls.tag_test_data[cls.test_tags[1]][0],
+            message=b'annotated tag',
+            parents=[cls.c1.id],
+            author=cls.committer
+        )
+        obj_store.add_object(cls.c2)
+        # tag 2: annotated ('2017-01-19T01:13:21')
+        cls.t2 = make_tag(
+            cls.c2,
+            id=cls.tag_test_data[cls.test_tags[1]][2][1],
+            name=cls.test_tags[1],
+            tag_time=cls.tag_test_data[cls.test_tags[1]][2][0]
+        )
+        obj_store.add_object(cls.t2)
+        cls.repo[b'refs/heads/master'] = cls.c2.id
+        cls.repo[b'refs/tags/' + cls.t2.name] = cls.t2.id  # add annotated tag
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.repo.close()
+        shutil.rmtree(cls.projdir)
+
+    def test_get_recent_tags(self):
+        """test get recent tags"""
+        tags = release_robot.get_recent_tags(self.projdir)  # get test tags
+        for tag, metadata in tags:
+            tag = tag.encode('utf-8')
+            test_data = self.tag_test_data[tag]  # test data tag
+            # test commit date, id and author name
+            self.assertEqual(metadata[0], gmtime_to_datetime(test_data[0]))
+            self.assertEqual(metadata[1].encode('utf-8'), test_data[1])
+            self.assertEqual(metadata[2].encode('utf-8'), self.committer)
+            # skip unannotated tags
+            tag_obj = test_data[2]
+            if not tag_obj:
+                continue
+            # tag date, id and name
+            self.assertEqual(metadata[3][0], gmtime_to_datetime(tag_obj[0]))
+            self.assertEqual(metadata[3][1].encode('utf-8'), tag_obj[1])
+            self.assertEqual(metadata[3][2].encode('utf-8'), tag)

+ 656 - 0
dulwich/contrib/test_swift.py

@@ -0,0 +1,656 @@
+# test_swift.py -- Unittests for the Swift backend.
+# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+#
+# Author: Fabien Boucher <fabien.boucher@enovance.com>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for dulwich.contrib.swift."""
+
+import posixpath
+
+from time import time
+from io import BytesIO
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO
+
+import sys
+from unittest import skipIf
+
+from dulwich.tests import (
+    TestCase,
+    )
+from dulwich.tests.test_object_store import (
+    ObjectStoreTests,
+    )
+from dulwich.tests.utils import (
+    build_pack,
+    )
+from dulwich.objects import (
+    Blob,
+    Commit,
+    Tree,
+    Tag,
+    parse_timezone,
+    )
+from dulwich.pack import (
+    REF_DELTA,
+    write_pack_index_v2,
+    PackData,
+    load_pack_index_file,
+    )
+
+try:
+    from simplejson import dumps as json_dumps
+except ImportError:
+    from json import dumps as json_dumps
+
+missing_libs = []
+
+try:
+    import gevent  # noqa:F401
+except ImportError:
+    missing_libs.append("gevent")
+
+try:
+    import geventhttpclient  # noqa:F401
+except ImportError:
+    missing_libs.append("geventhttpclient")
+
+try:
+    from mock import patch
+except ImportError:
+    missing_libs.append("mock")
+
+skipmsg = "Required libraries are not installed (%r)" % missing_libs
+
+skipIfPY3 = skipIf(sys.version_info[0] == 3,
+                   "SWIFT module not yet ported to python3.")
+
+if not missing_libs:
+    from dulwich.contrib import swift
+
+config_file = """[swift]
+auth_url = http://127.0.0.1:8080/auth/%(version_str)s
+auth_ver = %(version_int)s
+username = test;tester
+password = testing
+region_name = %(region_name)s
+endpoint_type = %(endpoint_type)s
+concurrency = %(concurrency)s
+chunk_length = %(chunk_length)s
+cache_length = %(cache_length)s
+http_pool_length = %(http_pool_length)s
+http_timeout = %(http_timeout)s
+"""
+
+def_config_file = {'version_str': 'v1.0',
+                   'version_int': 1,
+                   'concurrency': 1,
+                   'chunk_length': 12228,
+                   'cache_length': 1,
+                   'region_name': 'test',
+                   'endpoint_type': 'internalURL',
+                   'http_pool_length': 1,
+                   'http_timeout': 1}
+
+
+def create_swift_connector(store=None):
+    return lambda root, conf: FakeSwiftConnector(root,
+                                                 conf=conf,
+                                                 store=store)
+
+
+class Response(object):
+
+    def __init__(self, headers=None, status=200, content=None):
+        self.headers = headers or {}
+        self.status_code = status
+        self.content = content
+
+    def __getitem__(self, key):
+        return self.headers[key]
+
+    def items(self):
+        return self.headers.items()
+
+    def read(self):
+        return self.content
+
+
+def fake_auth_request_v1(*args, **kwargs):
+    ret = Response({'X-Storage-Url':
+                    'http://127.0.0.1:8080/v1.0/AUTH_fakeuser',
+                    'X-Auth-Token': '12' * 10},
+                   200)
+    return ret
+
+
+def fake_auth_request_v1_error(*args, **kwargs):
+    ret = Response({},
+                   401)
+    return ret
+
+
+def fake_auth_request_v2(*args, **kwargs):
+    s_url = 'http://127.0.0.1:8080/v1.0/AUTH_fakeuser'
+    resp = {'access': {'token': {'id': '12' * 10},
+                       'serviceCatalog':
+                       [
+                           {'type': 'object-store',
+                            'endpoints': [{'region': 'test',
+                                          'internalURL': s_url,
+                                           },
+                                          ]
+                            },
+                       ]
+                       }
+            }
+    ret = Response(status=200, content=json_dumps(resp))
+    return ret
+
+
+def create_commit(data, marker=b'Default', blob=None):
+    if not blob:
+        blob = Blob.from_string(b'The blob content ' + marker)
+    tree = Tree()
+    tree.add(b"thefile_" + marker, 0o100644, blob.id)
+    cmt = Commit()
+    if data:
+        assert isinstance(data[-1], Commit)
+        cmt.parents = [data[-1].id]
+    cmt.tree = tree.id
+    author = b"John Doe " + marker + b" <john@doe.net>"
+    cmt.author = cmt.committer = author
+    tz = parse_timezone(b'-0200')[0]
+    cmt.commit_time = cmt.author_time = int(time())
+    cmt.commit_timezone = cmt.author_timezone = tz
+    cmt.encoding = b"UTF-8"
+    cmt.message = b"The commit message " + marker
+    tag = Tag()
+    tag.tagger = b"john@doe.net"
+    tag.message = b"Annotated tag"
+    tag.tag_timezone = parse_timezone(b'-0200')[0]
+    tag.tag_time = cmt.author_time
+    tag.object = (Commit, cmt.id)
+    tag.name = b"v_" + marker + b"_0.1"
+    return blob, tree, tag, cmt
+
+
+def create_commits(length=1, marker=b'Default'):
+    data = []
+    for i in range(0, length):
+        _marker = ("%s_%s" % (marker, i)).encode()
+        blob, tree, tag, cmt = create_commit(data, _marker)
+        data.extend([blob, tree, tag, cmt])
+    return data
+
+
+@skipIf(missing_libs, skipmsg)
+class FakeSwiftConnector(object):
+
+    def __init__(self, root, conf, store=None):
+        if store:
+            self.store = store
+        else:
+            self.store = {}
+        self.conf = conf
+        self.root = root
+        self.concurrency = 1
+        self.chunk_length = 12228
+        self.cache_length = 1
+
+    def put_object(self, name, content):
+        name = posixpath.join(self.root, name)
+        if hasattr(content, 'seek'):
+            content.seek(0)
+            content = content.read()
+        self.store[name] = content
+
+    def get_object(self, name, range=None):
+        name = posixpath.join(self.root, name)
+        if not range:
+            try:
+                return BytesIO(self.store[name])
+            except KeyError:
+                return None
+        else:
+            l, r = range.split('-')
+            try:
+                if not l:
+                    r = -int(r)
+                    return self.store[name][r:]
+                else:
+                    return self.store[name][int(l):int(r)]
+            except KeyError:
+                return None
+
+    def get_container_objects(self):
+        return [{'name': k.replace(self.root + '/', '')}
+                for k in self.store]
+
+    def create_root(self):
+        if self.root in self.store.keys():
+            pass
+        else:
+            self.store[self.root] = ''
+
+    def get_object_stat(self, name):
+        name = posixpath.join(self.root, name)
+        if name not in self.store:
+            return None
+        return {'content-length': len(self.store[name])}
+
+
+@skipIf(missing_libs, skipmsg)
+@skipIfPY3
+class TestSwiftObjectStore(TestCase):
+
+    def setUp(self):
+        super(TestSwiftObjectStore, self).setUp()
+        self.conf = swift.load_conf(file=StringIO(config_file %
+                                                  def_config_file))
+        self.fsc = FakeSwiftConnector('fakerepo', conf=self.conf)
+
+    def _put_pack(self, sos, commit_amount=1, marker='Default'):
+        odata = create_commits(length=commit_amount, marker=marker)
+        data = [(d.type_num, d.as_raw_string()) for d in odata]
+        f = BytesIO()
+        build_pack(f, data, store=sos)
+        sos.add_thin_pack(f.read, None)
+        return odata
+
+    def test_load_packs(self):
+        store = {'fakerepo/objects/pack/pack-'+'1'*40+'.idx': '',
+                 'fakerepo/objects/pack/pack-'+'1'*40+'.pack': '',
+                 'fakerepo/objects/pack/pack-'+'1'*40+'.info': '',
+                 'fakerepo/objects/pack/pack-'+'2'*40+'.idx': '',
+                 'fakerepo/objects/pack/pack-'+'2'*40+'.pack': '',
+                 'fakerepo/objects/pack/pack-'+'2'*40+'.info': ''}
+        fsc = FakeSwiftConnector('fakerepo', conf=self.conf, store=store)
+        sos = swift.SwiftObjectStore(fsc)
+        packs = sos._load_packs()
+        self.assertEqual(len(packs), 2)
+        for pack in packs:
+            self.assertTrue(isinstance(pack, swift.SwiftPack))
+
+    def test_add_thin_pack(self):
+        sos = swift.SwiftObjectStore(self.fsc)
+        self._put_pack(sos, 1, 'Default')
+        self.assertEqual(len(self.fsc.store), 3)
+
+    def test_find_missing_objects(self):
+        commit_amount = 3
+        sos = swift.SwiftObjectStore(self.fsc)
+        odata = self._put_pack(sos, commit_amount, 'Default')
+        head = odata[-1].id
+        i = sos.iter_shas(sos.find_missing_objects([],
+                                                   [head, ],
+                                                   progress=None,
+                                                   get_tagged=None))
+        self.assertEqual(len(i), commit_amount * 3)
+        shas = [d.id for d in odata]
+        for sha, path in i:
+            self.assertIn(sha.id, shas)
+
+    def test_find_missing_objects_with_tag(self):
+        commit_amount = 3
+        sos = swift.SwiftObjectStore(self.fsc)
+        odata = self._put_pack(sos, commit_amount, 'Default')
+        head = odata[-1].id
+        peeled_sha = dict([(sha.object[1], sha.id)
+                           for sha in odata if isinstance(sha, Tag)])
+
+        def get_tagged():
+            return peeled_sha
+        i = sos.iter_shas(sos.find_missing_objects([],
+                                                   [head, ],
+                                                   progress=None,
+                                                   get_tagged=get_tagged))
+        self.assertEqual(len(i), commit_amount * 4)
+        shas = [d.id for d in odata]
+        for sha, path in i:
+            self.assertIn(sha.id, shas)
+
+    def test_find_missing_objects_with_common(self):
+        commit_amount = 3
+        sos = swift.SwiftObjectStore(self.fsc)
+        odata = self._put_pack(sos, commit_amount, 'Default')
+        head = odata[-1].id
+        have = odata[7].id
+        i = sos.iter_shas(sos.find_missing_objects([have, ],
+                                                   [head, ],
+                                                   progress=None,
+                                                   get_tagged=None))
+        self.assertEqual(len(i), 3)
+
+    def test_find_missing_objects_multiple_packs(self):
+        sos = swift.SwiftObjectStore(self.fsc)
+        commit_amount_a = 3
+        odataa = self._put_pack(sos, commit_amount_a, 'Default1')
+        heada = odataa[-1].id
+        commit_amount_b = 2
+        odatab = self._put_pack(sos, commit_amount_b, 'Default2')
+        headb = odatab[-1].id
+        i = sos.iter_shas(sos.find_missing_objects([],
+                                                   [heada, headb],
+                                                   progress=None,
+                                                   get_tagged=None))
+        self.assertEqual(len(self.fsc.store), 6)
+        self.assertEqual(len(i),
+                         commit_amount_a * 3 +
+                         commit_amount_b * 3)
+        shas = [d.id for d in odataa]
+        shas.extend([d.id for d in odatab])
+        for sha, path in i:
+            self.assertIn(sha.id, shas)
+
+    def test_add_thin_pack_ext_ref(self):
+        sos = swift.SwiftObjectStore(self.fsc)
+        odata = self._put_pack(sos, 1, 'Default1')
+        ref_blob_content = odata[0].as_raw_string()
+        ref_blob_id = odata[0].id
+        new_blob = Blob.from_string(ref_blob_content.replace('blob',
+                                                             'yummy blob'))
+        blob, tree, tag, cmt = \
+            create_commit([], marker='Default2', blob=new_blob)
+        data = [(REF_DELTA, (ref_blob_id, blob.as_raw_string())),
+                (tree.type_num, tree.as_raw_string()),
+                (cmt.type_num, cmt.as_raw_string()),
+                (tag.type_num, tag.as_raw_string())]
+        f = BytesIO()
+        build_pack(f, data, store=sos)
+        sos.add_thin_pack(f.read, None)
+        self.assertEqual(len(self.fsc.store), 6)
+
+
+@skipIf(missing_libs, skipmsg)
+class TestSwiftRepo(TestCase):
+
+    def setUp(self):
+        super(TestSwiftRepo, self).setUp()
+        self.conf = swift.load_conf(file=StringIO(config_file %
+                                                  def_config_file))
+
+    def test_init(self):
+        store = {'fakerepo/objects/pack': ''}
+        with patch('dulwich.contrib.swift.SwiftConnector',
+                   new_callable=create_swift_connector,
+                   store=store):
+            swift.SwiftRepo('fakerepo', conf=self.conf)
+
+    def test_init_no_data(self):
+        with patch('dulwich.contrib.swift.SwiftConnector',
+                   new_callable=create_swift_connector):
+            self.assertRaises(Exception, swift.SwiftRepo,
+                              'fakerepo', self.conf)
+
+    def test_init_bad_data(self):
+        store = {'fakerepo/.git/objects/pack': ''}
+        with patch('dulwich.contrib.swift.SwiftConnector',
+                   new_callable=create_swift_connector,
+                   store=store):
+            self.assertRaises(Exception, swift.SwiftRepo,
+                              'fakerepo', self.conf)
+
+    def test_put_named_file(self):
+        store = {'fakerepo/objects/pack': ''}
+        with patch('dulwich.contrib.swift.SwiftConnector',
+                   new_callable=create_swift_connector,
+                   store=store):
+            repo = swift.SwiftRepo('fakerepo', conf=self.conf)
+            desc = b'Fake repo'
+            repo._put_named_file('description', desc)
+        self.assertEqual(repo.scon.store['fakerepo/description'],
+                         desc)
+
+    def test_init_bare(self):
+        fsc = FakeSwiftConnector('fakeroot', conf=self.conf)
+        with patch('dulwich.contrib.swift.SwiftConnector',
+                   new_callable=create_swift_connector,
+                   store=fsc.store):
+            swift.SwiftRepo.init_bare(fsc, conf=self.conf)
+        self.assertIn('fakeroot/objects/pack', fsc.store)
+        self.assertIn('fakeroot/info/refs', fsc.store)
+        self.assertIn('fakeroot/description', fsc.store)
+
+
+@skipIf(missing_libs, skipmsg)
+@skipIfPY3
+class TestPackInfoLoadDump(TestCase):
+
+    def setUp(self):
+        super(TestPackInfoLoadDump, self).setUp()
+        conf = swift.load_conf(file=StringIO(config_file %
+                                             def_config_file))
+        sos = swift.SwiftObjectStore(
+            FakeSwiftConnector('fakerepo', conf=conf))
+        commit_amount = 10
+        self.commits = create_commits(length=commit_amount, marker="m")
+        data = [(d.type_num, d.as_raw_string()) for d in self.commits]
+        f = BytesIO()
+        fi = BytesIO()
+        expected = build_pack(f, data, store=sos)
+        entries = [(sha, ofs, checksum) for
+                   ofs, _, _, sha, checksum in expected]
+        self.pack_data = PackData.from_file(file=f, size=None)
+        write_pack_index_v2(
+            fi, entries, self.pack_data.calculate_checksum())
+        fi.seek(0)
+        self.pack_index = load_pack_index_file('', fi)
+
+#    def test_pack_info_perf(self):
+#        dump_time = []
+#        load_time = []
+#        for i in range(0, 100):
+#            start = time()
+#            dumps = swift.pack_info_create(self.pack_data, self.pack_index)
+#            dump_time.append(time() - start)
+#        for i in range(0, 100):
+#            start = time()
+#            pack_infos = swift.load_pack_info('', file=BytesIO(dumps))
+#            load_time.append(time() - start)
+#        print sum(dump_time) / float(len(dump_time))
+#        print sum(load_time) / float(len(load_time))
+
+    def test_pack_info(self):
+        dumps = swift.pack_info_create(self.pack_data, self.pack_index)
+        pack_infos = swift.load_pack_info('', file=BytesIO(dumps))
+        for obj in self.commits:
+            self.assertIn(obj.id, pack_infos)
+
+
+@skipIf(missing_libs, skipmsg)
+class TestSwiftInfoRefsContainer(TestCase):
+
+    def setUp(self):
+        super(TestSwiftInfoRefsContainer, self).setUp()
+        content = (
+            b"22effb216e3a82f97da599b8885a6cadb488b4c5\trefs/heads/master\n"
+            b"cca703b0e1399008b53a1a236d6b4584737649e4\trefs/heads/dev")
+        self.store = {'fakerepo/info/refs': content}
+        self.conf = swift.load_conf(file=StringIO(config_file %
+                                                  def_config_file))
+        self.fsc = FakeSwiftConnector('fakerepo', conf=self.conf)
+        self.object_store = {}
+
+    def test_init(self):
+        """info/refs does not exists"""
+        irc = swift.SwiftInfoRefsContainer(self.fsc, self.object_store)
+        self.assertEqual(len(irc._refs), 0)
+        self.fsc.store = self.store
+        irc = swift.SwiftInfoRefsContainer(self.fsc, self.object_store)
+        self.assertIn(b'refs/heads/dev', irc.allkeys())
+        self.assertIn(b'refs/heads/master', irc.allkeys())
+
+    def test_set_if_equals(self):
+        self.fsc.store = self.store
+        irc = swift.SwiftInfoRefsContainer(self.fsc, self.object_store)
+        irc.set_if_equals(b'refs/heads/dev',
+                          b"cca703b0e1399008b53a1a236d6b4584737649e4", b'1'*40)
+        self.assertEqual(irc[b'refs/heads/dev'], b'1'*40)
+
+    def test_remove_if_equals(self):
+        self.fsc.store = self.store
+        irc = swift.SwiftInfoRefsContainer(self.fsc, self.object_store)
+        irc.remove_if_equals(b'refs/heads/dev',
+                             b"cca703b0e1399008b53a1a236d6b4584737649e4")
+        self.assertNotIn(b'refs/heads/dev', irc.allkeys())
+
+
+@skipIf(missing_libs, skipmsg)
+class TestSwiftConnector(TestCase):
+
+    def setUp(self):
+        super(TestSwiftConnector, self).setUp()
+        self.conf = swift.load_conf(file=StringIO(config_file %
+                                                  def_config_file))
+        with patch('geventhttpclient.HTTPClient.request',
+                   fake_auth_request_v1):
+            self.conn = swift.SwiftConnector('fakerepo', conf=self.conf)
+
+    def test_init_connector(self):
+        self.assertEqual(self.conn.auth_ver, '1')
+        self.assertEqual(self.conn.auth_url,
+                         'http://127.0.0.1:8080/auth/v1.0')
+        self.assertEqual(self.conn.user, 'test:tester')
+        self.assertEqual(self.conn.password, 'testing')
+        self.assertEqual(self.conn.root, 'fakerepo')
+        self.assertEqual(self.conn.storage_url,
+                         'http://127.0.0.1:8080/v1.0/AUTH_fakeuser')
+        self.assertEqual(self.conn.token, '12' * 10)
+        self.assertEqual(self.conn.http_timeout, 1)
+        self.assertEqual(self.conn.http_pool_length, 1)
+        self.assertEqual(self.conn.concurrency, 1)
+        self.conf.set('swift', 'auth_ver', '2')
+        self.conf.set('swift', 'auth_url', 'http://127.0.0.1:8080/auth/v2.0')
+        with patch('geventhttpclient.HTTPClient.request',
+                   fake_auth_request_v2):
+            conn = swift.SwiftConnector('fakerepo', conf=self.conf)
+        self.assertEqual(conn.user, 'tester')
+        self.assertEqual(conn.tenant, 'test')
+        self.conf.set('swift', 'auth_ver', '1')
+        self.conf.set('swift', 'auth_url', 'http://127.0.0.1:8080/auth/v1.0')
+        with patch('geventhttpclient.HTTPClient.request',
+                   fake_auth_request_v1_error):
+            self.assertRaises(swift.SwiftException,
+                              lambda: swift.SwiftConnector('fakerepo',
+                                                           conf=self.conf))
+
+    def test_root_exists(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response()):
+            self.assertEqual(self.conn.test_root_exists(), True)
+
+    def test_root_not_exists(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response(status=404)):
+            self.assertEqual(self.conn.test_root_exists(), None)
+
+    def test_create_root(self):
+        with patch('dulwich.contrib.swift.SwiftConnector.test_root_exists',
+                   lambda *args: None):
+            with patch('geventhttpclient.HTTPClient.request',
+                       lambda *args: Response()):
+                self.assertEqual(self.conn.create_root(), None)
+
+    def test_create_root_fails(self):
+        with patch('dulwich.contrib.swift.SwiftConnector.test_root_exists',
+                   lambda *args: None):
+            with patch('geventhttpclient.HTTPClient.request',
+                       lambda *args: Response(status=404)):
+                self.assertRaises(swift.SwiftException,
+                                  lambda: self.conn.create_root())
+
+    def test_get_container_objects(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response(content=json_dumps(
+                       ({'name': 'a'}, {'name': 'b'})))):
+            self.assertEqual(len(self.conn.get_container_objects()), 2)
+
+    def test_get_container_objects_fails(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response(status=404)):
+            self.assertEqual(self.conn.get_container_objects(), None)
+
+    def test_get_object_stat(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response(headers={'content-length': '10'})):
+            self.assertEqual(self.conn.get_object_stat('a')['content-length'],
+                             '10')
+
+    def test_get_object_stat_fails(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response(status=404)):
+            self.assertEqual(self.conn.get_object_stat('a'), None)
+
+    def test_put_object(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args, **kwargs: Response()):
+            self.assertEqual(self.conn.put_object('a', BytesIO(b'content')),
+                             None)
+
+    def test_put_object_fails(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args, **kwargs: Response(status=400)):
+            self.assertRaises(swift.SwiftException,
+                              lambda: self.conn.put_object(
+                                  'a', BytesIO(b'content')))
+
+    def test_get_object(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args, **kwargs: Response(content=b'content')):
+            self.assertEqual(self.conn.get_object('a').read(), b'content')
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args, **kwargs: Response(content=b'content')):
+            self.assertEqual(
+                    self.conn.get_object('a', range='0-6'),
+                    b'content')
+
+    def test_get_object_fails(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args, **kwargs: Response(status=404)):
+            self.assertEqual(self.conn.get_object('a'), None)
+
+    def test_del_object(self):
+        with patch('geventhttpclient.HTTPClient.request',
+                   lambda *args: Response()):
+            self.assertEqual(self.conn.del_object('a'), None)
+
+    def test_del_root(self):
+        with patch('dulwich.contrib.swift.SwiftConnector.del_object',
+                   lambda *args: None):
+            with patch('dulwich.contrib.swift.SwiftConnector.'
+                       'get_container_objects',
+                       lambda *args: ({'name': 'a'}, {'name': 'b'})):
+                with patch('geventhttpclient.HTTPClient.request',
+                           lambda *args: Response()):
+                    self.assertEqual(self.conn.del_root(), None)
+
+
+@skipIf(missing_libs, skipmsg)
+class SwiftObjectStoreTests(ObjectStoreTests, TestCase):
+
+    def setUp(self):
+        TestCase.setUp(self)
+        conf = swift.load_conf(file=StringIO(config_file %
+                               def_config_file))
+        fsc = FakeSwiftConnector('fakerepo', conf=conf)
+        self.store = swift.SwiftObjectStore(fsc)
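
The unit tests above never touch a live Swift endpoint: every HTTP call that
SwiftConnector would issue through geventhttpclient is patched out and
answered by a canned response object. A minimal sketch of that pattern, using
unittest.mock and a hypothetical FakeResponse standing in for the Response
helper defined earlier in this test module (geventhttpclient must be
importable, as for the tests themselves):

    from unittest import mock

    class FakeResponse(object):
        def __init__(self, status=200, content=b''):
            self.status_code = status
            self._content = content

        def read(self):
            return self._content

    # Code run under this context manager sees the canned response instead
    # of performing any network I/O.
    with mock.patch('geventhttpclient.HTTPClient.request',
                    lambda *args, **kwargs: FakeResponse(content=b'content')):
        pass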

+ 317 - 0
dulwich/contrib/test_swift_smoke.py

@@ -0,0 +1,317 @@
+# test_smoke.py -- Functional tests for the Swift backend.
+# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+#
+# Author: Fabien Boucher <fabien.boucher@enovance.com>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Start functional tests
+
+A Swift installation must be available before
+starting these tests. The account and authentication method used
+during these functional tests must be set in the configuration file
+passed as an environment variable.
+The container used to create a fake repository is defined
+in cls.fakerepo and will be deleted after the tests.
+
+DULWICH_SWIFT_CFG=/tmp/conf.cfg PYTHONPATH=. python -m unittest \
+    dulwich.contrib.test_swift_smoke
+"""
+
+import os
+import unittest
+import tempfile
+import shutil
+
+import gevent
+from gevent import monkey
+monkey.patch_all()
+
+from dulwich import (  # noqa:E402
+    server,
+    repo,
+    index,
+    client,
+    objects,
+    )
+from dulwich.contrib import swift  # noqa:E402
+
+
+class DulwichServer(object):
+    """Start the TCPGitServer with a Swift backend."""
+    def __init__(self, backend, port):
+        self.port = port
+        self.backend = backend
+
+    def run(self):
+        self.server = server.TCPGitServer(self.backend,
+                                          'localhost',
+                                          port=self.port)
+        self.job = gevent.spawn(self.server.serve_forever)
+
+    def stop(self):
+        self.server.shutdown()
+        gevent.joinall((self.job,))
+
+
+class SwiftSystemBackend(server.Backend):
+
+    def open_repository(self, path):
+        return swift.SwiftRepo(path, conf=swift.load_conf())
+
+
+class SwiftRepoSmokeTest(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        cls.backend = SwiftSystemBackend()
+        cls.port = 9148
+        cls.server_address = 'localhost'
+        cls.fakerepo = 'fakerepo'
+        cls.th_server = DulwichServer(cls.backend, cls.port)
+        cls.th_server.run()
+        cls.conf = swift.load_conf()
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.th_server.stop()
+
+    def setUp(self):
+        self.scon = swift.SwiftConnector(self.fakerepo, self.conf)
+        if self.scon.test_root_exists():
+            try:
+                self.scon.del_root()
+            except swift.SwiftException:
+                pass
+        self.temp_d = tempfile.mkdtemp()
+        if os.path.isdir(self.temp_d):
+            shutil.rmtree(self.temp_d)
+
+    def tearDown(self):
+        if self.scon.test_root_exists():
+            try:
+                self.scon.del_root()
+            except swift.SwiftException:
+                pass
+        if os.path.isdir(self.temp_d):
+            shutil.rmtree(self.temp_d)
+
+    def test_init_bare(self):
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        self.assertTrue(self.scon.test_root_exists())
+        obj = self.scon.get_container_objects()
+        filtered = [o for o in obj if o['name'] == 'info/refs'
+                    or o['name'] == 'objects/pack']
+        self.assertEqual(len(filtered), 2)
+
+    def test_clone_bare(self):
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        remote_refs = tcp_client.fetch(self.fakerepo, local_repo)
+        # The remote repo is empty (no refs retrieved)
+        self.assertEqual(remote_refs, None)
+
+    def test_push_commit(self):
+        def determine_wants(*args):
+            return {"refs/heads/master": local_repo.refs["HEAD"]}
+
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        # Nothing in the staging area
+        local_repo.do_commit('Test commit', 'fbo@localhost')
+        sha = local_repo.refs.read_loose_ref('refs/heads/master')
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack(self.fakerepo,
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo("fakerepo", self.conf)
+        remote_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
+        self.assertEqual(sha, remote_sha)
+
+    def test_push_branch(self):
+        def determine_wants(*args):
+            return {"refs/heads/mybranch":
+                    local_repo.refs["refs/heads/mybranch"]}
+
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        # Nothing in the staging area
+        local_repo.do_commit('Test commit', 'fbo@localhost',
+                             ref='refs/heads/mybranch')
+        sha = local_repo.refs.read_loose_ref('refs/heads/mybranch')
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack("/fakerepo",
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
+        remote_sha = swift_repo.refs.read_loose_ref('refs/heads/mybranch')
+        self.assertEqual(sha, remote_sha)
+
+    def test_push_multiple_branch(self):
+        def determine_wants(*args):
+            return {"refs/heads/mybranch":
+                    local_repo.refs["refs/heads/mybranch"],
+                    "refs/heads/master":
+                    local_repo.refs["refs/heads/master"],
+                    "refs/heads/pullr-108":
+                    local_repo.refs["refs/heads/pullr-108"]}
+
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        # Nothing in the staging area
+        local_shas = {}
+        remote_shas = {}
+        for branch in ('master', 'mybranch', 'pullr-108'):
+            local_shas[branch] = local_repo.do_commit(
+                'Test commit %s' % branch, 'fbo@localhost',
+                ref='refs/heads/%s' % branch)
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack(self.fakerepo,
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo("fakerepo", self.conf)
+        for branch in ('master', 'mybranch', 'pullr-108'):
+            remote_shas[branch] = swift_repo.refs.read_loose_ref(
+                'refs/heads/%s' % branch)
+        self.assertDictEqual(local_shas, remote_shas)
+
+    def test_push_data_branch(self):
+        def determine_wants(*args):
+            return {"refs/heads/master": local_repo.refs["HEAD"]}
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        os.mkdir(os.path.join(self.temp_d, "dir"))
+        files = ('testfile', 'testfile2', 'dir/testfile3')
+        for i, f in enumerate(files):
+            with open(os.path.join(self.temp_d, f), 'w') as fd:
+                fd.write("DATA %s" % i)
+        local_repo.stage(files)
+        local_repo.do_commit('Test commit', 'fbo@localhost',
+                             ref='refs/heads/master')
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack(self.fakerepo,
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo("fakerepo", self.conf)
+        commit_sha = swift_repo.refs.read_loose_ref('refs/heads/master')
+        otype, data = swift_repo.object_store.get_raw(commit_sha)
+        commit = objects.ShaFile.from_raw_string(otype, data)
+        otype, data = swift_repo.object_store.get_raw(commit._tree)
+        tree = objects.ShaFile.from_raw_string(otype, data)
+        objs = tree.items()
+        objs_ = []
+        for tree_entry in objs:
+            objs_.append(swift_repo.object_store.get_raw(tree_entry.sha))
+        # Blob
+        self.assertEqual(objs_[1][1], 'DATA 0')
+        self.assertEqual(objs_[2][1], 'DATA 1')
+        # Tree
+        self.assertEqual(objs_[0][0], 2)
+
+    def test_clone_then_push_data(self):
+        self.test_push_data_branch()
+        shutil.rmtree(self.temp_d)
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        remote_refs = tcp_client.fetch(self.fakerepo, local_repo)
+        files = (os.path.join(self.temp_d, 'testfile'),
+                 os.path.join(self.temp_d, 'testfile2'))
+        local_repo["HEAD"] = remote_refs["refs/heads/master"]
+        indexfile = local_repo.index_path()
+        tree = local_repo["HEAD"].tree
+        index.build_index_from_tree(local_repo.path, indexfile,
+                                    local_repo.object_store, tree)
+        for f in files:
+            self.assertEqual(os.path.isfile(f), True)
+
+        def determine_wants(*args):
+            return {"refs/heads/master": local_repo.refs["HEAD"]}
+        os.mkdir(os.path.join(self.temp_d, "test"))
+        files = ('testfile11', 'testfile22', 'test/testfile33')
+        for i, f in enumerate(files):
+            with open(os.path.join(self.temp_d, f), 'w') as fd:
+                fd.write("DATA %s" % i)
+        local_repo.stage(files)
+        local_repo.do_commit('Test commit', 'fbo@localhost',
+                             ref='refs/heads/master')
+        tcp_client.send_pack("/fakerepo",
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+
+    def test_push_remove_branch(self):
+        def determine_wants(*args):
+            return {"refs/heads/pullr-108": objects.ZERO_SHA,
+                    "refs/heads/master":
+                    local_repo.refs['refs/heads/master'],
+                    "refs/heads/mybranch":
+                    local_repo.refs['refs/heads/mybranch'],
+                    }
+        self.test_push_multiple_branch()
+        local_repo = repo.Repo(self.temp_d)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack(self.fakerepo,
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo("fakerepo", self.conf)
+        self.assertNotIn('refs/heads/pullr-108', swift_repo.refs.allkeys())
+
+    def test_push_annotated_tag(self):
+        def determine_wants(*args):
+            return {"refs/heads/master": local_repo.refs["HEAD"],
+                    "refs/tags/v1.0": local_repo.refs["refs/tags/v1.0"]}
+        local_repo = repo.Repo.init(self.temp_d, mkdir=True)
+        # Nothing in the staging area
+        sha = local_repo.do_commit('Test commit', 'fbo@localhost')
+        otype, data = local_repo.object_store.get_raw(sha)
+        commit = objects.ShaFile.from_raw_string(otype, data)
+        tag = objects.Tag()
+        tag.tagger = "fbo@localhost"
+        tag.message = "Annotated tag"
+        tag.tag_timezone = objects.parse_timezone('-0200')[0]
+        tag.tag_time = commit.author_time
+        tag.object = (objects.Commit, commit.id)
+        tag.name = "v0.1"
+        local_repo.object_store.add_object(tag)
+        local_repo.refs['refs/tags/v1.0'] = tag.id
+        swift.SwiftRepo.init_bare(self.scon, self.conf)
+        tcp_client = client.TCPGitClient(self.server_address,
+                                         port=self.port)
+        tcp_client.send_pack(self.fakerepo,
+                             determine_wants,
+                             local_repo.object_store.generate_pack_contents)
+        swift_repo = swift.SwiftRepo(self.fakerepo, self.conf)
+        tag_sha = swift_repo.refs.read_loose_ref('refs/tags/v1.0')
+        otype, data = swift_repo.object_store.get_raw(tag_sha)
+        rtag = objects.ShaFile.from_raw_string(otype, data)
+        self.assertEqual(rtag.object[1], commit.id)
+        self.assertEqual(rtag.id, tag.id)
+
+
+if __name__ == '__main__':
+    unittest.main()
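
Every push test above follows the same recipe: initialize a local repository,
commit, then send a pack to the Dulwich TCP server. Condensed into a sketch
(the host, port, and repository path mirror the values this test class uses
and are illustrative only):

    import tempfile

    from dulwich import client, repo

    local_repo = repo.Repo.init(tempfile.mkdtemp())
    local_repo.do_commit('Example commit', 'author@localhost')

    def determine_wants(*args):
        # Ask the server to point master at the local HEAD.
        return {"refs/heads/master": local_repo.refs["HEAD"]}

    tcp_client = client.TCPGitClient('localhost', port=9148)
    tcp_client.send_pack('fakerepo', determine_wants,
                         local_repo.object_store.generate_pack_contents)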

+ 600 - 0
dulwich/diff_tree.py

@@ -0,0 +1,600 @@
+# diff_tree.py -- Utilities for diffing files and trees.
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Utilities for diffing files and trees."""
+import sys
+from collections import (
+    defaultdict,
+    namedtuple,
+    )
+
+from io import BytesIO
+from itertools import chain
+import stat
+
+from dulwich.objects import (
+    S_ISGITLINK,
+    TreeEntry,
+    )
+
+
+# TreeChange type constants.
+CHANGE_ADD = 'add'
+CHANGE_MODIFY = 'modify'
+CHANGE_DELETE = 'delete'
+CHANGE_RENAME = 'rename'
+CHANGE_COPY = 'copy'
+CHANGE_UNCHANGED = 'unchanged'
+
+RENAME_CHANGE_TYPES = (CHANGE_RENAME, CHANGE_COPY)
+
+_NULL_ENTRY = TreeEntry(None, None, None)
+
+_MAX_SCORE = 100
+RENAME_THRESHOLD = 60
+MAX_FILES = 200
+REWRITE_THRESHOLD = None
+
+
+class TreeChange(namedtuple('TreeChange', ['type', 'old', 'new'])):
+    """Named tuple a single change between two trees."""
+
+    @classmethod
+    def add(cls, new):
+        return cls(CHANGE_ADD, _NULL_ENTRY, new)
+
+    @classmethod
+    def delete(cls, old):
+        return cls(CHANGE_DELETE, old, _NULL_ENTRY)
+
+
+def _tree_entries(path, tree):
+    result = []
+    if not tree:
+        return result
+    for entry in tree.iteritems(name_order=True):
+        result.append(entry.in_path(path))
+    return result
+
+
+def _merge_entries(path, tree1, tree2):
+    """Merge the entries of two trees.
+
+    :param path: A path to prepend to all tree entry names.
+    :param tree1: The first Tree object to iterate, or None.
+    :param tree2: The second Tree object to iterate, or None.
+    :return: A list of pairs of TreeEntry objects for each pair of entries in
+        the trees. If an entry exists in one tree but not the other, the other
+        entry will have all attributes set to None. If neither entry's path is
+        None, they are guaranteed to match.
+    """
+    entries1 = _tree_entries(path, tree1)
+    entries2 = _tree_entries(path, tree2)
+    i1 = i2 = 0
+    len1 = len(entries1)
+    len2 = len(entries2)
+
+    result = []
+    while i1 < len1 and i2 < len2:
+        entry1 = entries1[i1]
+        entry2 = entries2[i2]
+        if entry1.path < entry2.path:
+            result.append((entry1, _NULL_ENTRY))
+            i1 += 1
+        elif entry1.path > entry2.path:
+            result.append((_NULL_ENTRY, entry2))
+            i2 += 1
+        else:
+            result.append((entry1, entry2))
+            i1 += 1
+            i2 += 1
+    for i in range(i1, len1):
+        result.append((entries1[i], _NULL_ENTRY))
+    for i in range(i2, len2):
+        result.append((_NULL_ENTRY, entries2[i]))
+    return result
+
+
+def _is_tree(entry):
+    mode = entry.mode
+    if mode is None:
+        return False
+    return stat.S_ISDIR(mode)
+
+
+def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
+    """Recursively walk all the entries of two trees.
+
+    Iteration is depth-first pre-order, as in e.g. os.walk.
+
+    :param store: An ObjectStore for looking up objects.
+    :param tree1_id: The SHA of the first Tree object to iterate, or None.
+    :param tree2_id: The SHA of the second Tree object to iterate, or None.
+    :param prune_identical: If True, identical subtrees will not be walked.
+    :return: Iterator over Pairs of TreeEntry objects for each pair of entries
+        in the trees and their subtrees recursively. If an entry exists in one
+        tree but not the other, the other entry will have all attributes set
+        to None. If neither entry's path is None, they are guaranteed to
+        match.
+    """
+    # This could be fairly easily generalized to >2 trees if we find a use
+    # case.
+    mode1 = tree1_id and stat.S_IFDIR or None
+    mode2 = tree2_id and stat.S_IFDIR or None
+    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
+    while todo:
+        entry1, entry2 = todo.pop()
+        is_tree1 = _is_tree(entry1)
+        is_tree2 = _is_tree(entry2)
+        if prune_identical and is_tree1 and is_tree2 and entry1 == entry2:
+            continue
+
+        tree1 = is_tree1 and store[entry1.sha] or None
+        tree2 = is_tree2 and store[entry2.sha] or None
+        path = entry1.path or entry2.path
+        todo.extend(reversed(_merge_entries(path, tree1, tree2)))
+        yield entry1, entry2
+
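+# Example (a sketch, assuming `store` contains both trees): pair up the
+# entries of two trees, depth-first:
+#
+#     for entry1, entry2 in walk_trees(store, tree1_id, tree2_id):
+#         print(entry1.path or entry2.path)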
+
+def _skip_tree(entry):
+    if entry.mode is None or stat.S_ISDIR(entry.mode):
+        return _NULL_ENTRY
+    return entry
+
+
+def tree_changes(store, tree1_id, tree2_id, want_unchanged=False,
+                 rename_detector=None):
+    """Find the differences between the contents of two trees.
+
+    :param store: An ObjectStore for looking up objects.
+    :param tree1_id: The SHA of the source tree.
+    :param tree2_id: The SHA of the target tree.
+    :param want_unchanged: If True, include TreeChanges for unmodified entries
+        as well.
+    :param rename_detector: RenameDetector object for detecting renames.
+    :return: Iterator over TreeChange instances for each change between the
+        source and target tree.
+    """
+    if (rename_detector is not None and tree1_id is not None and
+            tree2_id is not None):
+        for change in rename_detector.changes_with_renames(
+                tree1_id, tree2_id, want_unchanged=want_unchanged):
+            yield change
+        return
+
+    entries = walk_trees(store, tree1_id, tree2_id,
+                         prune_identical=(not want_unchanged))
+    for entry1, entry2 in entries:
+        if entry1 == entry2 and not want_unchanged:
+            continue
+
+        # Treat entries for trees as missing.
+        entry1 = _skip_tree(entry1)
+        entry2 = _skip_tree(entry2)
+
+        if entry1 != _NULL_ENTRY and entry2 != _NULL_ENTRY:
+            if stat.S_IFMT(entry1.mode) != stat.S_IFMT(entry2.mode):
+                # File type changed: report as delete/add.
+                yield TreeChange.delete(entry1)
+                entry1 = _NULL_ENTRY
+                change_type = CHANGE_ADD
+            elif entry1 == entry2:
+                change_type = CHANGE_UNCHANGED
+            else:
+                change_type = CHANGE_MODIFY
+        elif entry1 != _NULL_ENTRY:
+            change_type = CHANGE_DELETE
+        elif entry2 != _NULL_ENTRY:
+            change_type = CHANGE_ADD
+        else:
+            # Both were None because at least one was a tree.
+            continue
+        yield TreeChange(change_type, entry1, entry2)
+
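+# Example (a sketch, assuming `store` holds both trees): list every change
+# between two commits:
+#
+#     for change in tree_changes(store, commit1.tree, commit2.tree):
+#         print(change.type, change.old.path, change.new.path)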
+
+def _all_eq(seq, key, value):
+    for e in seq:
+        if key(e) != value:
+            return False
+    return True
+
+
+def _all_same(seq, key):
+    return _all_eq(seq[1:], key, key(seq[0]))
+
+
+def tree_changes_for_merge(store, parent_tree_ids, tree_id,
+                           rename_detector=None):
+    """Get the tree changes for a merge tree relative to all its parents.
+
+    :param store: An ObjectStore for looking up objects.
+    :param parent_tree_ids: An iterable of the SHAs of the parent trees.
+    :param tree_id: The SHA of the merge tree.
+    :param rename_detector: RenameDetector object for detecting renames.
+
+    :return: Iterator over lists of TreeChange objects, one per conflicted path
+        in the merge.
+
+        Each list contains one element per parent, with the TreeChange for that
+        path relative to that parent. An element may be None if it never
+        existed in one parent and was deleted in two others.
+
+        A path is only included in the output if it is a conflict, i.e. its SHA
+        in the merge tree is not found in any of the parents, or in the case of
+        deletes, if not all of the old SHAs match.
+    """
+    all_parent_changes = [tree_changes(store, t, tree_id,
+                                       rename_detector=rename_detector)
+                          for t in parent_tree_ids]
+    num_parents = len(parent_tree_ids)
+    changes_by_path = defaultdict(lambda: [None] * num_parents)
+
+    # Organize by path.
+    for i, parent_changes in enumerate(all_parent_changes):
+        for change in parent_changes:
+            if change.type == CHANGE_DELETE:
+                path = change.old.path
+            else:
+                path = change.new.path
+            changes_by_path[path][i] = change
+
+    def old_sha(c):
+        return c.old.sha
+
+    def change_type(c):
+        return c.type
+
+    # Yield only conflicting changes.
+    for _, changes in sorted(changes_by_path.items()):
+        assert len(changes) == num_parents
+        have = [c for c in changes if c is not None]
+        if _all_eq(have, change_type, CHANGE_DELETE):
+            if not _all_same(have, old_sha):
+                yield changes
+        elif not _all_same(have, change_type):
+            yield changes
+        elif None not in changes:
+            # If no change was found relative to one parent (a None entry),
+            # the SHA must have matched the SHA in that parent, so it is not
+            # a conflict; only yield when every parent shows a change.
+            yield changes
+
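+# Example (a sketch): for a merge commit `m` with parents `p1` and `p2`,
+# all reachable from `store`:
+#
+#     parent_trees = [store[p].tree for p in (p1, p2)]
+#     for conflict in tree_changes_for_merge(store, parent_trees,
+#                                            store[m].tree):
+#         # One TreeChange (or None) per parent for each conflicted path.
+#         print(conflict)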
+
+_BLOCK_SIZE = 64
+
+
+def _count_blocks(obj):
+    """Count the blocks in an object.
+
+    Splits the data into blocks either on lines or <=64-byte chunks of lines.
+
+    :param obj: The object to count blocks for.
+    :return: A dict of block hashcode -> total bytes occurring.
+    """
+    block_counts = defaultdict(int)
+    block = BytesIO()
+    n = 0
+
+    # Cache attrs as locals to avoid expensive lookups in the inner loop.
+    block_write = block.write
+    block_seek = block.seek
+    block_truncate = block.truncate
+    block_getvalue = block.getvalue
+
+    for c in chain(*obj.as_raw_chunks()):
+        if sys.version_info[0] == 3:
+            c = c.to_bytes(1, 'big')
+        block_write(c)
+        n += 1
+        if c == b'\n' or n == _BLOCK_SIZE:
+            value = block_getvalue()
+            block_counts[hash(value)] += len(value)
+            block_seek(0)
+            block_truncate()
+            n = 0
+    if n > 0:
+        last_block = block_getvalue()
+        block_counts[hash(last_block)] += len(last_block)
+    return block_counts
+
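+# For example, b'foo\nbar\n' splits into the blocks b'foo\n' and b'bar\n',
+# so the result maps hash(b'foo\n') -> 4 and hash(b'bar\n') -> 4; a line
+# longer than 64 bytes is flushed in 64-byte chunks.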
+
+def _common_bytes(blocks1, blocks2):
+    """Count the number of common bytes in two block count dicts.
+
+    :param blocks1: The first dict of block hashcode -> total bytes.
+    :param blocks2: The second dict of block hashcode -> total bytes.
+    :return: The number of bytes in common between blocks1 and blocks2. This is
+        only approximate due to possible hash collisions.
+    """
+    # Iterate over the smaller of the two dicts, since this is symmetrical.
+    if len(blocks1) > len(blocks2):
+        blocks1, blocks2 = blocks2, blocks1
+    score = 0
+    for block, count1 in blocks1.items():
+        count2 = blocks2.get(block)
+        if count2:
+            score += min(count1, count2)
+    return score
+
+
+def _similarity_score(obj1, obj2, block_cache=None):
+    """Compute a similarity score for two objects.
+
+    :param obj1: The first object to score.
+    :param obj2: The second object to score.
+    :param block_cache: An optional dict of SHA to block counts to cache
+        results between calls.
+    :return: The similarity score between the two objects, defined as the
+        number of bytes in common between the two objects divided by the
+        maximum size, scaled to the range 0-100.
+    """
+    if block_cache is None:
+        block_cache = {}
+    if obj1.id not in block_cache:
+        block_cache[obj1.id] = _count_blocks(obj1)
+    if obj2.id not in block_cache:
+        block_cache[obj2.id] = _count_blocks(obj2)
+
+    common_bytes = _common_bytes(block_cache[obj1.id], block_cache[obj2.id])
+    max_size = max(obj1.raw_length(), obj2.raw_length())
+    if not max_size:
+        return _MAX_SCORE
+    return int(float(common_bytes) * _MAX_SCORE / max_size)
+
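+# Worked example: two 8-byte blobs sharing 4 bytes of common blocks score
+# int(4 * 100 / 8) == 50; identical objects score 100, disjoint ones 0.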
+
+def _tree_change_key(entry):
+    # Sort by old path then new path. If only one exists, use it for both keys.
+    path1 = entry.old.path
+    path2 = entry.new.path
+    if path1 is None:
+        path1 = path2
+    if path2 is None:
+        path2 = path1
+    return (path1, path2)
+
+
+class RenameDetector(object):
+    """Object for handling rename detection between two trees."""
+
+    def __init__(self, store, rename_threshold=RENAME_THRESHOLD,
+                 max_files=MAX_FILES,
+                 rewrite_threshold=REWRITE_THRESHOLD,
+                 find_copies_harder=False):
+        """Initialize the rename detector.
+
+        :param store: An ObjectStore for looking up objects.
+        :param rename_threshold: The threshold similarity score for considering
+            an add/delete pair to be a rename/copy; see _similarity_score.
+        :param max_files: The maximum number of adds and deletes to consider,
+            or None for no limit. The detector is guaranteed to compare no more
+            than max_files ** 2 add/delete pairs. This limit is provided
+            because rename detection can be quadratic in the project size. If
+            the limit is exceeded, no content rename detection is attempted.
+        :param rewrite_threshold: The threshold similarity score below which a
+            modify should be considered a delete/add, or None to not break
+            modifies; see _similarity_score.
+        :param find_copies_harder: If True, consider unmodified files when
+            detecting copies.
+        """
+        self._store = store
+        self._rename_threshold = rename_threshold
+        self._rewrite_threshold = rewrite_threshold
+        self._max_files = max_files
+        self._find_copies_harder = find_copies_harder
+        self._want_unchanged = False
+
+    def _reset(self):
+        self._adds = []
+        self._deletes = []
+        self._changes = []
+
+    def _should_split(self, change):
+        if (self._rewrite_threshold is None or change.type != CHANGE_MODIFY or
+                change.old.sha == change.new.sha):
+            return False
+        old_obj = self._store[change.old.sha]
+        new_obj = self._store[change.new.sha]
+        return _similarity_score(old_obj, new_obj) < self._rewrite_threshold
+
+    def _add_change(self, change):
+        if change.type == CHANGE_ADD:
+            self._adds.append(change)
+        elif change.type == CHANGE_DELETE:
+            self._deletes.append(change)
+        elif self._should_split(change):
+            self._deletes.append(TreeChange.delete(change.old))
+            self._adds.append(TreeChange.add(change.new))
+        elif ((self._find_copies_harder and change.type == CHANGE_UNCHANGED)
+              or change.type == CHANGE_MODIFY):
+            # Treat all modifies as potential deletes for rename detection,
+            # but don't split them (to avoid spurious renames). Setting
+            # find_copies_harder means we treat unchanged the same as
+            # modified.
+            self._deletes.append(change)
+        else:
+            self._changes.append(change)
+
+    def _collect_changes(self, tree1_id, tree2_id):
+        want_unchanged = self._find_copies_harder or self._want_unchanged
+        for change in tree_changes(self._store, tree1_id, tree2_id,
+                                   want_unchanged=want_unchanged):
+            self._add_change(change)
+
+    def _prune(self, add_paths, delete_paths):
+        self._adds = [a for a in self._adds if a.new.path not in add_paths]
+        self._deletes = [d for d in self._deletes
+                         if d.old.path not in delete_paths]
+
+    def _find_exact_renames(self):
+        add_map = defaultdict(list)
+        for add in self._adds:
+            add_map[add.new.sha].append(add.new)
+        delete_map = defaultdict(list)
+        for delete in self._deletes:
+            # Keep track of whether the delete was actually marked as a delete.
+            # If not, it needs to be marked as a copy.
+            is_delete = delete.type == CHANGE_DELETE
+            delete_map[delete.old.sha].append((delete.old, is_delete))
+
+        add_paths = set()
+        delete_paths = set()
+        for sha, sha_deletes in delete_map.items():
+            sha_adds = add_map[sha]
+            for (old, is_delete), new in zip(sha_deletes, sha_adds):
+                if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
+                    continue
+                if is_delete:
+                    delete_paths.add(old.path)
+                add_paths.add(new.path)
+                new_type = is_delete and CHANGE_RENAME or CHANGE_COPY
+                self._changes.append(TreeChange(new_type, old, new))
+
+            num_extra_adds = len(sha_adds) - len(sha_deletes)
+            # TODO(dborowitz): Less arbitrary way of dealing with extra copies.
+            old = sha_deletes[0][0]
+            if num_extra_adds > 0:
+                for new in sha_adds[-num_extra_adds:]:
+                    add_paths.add(new.path)
+                    self._changes.append(TreeChange(CHANGE_COPY, old, new))
+        self._prune(add_paths, delete_paths)
+
+    def _should_find_content_renames(self):
+        if self._max_files is None:
+            return True
+        return len(self._adds) * len(self._deletes) <= self._max_files ** 2
+
+    def _rename_type(self, check_paths, delete, add):
+        if check_paths and delete.old.path == add.new.path:
+            # If the paths match, this must be a split modify, so make sure it
+            # comes out as a modify.
+            return CHANGE_MODIFY
+        elif delete.type != CHANGE_DELETE:
+            # If it's in deletes but not marked as a delete, it must have been
+            # added due to find_copies_harder, and needs to be marked as a
+            # copy.
+            return CHANGE_COPY
+        return CHANGE_RENAME
+
+    def _find_content_rename_candidates(self):
+        candidates = self._candidates = []
+        # TODO: Optimizations:
+        #  - Compare object sizes before counting blocks.
+        #  - Skip if delete's S_IFMT differs from all adds.
+        #  - Skip if adds or deletes is empty.
+        # Match C git's behavior of not attempting to find content renames if
+        # the matrix size exceeds the threshold.
+        if not self._should_find_content_renames():
+            return
+
+        block_cache = {}
+        check_paths = self._rename_threshold is not None
+        for delete in self._deletes:
+            if S_ISGITLINK(delete.old.mode):
+                continue  # Git links don't exist in this repo.
+            old_sha = delete.old.sha
+            old_obj = self._store[old_sha]
+            block_cache[old_sha] = _count_blocks(old_obj)
+            for add in self._adds:
+                if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
+                    continue
+                new_obj = self._store[add.new.sha]
+                score = _similarity_score(old_obj, new_obj,
+                                          block_cache=block_cache)
+                if score > self._rename_threshold:
+                    new_type = self._rename_type(check_paths, delete, add)
+                    rename = TreeChange(new_type, delete.old, add.new)
+                    candidates.append((-score, rename))
+
+    def _choose_content_renames(self):
+        # Sort scores from highest to lowest, but keep names in ascending
+        # order.
+        self._candidates.sort()
+
+        delete_paths = set()
+        add_paths = set()
+        for _, change in self._candidates:
+            new_path = change.new.path
+            if new_path in add_paths:
+                continue
+            old_path = change.old.path
+            orig_type = change.type
+            if old_path in delete_paths:
+                change = TreeChange(CHANGE_COPY, change.old, change.new)
+
+            # If the candidate was originally a copy, that means it came from a
+            # modified or unchanged path, so we don't want to prune it.
+            if orig_type != CHANGE_COPY:
+                delete_paths.add(old_path)
+            add_paths.add(new_path)
+            self._changes.append(change)
+        self._prune(add_paths, delete_paths)
+
+    def _join_modifies(self):
+        if self._rewrite_threshold is None:
+            return
+
+        modifies = {}
+        delete_map = dict((d.old.path, d) for d in self._deletes)
+        for add in self._adds:
+            path = add.new.path
+            delete = delete_map.get(path)
+            if (delete is not None and
+                    stat.S_IFMT(delete.old.mode) == stat.S_IFMT(add.new.mode)):
+                modifies[path] = TreeChange(CHANGE_MODIFY, delete.old, add.new)
+
+        self._adds = [a for a in self._adds if a.new.path not in modifies]
+        self._deletes = [d for d in self._deletes
+                         if d.old.path not in modifies]
+        self._changes += modifies.values()
+
+    def _sorted_changes(self):
+        result = []
+        result.extend(self._adds)
+        result.extend(self._deletes)
+        result.extend(self._changes)
+        result.sort(key=_tree_change_key)
+        return result
+
+    def _prune_unchanged(self):
+        if self._want_unchanged:
+            return
+        self._deletes = [
+            d for d in self._deletes if d.type != CHANGE_UNCHANGED]
+
+    def changes_with_renames(self, tree1_id, tree2_id, want_unchanged=False):
+        """Iterate TreeChanges between two tree SHAs, with rename detection."""
+        self._reset()
+        self._want_unchanged = want_unchanged
+        self._collect_changes(tree1_id, tree2_id)
+        self._find_exact_renames()
+        self._find_content_rename_candidates()
+        self._choose_content_renames()
+        self._join_modifies()
+        self._prune_unchanged()
+        return self._sorted_changes()
+
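+# Example usage (a sketch): detect renames between two trees, lowering the
+# similarity threshold from the default of 60:
+#
+#     detector = RenameDetector(store, rename_threshold=50)
+#     for change in detector.changes_with_renames(tree1_id, tree2_id):
+#         if change.type in RENAME_CHANGE_TYPES:
+#             print('%s -> %s' % (change.old.path, change.new.path))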
+
+# Hold on to the pure-python implementations for testing.
+_is_tree_py = _is_tree
+_merge_entries_py = _merge_entries
+_count_blocks_py = _count_blocks
+try:
+    # Try to import C versions
+    from dulwich._diff_tree import _is_tree, _merge_entries, _count_blocks
+except ImportError:
+    pass
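
Driven end to end, the functions above take trees from a repository's object
store. A short sketch (the repository path is illustrative, and the repo is
assumed to have at least two commits):

    from dulwich.repo import Repo
    from dulwich.diff_tree import tree_changes

    r = Repo('/path/to/repo')
    entries = list(r.get_walker(max_entries=2))
    old_tree = entries[1].commit.tree
    new_tree = entries[0].commit.tree
    for change in tree_changes(r.object_store, old_tree, new_tree):
        print(change)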

+ 180 - 0
dulwich/errors.py

@@ -0,0 +1,180 @@
+# errors.py -- errors for dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2009-2012 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Dulwich-related exception classes and utility functions."""
+
+import binascii
+
+
+class ChecksumMismatch(Exception):
+    """A checksum didn't match the expected contents."""
+
+    def __init__(self, expected, got, extra=None):
+        if len(expected) == 20:
+            expected = binascii.hexlify(expected)
+        if len(got) == 20:
+            got = binascii.hexlify(got)
+        self.expected = expected
+        self.got = got
+        self.extra = extra
+        if self.extra is None:
+            Exception.__init__(
+                self, "Checksum mismatch: Expected %s, got %s" %
+                (expected, got))
+        else:
+            Exception.__init__(
+                self, "Checksum mismatch: Expected %s, got %s; %s" %
+                (expected, got, extra))
+
+
+class WrongObjectException(Exception):
+    """Baseclass for all the _ is not a _ exceptions on objects.
+
+    Do not instantiate directly.
+
+    Subclasses should define a type_name attribute that indicates what
+    was expected if they were raised.
+    """
+
+    def __init__(self, sha, *args, **kwargs):
+        Exception.__init__(self, "%s is not a %s" % (sha, self.type_name))
+
+
+class NotCommitError(WrongObjectException):
+    """Indicates that the sha requested does not point to a commit."""
+
+    type_name = 'commit'
+
+
+class NotTreeError(WrongObjectException):
+    """Indicates that the sha requested does not point to a tree."""
+
+    type_name = 'tree'
+
+
+class NotTagError(WrongObjectException):
+    """Indicates that the sha requested does not point to a tag."""
+
+    type_name = 'tag'
+
+
+class NotBlobError(WrongObjectException):
+    """Indicates that the sha requested does not point to a blob."""
+
+    type_name = 'blob'
+
+
+class MissingCommitError(Exception):
+    """Indicates that a commit was not found in the repository"""
+
+    def __init__(self, sha, *args, **kwargs):
+        self.sha = sha
+        Exception.__init__(self, "%s is not in the revision store" % sha)
+
+
+class ObjectMissing(Exception):
+    """Indicates that a requested object is missing."""
+
+    def __init__(self, sha, *args, **kwargs):
+        Exception.__init__(self, "%s is not in the pack" % sha)
+
+
+class ApplyDeltaError(Exception):
+    """Indicates that applying a delta failed."""
+
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class NotGitRepository(Exception):
+    """Indicates that no Git repository was found."""
+
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class GitProtocolError(Exception):
+    """Git protocol exception."""
+
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class SendPackError(GitProtocolError):
+    """An error occurred during send_pack."""
+
+    def __init__(self, *args, **kwargs):
+        Exception.__init__(self, *args, **kwargs)
+
+
+class UpdateRefsError(GitProtocolError):
+    """The server reported errors updating refs."""
+
+    def __init__(self, *args, **kwargs):
+        self.ref_status = kwargs.pop('ref_status')
+        Exception.__init__(self, *args, **kwargs)
+
+
+class HangupException(GitProtocolError):
+    """Hangup exception."""
+
+    def __init__(self):
+        Exception.__init__(
+            self, "The remote server unexpectedly closed the connection.")
+
+
+class UnexpectedCommandError(GitProtocolError):
+    """Unexpected command received in a proto line."""
+
+    def __init__(self, command):
+        if command is None:
+            command = 'flush-pkt'
+        else:
+            command = 'command %s' % command
+        GitProtocolError.__init__(self, 'Protocol got unexpected %s' % command)
+
+
+class FileFormatException(Exception):
+    """Base class for exceptions relating to reading git file formats."""
+
+
+class PackedRefsException(FileFormatException):
+    """Indicates an error parsing a packed-refs file."""
+
+
+class ObjectFormatException(FileFormatException):
+    """Indicates an error parsing an object."""
+
+
+class NoIndexPresent(Exception):
+    """No index is present."""
+
+
+class CommitError(Exception):
+    """An error occurred while performing a commit."""
+
+
+class RefFormatError(Exception):
+    """Indicates an invalid ref name."""
+
+
+class HookError(Exception):
+    """An error occurred while executing a hook."""

+ 250 - 0
dulwich/fastexport.py

@@ -0,0 +1,250 @@
+# __init__.py -- Fast export/import functionality
+# Copyright (C) 2010-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Fast export/import functionality."""
+
+import sys
+
+from dulwich.index import (
+    commit_tree,
+    )
+from dulwich.objects import (
+    Blob,
+    Commit,
+    Tag,
+    ZERO_SHA,
+    )
+from fastimport import __version__ as fastimport_version
+if (fastimport_version <= (0, 9, 5) and
+        sys.version_info[0] == 3 and sys.version_info[1] < 5):
+    raise ImportError("fastimport <= 0.9.5 does not support Python 3 < 3.5")
+from fastimport import (  # noqa: E402
+    commands,
+    errors as fastimport_errors,
+    parser,
+    processor,
+    )
+
+import stat  # noqa: E402
+
+
+def split_email(text):
+    (name, email) = text.rsplit(b" <", 1)
+    return (name, email.rstrip(b">"))
+
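+# For example, split_email(b"Jane Doe <jane@example.com>") returns
+# (b"Jane Doe", b"jane@example.com").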
+
+class GitFastExporter(object):
+    """Generate a fast-export output stream for Git objects."""
+
+    def __init__(self, outf, store):
+        self.outf = outf
+        self.store = store
+        self.markers = {}
+        self._marker_idx = 0
+
+    def print_cmd(self, cmd):
+        self.outf.write(getattr(cmd, "__bytes__", cmd.__repr__)() + b"\n")
+
+    def _allocate_marker(self):
+        self._marker_idx += 1
+        return ("%d" % (self._marker_idx,)).encode('ascii')
+
+    def _export_blob(self, blob):
+        marker = self._allocate_marker()
+        self.markers[marker] = blob.id
+        return (commands.BlobCommand(marker, blob.data), marker)
+
+    def emit_blob(self, blob):
+        (cmd, marker) = self._export_blob(blob)
+        self.print_cmd(cmd)
+        return marker
+
+    def _iter_files(self, base_tree, new_tree):
+        for ((old_path, new_path), (old_mode, new_mode),
+             (old_hexsha, new_hexsha)) in \
+                self.store.tree_changes(base_tree, new_tree):
+            if new_path is None:
+                yield commands.FileDeleteCommand(old_path)
+                continue
+            marker = None
+            if not stat.S_ISDIR(new_mode):
+                blob = self.store[new_hexsha]
+                marker = self.emit_blob(blob)
+            if old_path != new_path and old_path is not None:
+                yield commands.FileRenameCommand(old_path, new_path)
+            if marker is not None and (
+                    old_mode != new_mode or old_hexsha != new_hexsha):
+                prefixed_marker = b':' + marker
+                yield commands.FileModifyCommand(
+                    new_path, new_mode, prefixed_marker, None
+                )
+
+    def _export_commit(self, commit, ref, base_tree=None):
+        file_cmds = list(self._iter_files(base_tree, commit.tree))
+        marker = self._allocate_marker()
+        if commit.parents:
+            from_ = commit.parents[0]
+            merges = commit.parents[1:]
+        else:
+            from_ = None
+            merges = []
+        author, author_email = split_email(commit.author)
+        committer, committer_email = split_email(commit.committer)
+        cmd = commands.CommitCommand(
+            ref, marker,
+            (author, author_email, commit.author_time, commit.author_timezone),
+            (committer, committer_email, commit.commit_time,
+                commit.commit_timezone),
+            commit.message, from_, merges, file_cmds)
+        return (cmd, marker)
+
+    def emit_commit(self, commit, ref, base_tree=None):
+        cmd, marker = self._export_commit(commit, ref, base_tree)
+        self.print_cmd(cmd)
+        return marker
+
+
+class GitImportProcessor(processor.ImportProcessor):
+    """An import processor that imports into a Git repository using Dulwich.
+
+    """
+    # FIXME: Batch creation of objects?
+
+    def __init__(self, repo, params=None, verbose=False, outf=None):
+        processor.ImportProcessor.__init__(self, params, verbose)
+        self.repo = repo
+        self.last_commit = ZERO_SHA
+        self.markers = {}
+        self._contents = {}
+
+    def import_stream(self, stream):
+        p = parser.ImportParser(stream)
+        self.process(p.iter_commands)
+        return self.markers
+
+    def blob_handler(self, cmd):
+        """Process a BlobCommand."""
+        blob = Blob.from_string(cmd.data)
+        self.repo.object_store.add_object(blob)
+        if cmd.mark:
+            self.markers[cmd.mark] = blob.id
+
+    def checkpoint_handler(self, cmd):
+        """Process a CheckpointCommand."""
+        pass
+
+    def commit_handler(self, cmd):
+        """Process a CommitCommand."""
+        commit = Commit()
+        if cmd.author is not None:
+            author = cmd.author
+        else:
+            author = cmd.committer
+        (author_name, author_email, author_timestamp, author_timezone) = author
+        (committer_name, committer_email, commit_timestamp,
+            commit_timezone) = cmd.committer
+        commit.author = author_name + b" <" + author_email + b">"
+        commit.author_timezone = author_timezone
+        commit.author_time = int(author_timestamp)
+        commit.committer = committer_name + b" <" + committer_email + b">"
+        commit.commit_timezone = commit_timezone
+        commit.commit_time = int(commit_timestamp)
+        commit.message = cmd.message
+        commit.parents = []
+        if cmd.from_:
+            self._reset_base(cmd.from_)
+        for filecmd in cmd.iter_files():
+            if filecmd.name == b"filemodify":
+                if filecmd.data is not None:
+                    blob = Blob.from_string(filecmd.data)
+                    self.repo.object_store.add_object(blob)
+                    blob_id = blob.id
+                else:
+                    assert filecmd.dataref.startswith(b":"), \
+                           ("non-marker refs not supported yet (%r)" %
+                            filecmd.dataref)
+                    blob_id = self.markers[filecmd.dataref[1:]]
+                self._contents[filecmd.path] = (filecmd.mode, blob_id)
+            elif filecmd.name == b"filedelete":
+                del self._contents[filecmd.path]
+            elif filecmd.name == b"filecopy":
+                self._contents[filecmd.dest_path] = self._contents[
+                    filecmd.src_path]
+            elif filecmd.name == b"filerename":
+                self._contents[filecmd.new_path] = self._contents[
+                    filecmd.old_path]
+                del self._contents[filecmd.old_path]
+            elif filecmd.name == b"filedeleteall":
+                self._contents = {}
+            else:
+                raise Exception("Command %s not supported" % filecmd.name)
+        commit.tree = commit_tree(
+            self.repo.object_store,
+            ((path, hexsha, mode) for (path, (mode, hexsha)) in
+                self._contents.items()))
+        if self.last_commit != ZERO_SHA:
+            commit.parents.append(self.last_commit)
+        for merge in cmd.merges:
+            if merge.startswith(b':'):
+                merge = self.markers[merge[1:]]
+            commit.parents.append(merge)
+        self.repo.object_store.add_object(commit)
+        self.repo[cmd.ref] = commit.id
+        self.last_commit = commit.id
+        if cmd.mark:
+            self.markers[cmd.mark] = commit.id
+
+    def progress_handler(self, cmd):
+        """Process a ProgressCommand."""
+        pass
+
+    def _reset_base(self, commit_id):
+        if self.last_commit == commit_id:
+            return
+        self._contents = {}
+        self.last_commit = commit_id
+        if commit_id != ZERO_SHA:
+            tree_id = self.repo[commit_id].tree
+            for (path, mode, hexsha) in (
+                    self.repo.object_store.iter_tree_contents(tree_id)):
+                self._contents[path] = (mode, hexsha)
+
+    def reset_handler(self, cmd):
+        """Process a ResetCommand."""
+        if cmd.from_ is None:
+            from_ = ZERO_SHA
+        else:
+            from_ = cmd.from_
+            if from_.startswith(b":"):
+                from_ = self.markers[from_[1:]]
+        self._reset_base(from_)
+        self.repo.refs[cmd.ref] = from_
+
+    def tag_handler(self, cmd):
+        """Process a TagCommand."""
+        tag = Tag()
+        tag.tagger = cmd.tagger
+        tag.message = cmd.message
+        tag.name = cmd.tag
+        self.repo.object_store.add_object(tag)
+        self.repo.refs[b"refs/tags/" + tag.name] = tag.id
+
+    def feature_handler(self, cmd):
+        """Process a FeatureCommand."""
+        raise fastimport_errors.UnknownFeature(cmd.feature_name)

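A minimal sketch of driving the exporter above, assuming the
python-fastimport dependency is installed, the current directory is a git
repository, and the output filename and branch name are arbitrary:

    from dulwich.repo import Repo
    from dulwich.fastexport import GitFastExporter

    repo = Repo(".")  # hypothetical: any local repository with a HEAD
    with open("export.fi", "wb") as outf:
        exporter = GitFastExporter(outf, repo.object_store)
        head = repo[repo.head()]
        # With no base tree, every file in the commit is emitted as an add.
        exporter.emit_commit(head, b"refs/heads/master")
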
+ 176 - 0
dulwich/file.py

@@ -0,0 +1,176 @@
+# file.py -- Safe access to git files
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Safe access to git files."""
+
+import errno
+import io
+import os
+import sys
+import tempfile
+
+
+def ensure_dir_exists(dirname):
+    """Ensure a directory exists, creating if necessary."""
+    try:
+        os.makedirs(dirname)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
+
+
+def _fancy_rename(oldname, newname):
+    """Rename file with temporary backup file to rollback if rename fails"""
+    if not os.path.exists(newname):
+        os.rename(oldname, newname)
+        return
+
+    # destination file exists
+    try:
+        (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=oldname+".", dir=".")
+        os.close(fd)
+        os.remove(tmpfile)
+    except OSError:
+        # either file could not be created (e.g. permission problem)
+        # or could not be deleted (e.g. rude virus scanner)
+        raise
+    try:
+        os.rename(newname, tmpfile)
+    except OSError:
+        raise   # no rename occurred
+    try:
+        os.rename(oldname, newname)
+    except OSError:
+        os.rename(tmpfile, newname)
+        raise
+    os.remove(tmpfile)
+
+
+def GitFile(filename, mode='rb', bufsize=-1):
+    """Create a file object that obeys the git file locking protocol.
+
+    :return: a builtin file object or a _GitFile object
+
+    :note: See _GitFile for a description of the file locking protocol.
+
+    Only read-only and write-only (binary) modes are supported; r+, w+, and a
+    are not.  To read and write from the same file, you can take advantage of
+    the fact that opening a file for write does not actually open the file you
+    request.
+    """
+    if 'a' in mode:
+        raise IOError('append mode not supported for Git files')
+    if '+' in mode:
+        raise IOError('read/write mode not supported for Git files')
+    if 'b' not in mode:
+        raise IOError('text mode not supported for Git files')
+    if 'w' in mode:
+        return _GitFile(filename, mode, bufsize)
+    else:
+        return io.open(filename, mode, bufsize)
+
+
+class _GitFile(object):
+    """File that follows the git locking protocol for writes.
+
+    All writes to a file foo will be written into foo.lock in the same
+    directory, and the lockfile will be renamed to overwrite the original file
+    on close.
+
+    :note: You *must* call close() or abort() on a _GitFile for the lock to be
+        released. Typically this will happen in a finally block.
+    """
+
+    PROXY_PROPERTIES = set(['closed', 'encoding', 'errors', 'mode', 'name',
+                            'newlines', 'softspace'])
+    PROXY_METHODS = ('__iter__', 'flush', 'fileno', 'isatty', 'read',
+                     'readline', 'readlines', 'seek', 'tell',
+                     'truncate', 'write', 'writelines')
+
+    def __init__(self, filename, mode, bufsize):
+        self._filename = filename
+        self._lockfilename = '%s.lock' % self._filename
+        fd = os.open(
+            self._lockfilename,
+            os.O_RDWR | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0))
+        self._file = os.fdopen(fd, mode, bufsize)
+        self._closed = False
+
+        for method in self.PROXY_METHODS:
+            setattr(self, method, getattr(self._file, method))
+
+    def abort(self):
+        """Close and discard the lockfile without overwriting the target.
+
+        If the file is already closed, this is a no-op.
+        """
+        if self._closed:
+            return
+        self._file.close()
+        try:
+            os.remove(self._lockfilename)
+            self._closed = True
+        except OSError as e:
+            # The file may have been removed already, which is ok.
+            if e.errno != errno.ENOENT:
+                raise
+            self._closed = True
+
+    def close(self):
+        """Close this file, saving the lockfile over the original.
+
+        :note: If this method fails, it will attempt to delete the lockfile.
+            However, it is not guaranteed to do so (e.g. if a filesystem
+            becomes suddenly read-only), which will prevent future writes to
+            this file until the lockfile is removed manually.
+        :raises OSError: if the original file could not be overwritten. The
+            lock file is still closed, so further attempts to write to the same
+            file object will raise ValueError.
+        """
+        if self._closed:
+            return
+        self._file.close()
+        try:
+            try:
+                os.rename(self._lockfilename, self._filename)
+            except OSError as e:
+                if sys.platform == 'win32' and e.errno == errno.EEXIST:
+                    # Windows versions prior to Vista don't support atomic
+                    # renames
+                    _fancy_rename(self._lockfilename, self._filename)
+                else:
+                    raise
+        finally:
+            self.abort()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def __getattr__(self, name):
+        """Proxy property calls to the underlying file."""
+        if name in self.PROXY_PROPERTIES:
+            return getattr(self._file, name)
+        raise AttributeError(name)

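A short usage sketch of the locking protocol implemented above; the filename
is hypothetical. All writes land in "example.txt.lock", and close() renames
the lockfile over the target:

    from dulwich.file import GitFile

    with GitFile("example.txt", "wb") as f:
        f.write(b"atomically replaced content\n")
    # On normal exit the lock is released and example.txt is replaced;
    # calling f.abort() instead would discard the lockfile.
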
+ 142 - 0
dulwich/greenthreads.py

@@ -0,0 +1,142 @@
+# greenthreads.py -- Utility module for querying an ObjectStore with gevent
+# Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
+#
+# Author: Fabien Boucher <fabien.boucher@enovance.com>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Utility module for querying an ObjectStore with gevent."""
+
+import gevent
+from gevent import pool
+
+from dulwich.objects import (
+    Commit,
+    Tag,
+    )
+from dulwich.object_store import (
+    MissingObjectFinder,
+    _collect_filetree_revs,
+    ObjectStoreIterator,
+    )
+
+
+def _split_commits_and_tags(obj_store, lst,
+                            ignore_unknown=False, pool=None):
+    """Split object id list into two list with commit SHA1s and tag SHA1s.
+
+    Same implementation as object_store._split_commits_and_tags
+    except we use gevent to parallelize object retrieval.
+    """
+    commits = set()
+    tags = set()
+
+    def find_commit_type(sha):
+        try:
+            o = obj_store[sha]
+        except KeyError:
+            if not ignore_unknown:
+                raise
+        else:
+            if isinstance(o, Commit):
+                commits.add(sha)
+            elif isinstance(o, Tag):
+                tags.add(sha)
+                commits.add(o.object[1])
+            else:
+                raise KeyError('Not a commit or a tag: %s' % sha)
+    jobs = [pool.spawn(find_commit_type, s) for s in lst]
+    gevent.joinall(jobs)
+    return (commits, tags)
+
+
+class GreenThreadsMissingObjectFinder(MissingObjectFinder):
+    """Find the objects missing from another object store.
+
+    Same implementation as object_store.MissingObjectFinder
+    except we use gevent to parallelize object retrieval.
+    """
+    def __init__(self, object_store, haves, wants,
+                 progress=None, get_tagged=None,
+                 concurrency=1, get_parents=None):
+
+        def collect_tree_sha(sha):
+            self.sha_done.add(sha)
+            cmt = object_store[sha]
+            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+
+        self.object_store = object_store
+        p = pool.Pool(size=concurrency)
+
+        have_commits, have_tags = \
+            _split_commits_and_tags(object_store, haves,
+                                    True, p)
+        want_commits, want_tags = \
+            _split_commits_and_tags(object_store, wants,
+                                    False, p)
+        all_ancestors = object_store._collect_ancestors(have_commits)[0]
+        missing_commits, common_commits = \
+            object_store._collect_ancestors(want_commits, all_ancestors)
+
+        self.sha_done = set()
+        jobs = [p.spawn(collect_tree_sha, c) for c in common_commits]
+        gevent.joinall(jobs)
+        for t in have_tags:
+            self.sha_done.add(t)
+        missing_tags = want_tags.difference(have_tags)
+        wants = missing_commits.union(missing_tags)
+        self.objects_to_send = set([(w, None, False) for w in wants])
+        if progress is None:
+            self.progress = lambda x: None
+        else:
+            self.progress = progress
+        self._tagged = get_tagged and get_tagged() or {}
+
+
+class GreenThreadsObjectStoreIterator(ObjectStoreIterator):
+    """ObjectIterator that works on top of an ObjectStore.
+
+    Same implementation as object_store.ObjectStoreIterator
+    except we use gevent to parallelize object retrieval.
+    """
+    def __init__(self, store, shas, finder, concurrency=1):
+        self.finder = finder
+        self.p = pool.Pool(size=concurrency)
+        super(GreenThreadsObjectStoreIterator, self).__init__(store, shas)
+
+    def retrieve(self, args):
+        sha, path = args
+        return self.store[sha], path
+
+    def __iter__(self):
+        for sha, path in self.p.imap_unordered(self.retrieve,
+                                               self.itershas()):
+            yield sha, path
+
+    def __len__(self):
+        if len(self._shas) > 0:
+            return len(self._shas)
+        while len(self.finder.objects_to_send):
+            jobs = []
+            for _ in range(0, len(self.finder.objects_to_send)):
+                jobs.append(self.p.spawn(self.finder.next))
+            gevent.joinall(jobs)
+            for j in jobs:
+                if j.value is not None:
+                    self._shas.append(j.value)
+        return len(self._shas)

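A minimal construction sketch for the finder above, assuming gevent is
installed and the current directory is a repository with at least one commit:

    from dulwich.repo import Repo
    from dulwich.greenthreads import GreenThreadsMissingObjectFinder

    repo = Repo(".")  # hypothetical local repository
    # Determine what a client with no objects would need, using 4 greenlets.
    finder = GreenThreadsMissingObjectFinder(
        repo.object_store, haves=[], wants=[repo.head()], concurrency=4)
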
+ 152 - 0
dulwich/hooks.py

@@ -0,0 +1,152 @@
+# hooks.py -- for dealing with git hooks
+# Copyright (C) 2012-2013 Jelmer Vernooij and others.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Access to hooks."""
+
+import os
+import subprocess
+import sys
+import tempfile
+
+from dulwich.errors import (
+    HookError,
+)
+
+
+class Hook(object):
+    """Generic hook object."""
+
+    def execute(self, *args):
+        """Execute the hook with the given args
+
+        :param args: argument list to hook
+        :raise HookError: hook execution failure
+        :return: a hook-specific return value, if any
+        """
+        raise NotImplementedError(self.execute)
+
+
+class ShellHook(Hook):
+    """Hook by executable file
+
+    Implements standard githooks(5) [0]:
+
+    [0] http://www.kernel.org/pub/software/scm/git/docs/githooks.html
+    """
+
+    def __init__(self, name, path, numparam,
+                 pre_exec_callback=None, post_exec_callback=None):
+        """Setup shell hook definition
+
+        :param name: name of hook for error messages
+        :param path: absolute path to executable file
+        :param numparam: number of required parameters
+        :param pre_exec_callback: closure for setup before execution
+            Defaults to None. Takes in the variable argument list from the
+            execute functions and returns a modified argument list for the
+            shell hook.
+        :param post_exec_callback: closure for cleanup after execution
+            Defaults to None. Takes in a boolean for hook success and the
+            modified argument list and returns the final hook return value
+            if applicable
+        """
+        self.name = name
+        self.filepath = path
+        self.numparam = numparam
+
+        self.pre_exec_callback = pre_exec_callback
+        self.post_exec_callback = post_exec_callback
+
+        if sys.version_info[0] == 2 and sys.platform == 'win32':
+            # Python 2 on windows does not support unicode file paths
+            # http://bugs.python.org/issue1759845
+            self.filepath = self.filepath.encode(sys.getfilesystemencoding())
+
+    def execute(self, *args):
+        """Execute the hook with given args"""
+
+        if len(args) != self.numparam:
+            raise HookError("Hook %s executed with wrong number of args. \
+                            Expected %d. Saw %d. args: %s"
+                            % (self.name, self.numparam, len(args), args))
+
+        if (self.pre_exec_callback is not None):
+            args = self.pre_exec_callback(*args)
+
+        try:
+            ret = subprocess.call([self.filepath] + list(args))
+            if ret != 0:
+                if (self.post_exec_callback is not None):
+                    self.post_exec_callback(0, *args)
+                raise HookError("Hook %s exited with non-zero status"
+                                % (self.name))
+            if (self.post_exec_callback is not None):
+                return self.post_exec_callback(1, *args)
+        except OSError:  # no file. silent failure.
+            if (self.post_exec_callback is not None):
+                self.post_exec_callback(0, *args)
+
+
+class PreCommitShellHook(ShellHook):
+    """pre-commit shell hook"""
+
+    def __init__(self, controldir):
+        filepath = os.path.join(controldir, 'hooks', 'pre-commit')
+
+        ShellHook.__init__(self, 'pre-commit', filepath, 0)
+
+
+class PostCommitShellHook(ShellHook):
+    """post-commit shell hook"""
+
+    def __init__(self, controldir):
+        filepath = os.path.join(controldir, 'hooks', 'post-commit')
+
+        ShellHook.__init__(self, 'post-commit', filepath, 0)
+
+
+class CommitMsgShellHook(ShellHook):
+    """commit-msg shell hook
+
+    :param args[0]: commit message
+    :return: new commit message or None
+    """
+
+    def __init__(self, controldir):
+        filepath = os.path.join(controldir, 'hooks', 'commit-msg')
+
+        def prepare_msg(*args):
+            (fd, path) = tempfile.mkstemp()
+
+            with os.fdopen(fd, 'wb') as f:
+                f.write(args[0])
+
+            return (path,)
+
+        def clean_msg(success, *args):
+            if success:
+                with open(args[0], 'rb') as f:
+                    new_msg = f.read()
+                os.unlink(args[0])
+                return new_msg
+            os.unlink(args[0])
+
+        ShellHook.__init__(self, 'commit-msg', filepath, 1,
+                           prepare_msg, clean_msg)

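A usage sketch for the shell hooks above; the control-directory path is
hypothetical. A missing hook file fails silently, matching the OSError
branch in execute():

    from dulwich.errors import HookError
    from dulwich.hooks import PreCommitShellHook

    hook = PreCommitShellHook("/path/to/repo/.git")
    try:
        hook.execute()  # runs .git/hooks/pre-commit if it exists
    except HookError as e:
        print("pre-commit hook rejected the commit: %s" % e)
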
+ 358 - 0
dulwich/ignore.py

@@ -0,0 +1,358 @@
+# Copyright (C) 2017 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Parsing of gitignore files.
+
+For details for the matching rules, see https://git-scm.com/docs/gitignore
+"""
+
+import os.path
+import re
+import sys
+
+
+def _translate_segment(segment):
+    if segment == b"*":
+        return b'[^/]+'
+    res = b""
+    i, n = 0, len(segment)
+    while i < n:
+        c = segment[i:i+1]
+        i = i+1
+        if c == b'*':
+            res += b'[^/]*'
+        elif c == b'?':
+            res += b'.'
+        elif c == b'[':
+            j = i
+            if j < n and segment[j:j+1] == b'!':
+                j = j+1
+            if j < n and segment[j:j+1] == b']':
+                j = j+1
+            while j < n and segment[j:j+1] != b']':
+                j = j+1
+            if j >= n:
+                res += b'\\['
+            else:
+                stuff = segment[i:j].replace(b'\\', b'\\\\')
+                i = j+1
+                if stuff.startswith(b'!'):
+                    stuff = b'^' + stuff[1:]
+                elif stuff.startswith(b'^'):
+                    stuff = b'\\' + stuff
+                res += b'[' + stuff + b']'
+        else:
+            res += re.escape(c)
+    return res
+
+
+def translate(pat):
+    """Translate a shell PATTERN to a regular expression.
+
+    There is no way to quote meta-characters.
+
+    Originally copied from fnmatch in Python 2.7, but modified for Dulwich
+    to cope with features in Git ignore patterns.
+    """
+
+    res = b'(?ms)'
+
+    if b'/' not in pat[:-1]:
+        # No slash (except possibly a trailing one): match the
+        # basename anywhere in the tree
+        res += b'(.*/)?'
+
+    if pat.startswith(b'**/'):
+        # Leading **/
+        pat = pat[2:]
+        res += b'(.*/)?'
+
+    if pat.startswith(b'/'):
+        pat = pat[1:]
+
+    for i, segment in enumerate(pat.split(b'/')):
+        if segment == b'**':
+            res += b'(/.*)?'
+            continue
+        else:
+            res += ((re.escape(b'/') if i > 0 else b'') +
+                    _translate_segment(segment))
+
+    if not pat.endswith(b'/'):
+        res += b'/?'
+
+    return res + b'\\Z'
+
+
+def read_ignore_patterns(f):
+    """Read a git ignore file.
+
+    :param f: File-like object to read from
+    :return: List of patterns
+    """
+
+    for l in f:
+        l = l.rstrip(b"\r\n")
+
+        # Ignore blank lines, they're used for readability.
+        if not l:
+            continue
+
+        if l.startswith(b'#'):
+            # Comment
+            continue
+
+        # Trailing spaces are ignored unless they are quoted with a backslash.
+        while l.endswith(b' ') and not l.endswith(b'\\ '):
+            l = l[:-1]
+        l = l.replace(b'\\ ', b' ')
+
+        yield l
+
+
+def match_pattern(path, pattern, ignorecase=False):
+    """Match a gitignore-style pattern against a path.
+
+    :param path: Path to match
+    :param pattern: Pattern to match
+    :param ignorecase: Whether to do case-insensitive matching
+    :return: bool indicating whether the pattern matched
+    """
+    return Pattern(pattern, ignorecase).match(path)
+
+
+class Pattern(object):
+    """A single ignore pattern."""
+
+    def __init__(self, pattern, ignorecase=False):
+        self.pattern = pattern
+        self.ignorecase = ignorecase
+        if pattern[0:1] == b'!':
+            self.is_exclude = False
+            pattern = pattern[1:]
+        else:
+            if pattern[0:1] == b'\\':
+                pattern = pattern[1:]
+            self.is_exclude = True
+        flags = 0
+        if self.ignorecase:
+            flags = re.IGNORECASE
+        self._re = re.compile(translate(pattern), flags)
+
+    def __bytes__(self):
+        return self.pattern
+
+    def __str__(self):
+        return self.pattern.decode(sys.getfilesystemencoding())
+
+    def __eq__(self, other):
+        return (type(self) == type(other) and
+                self.pattern == other.pattern and
+                self.ignorecase == other.ignorecase)
+
+    def __repr__(self):
+        return "%s(%s, %r)" % (
+            type(self).__name__, self.pattern, self.ignorecase)
+
+    def match(self, path):
+        """Try to match a path against this ignore pattern.
+
+        :param path: Path to match (relative to ignore location)
+        :return: boolean
+        """
+        return bool(self._re.match(path))
+
+
+class IgnoreFilter(object):
+
+    def __init__(self, patterns, ignorecase=False):
+        self._patterns = []
+        self._ignorecase = ignorecase
+        for pattern in patterns:
+            self.append_pattern(pattern)
+
+    def append_pattern(self, pattern):
+        """Add a pattern to the set."""
+        self._patterns.append(Pattern(pattern, self._ignorecase))
+
+    def find_matching(self, path):
+        """Yield all matching patterns for path.
+
+        :param path: Path to match
+        :return: Iterator over matching Pattern objects
+        """
+        if not isinstance(path, bytes):
+            path = path.encode(sys.getfilesystemencoding())
+        for pattern in self._patterns:
+            if pattern.match(path):
+                yield pattern
+
+    def is_ignored(self, path):
+        """Check whether a path is ignored.
+
+        For directories, include a trailing slash.
+
+        :return: None if the path is not mentioned, True if it is ignored,
+            False if a negation pattern explicitly excludes it.
+        """
+        status = None
+        for pattern in self.find_matching(path):
+            status = pattern.is_exclude
+        return status
+
+    @classmethod
+    def from_path(cls, path, ignorecase=False):
+        with open(path, 'rb') as f:
+            ret = cls(read_ignore_patterns(f), ignorecase)
+            ret._path = path
+            return ret
+
+    def __repr__(self):
+        if getattr(self, '_path', None) is None:
+            return "<%s>" % (type(self).__name__)
+        else:
+            return "%s.from_path(%r)" % (type(self).__name__, self._path)
+
+
+class IgnoreFilterStack(object):
+    """Check for ignore status in multiple filters."""
+
+    def __init__(self, filters):
+        self._filters = filters
+
+    def is_ignored(self, path):
+        """Check whether a path is explicitly included or excluded in ignores.
+
+        :param path: Path to check
+        :return: None if the path is not mentioned, True if it is ignored,
+            False if a negation pattern explicitly excludes it.
+        """
+        status = None
+        for filter in self._filters:
+            status = filter.is_ignored(path)
+            if status is not None:
+                return status
+        return status
+
+
+def default_user_ignore_filter_path(config):
+    """Return default user ignore filter path.
+
+    :param config: A Config object
+    :return: Path to a global ignore file
+    """
+    try:
+        return config.get(('core', ), 'excludesFile')
+    except KeyError:
+        pass
+
+    xdg_config_home = os.environ.get(
+        "XDG_CONFIG_HOME", os.path.expanduser("~/.config/"),
+    )
+    return os.path.join(xdg_config_home, 'git', 'ignore')
+
+
+class IgnoreFilterManager(object):
+    """Ignore file manager."""
+
+    def __init__(self, top_path, global_filters, ignorecase):
+        self._path_filters = {}
+        self._top_path = top_path
+        self._global_filters = global_filters
+        self._ignorecase = ignorecase
+
+    def __repr__(self):
+        return "%s(%s, %r, %r)" % (
+            type(self).__name__, self._top_path,
+            self._global_filters,
+            self._ignorecase)
+
+    def _load_path(self, path):
+        try:
+            return self._path_filters[path]
+        except KeyError:
+            pass
+
+        p = os.path.join(self._top_path, path, '.gitignore')
+        try:
+            self._path_filters[path] = IgnoreFilter.from_path(
+                p, self._ignorecase)
+        except IOError:
+            self._path_filters[path] = None
+        return self._path_filters[path]
+
+    def find_matching(self, path):
+        """Find matching patterns for path.
+
+        Stops after the first ignore file with matches.
+
+        :param path: Path to check
+        :return: Iterator over Pattern instances
+        """
+        if os.path.isabs(path):
+            raise ValueError('%s is an absolute path' % path)
+        filters = [(0, f) for f in self._global_filters]
+        if os.path.sep != '/':
+            path = path.replace(os.path.sep, '/')
+        parts = path.split('/')
+        for i in range(len(parts)+1):
+            dirname = '/'.join(parts[:i])
+            for s, f in filters:
+                relpath = '/'.join(parts[s:i])
+                if i < len(parts):
+                    # Paths leading up to the final part are all directories,
+                    # so need a trailing slash.
+                    relpath += '/'
+                matches = list(f.find_matching(relpath))
+                if matches:
+                    return iter(matches)
+            ignore_filter = self._load_path(dirname)
+            if ignore_filter is not None:
+                filters.insert(0, (i, ignore_filter))
+        return iter([])
+
+    def is_ignored(self, path):
+        """Check whether a path is explicitly included or excluded in ignores.
+
+        :param path: Path to check
+        :return: None if the path is not mentioned, True if it is ignored,
+            False if a negation pattern explicitly excludes it.
+        """
+        matches = list(self.find_matching(path))
+        if matches:
+            return matches[-1].is_exclude
+        return None
+
+    @classmethod
+    def from_repo(cls, repo):
+        """Create a IgnoreFilterManager from a repository.
+
+        :param repo: Repository object
+        :return: A `IgnoreFilterManager` object
+        """
+        global_filters = []
+        for p in [
+                os.path.join(repo.controldir(), 'info', 'exclude'),
+                default_user_ignore_filter_path(repo.get_config_stack())]:
+            try:
+                global_filters.append(IgnoreFilter.from_path(p))
+            except IOError:
+                pass
+        config = repo.get_config_stack()
+        ignorecase = config.get_boolean((b'core',), b'ignorecase', False)
+        return cls(repo.path, global_filters, ignorecase)

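A self-contained sketch of the matching semantics implemented above; the
patterns are arbitrary examples:

    from dulwich.ignore import IgnoreFilter

    ignores = IgnoreFilter([b"*.pyc", b"!keep.pyc", b"build/"])
    print(ignores.is_ignored(b"foo.pyc"))   # True
    print(ignores.is_ignored(b"keep.pyc"))  # False (negated pattern)
    print(ignores.is_ignored(b"build/"))    # True (note trailing slash)
    print(ignores.is_ignored(b"main.py"))   # None (not mentioned at all)
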
+ 648 - 0
dulwich/index.py

@@ -0,0 +1,648 @@
+# index.py -- File parser/writer for the git index file
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Parser for the git index file format."""
+
+import collections
+import errno
+import os
+import stat
+import struct
+import sys
+
+from dulwich.file import GitFile
+from dulwich.objects import (
+    Blob,
+    S_IFGITLINK,
+    S_ISGITLINK,
+    Tree,
+    hex_to_sha,
+    sha_to_hex,
+    )
+from dulwich.pack import (
+    SHA1Reader,
+    SHA1Writer,
+    )
+
+
+IndexEntry = collections.namedtuple(
+    'IndexEntry', [
+        'ctime', 'mtime', 'dev', 'ino', 'mode', 'uid', 'gid', 'size', 'sha',
+        'flags'])
+
+
+def pathsplit(path):
+    """Split a /-delimited path into a directory part and a basename.
+
+    :param path: The path to split.
+    :return: Tuple with directory name and basename
+    """
+    try:
+        (dirname, basename) = path.rsplit(b"/", 1)
+    except ValueError:
+        return (b"", path)
+    else:
+        return (dirname, basename)
+
+
+def pathjoin(*args):
+    """Join a /-delimited path.
+
+    """
+    return b"/".join([p for p in args if p])
+
+
+def read_cache_time(f):
+    """Read a cache time.
+
+    :param f: File-like object to read from
+    :return: Tuple with seconds and nanoseconds
+    """
+    return struct.unpack(">LL", f.read(8))
+
+
+def write_cache_time(f, t):
+    """Write a cache time.
+
+    :param f: File-like object to write to
+    :param t: Time to write (as int, float or tuple with secs and nsecs)
+    """
+    if isinstance(t, int):
+        t = (t, 0)
+    elif isinstance(t, float):
+        (secs, nsecs) = divmod(t, 1.0)
+        t = (int(secs), int(nsecs * 1000000000))
+    elif not isinstance(t, tuple):
+        raise TypeError(t)
+    f.write(struct.pack(">LL", *t))
+
+
+def read_cache_entry(f):
+    """Read an entry from a cache file.
+
+    :param f: File-like object to read from
+    :return: tuple with: device, inode, mode, uid, gid, size, sha, flags
+    """
+    beginoffset = f.tell()
+    ctime = read_cache_time(f)
+    mtime = read_cache_time(f)
+    (dev, ino, mode, uid, gid, size, sha, flags, ) = \
+        struct.unpack(">LLLLLL20sH", f.read(20 + 4 * 6 + 2))
+    name = f.read((flags & 0x0fff))
+    # Padding:
+    real_size = ((f.tell() - beginoffset + 8) & ~7)
+    f.read((beginoffset + real_size) - f.tell())
+    return (name, ctime, mtime, dev, ino, mode, uid, gid, size,
+            sha_to_hex(sha), flags & ~0x0fff)
+
+
+def write_cache_entry(f, entry):
+    """Write an index entry to a file.
+
+    :param f: File object
+    :param entry: Entry to write, tuple with:
+        (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags)
+    """
+    beginoffset = f.tell()
+    (name, ctime, mtime, dev, ino, mode, uid, gid, size, sha, flags) = entry
+    write_cache_time(f, ctime)
+    write_cache_time(f, mtime)
+    flags = len(name) | (flags & ~0x0fff)
+    f.write(struct.pack(
+            b'>LLLLLL20sH', dev & 0xFFFFFFFF, ino & 0xFFFFFFFF,
+            mode, uid, gid, size, hex_to_sha(sha), flags))
+    f.write(name)
+    real_size = ((f.tell() - beginoffset + 8) & ~7)
+    f.write(b'\0' * ((beginoffset + real_size) - f.tell()))
+
+
+def read_index(f):
+    """Read an index file, yielding the individual entries."""
+    header = f.read(4)
+    if header != b'DIRC':
+        raise AssertionError("Invalid index file header: %r" % header)
+    (version, num_entries) = struct.unpack(b'>LL', f.read(4 * 2))
+    assert version in (1, 2)
+    for i in range(num_entries):
+        yield read_cache_entry(f)
+
+
+def read_index_dict(f):
+    """Read an index file and return it as a dictionary.
+
+    :param f: File object to read from
+    """
+    ret = {}
+    for x in read_index(f):
+        ret[x[0]] = IndexEntry(*x[1:])
+    return ret
+
+
+def write_index(f, entries):
+    """Write an index file.
+
+    :param f: File-like object to write to
+    :param entries: Sequence of the entries to write (len() is taken)
+    """
+    f.write(b'DIRC')
+    f.write(struct.pack(b'>LL', 2, len(entries)))
+    for x in entries:
+        write_cache_entry(f, x)
+
+
+def write_index_dict(f, entries):
+    """Write an index file based on the contents of a dictionary.
+
+    """
+    entries_list = []
+    for name in sorted(entries):
+        entries_list.append((name,) + tuple(entries[name]))
+    write_index(f, entries_list)
+
+
+def cleanup_mode(mode):
+    """Cleanup a mode value.
+
+    This will return a mode that can be stored in a tree object.
+
+    :param mode: Mode to clean up.
+    """
+    if stat.S_ISLNK(mode):
+        return stat.S_IFLNK
+    elif stat.S_ISDIR(mode):
+        return stat.S_IFDIR
+    elif S_ISGITLINK(mode):
+        return S_IFGITLINK
+    ret = stat.S_IFREG | 0o644
+    ret |= (mode & 0o111)
+    return ret
+
+
+class Index(object):
+    """A Git Index file."""
+
+    def __init__(self, filename):
+        """Open an index file.
+
+        :param filename: Path to the index file
+        """
+        self._filename = filename
+        self.clear()
+        self.read()
+
+    @property
+    def path(self):
+        return self._filename
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self._filename)
+
+    def write(self):
+        """Write current contents of index to disk."""
+        f = GitFile(self._filename, 'wb')
+        try:
+            f = SHA1Writer(f)
+            write_index_dict(f, self._byname)
+        finally:
+            f.close()
+
+    def read(self):
+        """Read current contents of index from disk."""
+        if not os.path.exists(self._filename):
+            return
+        f = GitFile(self._filename, 'rb')
+        try:
+            f = SHA1Reader(f)
+            for x in read_index(f):
+                self[x[0]] = IndexEntry(*x[1:])
+            # FIXME: Additional data?
+            f.read(os.path.getsize(self._filename)-f.tell()-20)
+            f.check_sha()
+        finally:
+            f.close()
+
+    def __len__(self):
+        """Number of entries in this index file."""
+        return len(self._byname)
+
+    def __getitem__(self, name):
+        """Retrieve entry by relative path.
+
+        :return: tuple with (ctime, mtime, dev, ino, mode, uid, gid, size, sha,
+            flags)
+        """
+        return self._byname[name]
+
+    def __iter__(self):
+        """Iterate over the paths in this index."""
+        return iter(self._byname)
+
+    def get_sha1(self, path):
+        """Return the (git object) SHA1 for the object at a path."""
+        return self[path].sha
+
+    def get_mode(self, path):
+        """Return the POSIX file mode for the object at a path."""
+        return self[path].mode
+
+    def iterblobs(self):
+        """Iterate over path, sha, mode tuples for use with commit_tree."""
+        for path in self:
+            entry = self[path]
+            yield path, entry.sha, cleanup_mode(entry.mode)
+
+    def clear(self):
+        """Remove all contents from this index."""
+        self._byname = {}
+
+    def __setitem__(self, name, x):
+        assert isinstance(name, bytes)
+        assert len(x) == 10
+        # Remove the old entry if any
+        self._byname[name] = x
+
+    def __delitem__(self, name):
+        assert isinstance(name, bytes)
+        del self._byname[name]
+
+    def iteritems(self):
+        return self._byname.items()
+
+    def update(self, entries):
+        for name, value in entries.items():
+            self[name] = value
+
+    def changes_from_tree(self, object_store, tree, want_unchanged=False):
+        """Find the differences between the contents of this index and a tree.
+
+        :param object_store: Object store to use for retrieving tree contents
+        :param tree: SHA1 of the root tree
+        :param want_unchanged: Whether unchanged files should be reported
+        :return: Iterator over tuples with (oldpath, newpath), (oldmode,
+            newmode), (oldsha, newsha)
+        """
+        def lookup_entry(path):
+            entry = self[path]
+            return entry.sha, entry.mode
+        for (name, mode, sha) in changes_from_tree(
+                self._byname.keys(), lookup_entry, object_store, tree,
+                want_unchanged=want_unchanged):
+            yield (name, mode, sha)
+
+    def commit(self, object_store):
+        """Create a new tree from an index.
+
+        :param object_store: Object store to save the tree in
+        :return: Root tree SHA
+        """
+        return commit_tree(object_store, self.iterblobs())
+
+
+def commit_tree(object_store, blobs):
+    """Commit a new tree.
+
+    :param object_store: Object store to add trees to
+    :param blobs: Iterable over blob path, sha, mode entries
+    :return: SHA1 of the created tree.
+    """
+
+    trees = {b'': {}}
+
+    def add_tree(path):
+        if path in trees:
+            return trees[path]
+        dirname, basename = pathsplit(path)
+        t = add_tree(dirname)
+        assert isinstance(basename, bytes)
+        newtree = {}
+        t[basename] = newtree
+        trees[path] = newtree
+        return newtree
+
+    for path, sha, mode in blobs:
+        tree_path, basename = pathsplit(path)
+        tree = add_tree(tree_path)
+        tree[basename] = (mode, sha)
+
+    def build_tree(path):
+        tree = Tree()
+        for basename, entry in trees[path].items():
+            if isinstance(entry, dict):
+                mode = stat.S_IFDIR
+                sha = build_tree(pathjoin(path, basename))
+            else:
+                (mode, sha) = entry
+            tree.add(basename, mode, sha)
+        object_store.add_object(tree)
+        return tree.id
+    return build_tree(b'')
+
+
+def commit_index(object_store, index):
+    """Create a new tree from an index.
+
+    :param object_store: Object store to save the tree in
+    :param index: Index file
+    :note: This function is deprecated, use index.commit() instead.
+    :return: Root tree sha.
+    """
+    return commit_tree(object_store, index.iterblobs())
+
+
+def changes_from_tree(names, lookup_entry, object_store, tree,
+                      want_unchanged=False):
+    """Find the differences between the contents of a tree and
+    a working copy.
+
+    :param names: Iterable of names in the working copy
+    :param lookup_entry: Function to lookup an entry in the working copy
+    :param object_store: Object store to use for retrieving tree contents
+    :param tree: SHA1 of the root tree, or None for an empty tree
+    :param want_unchanged: Whether unchanged files should be reported
+    :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode),
+        (oldsha, newsha)
+    """
+    other_names = set(names)
+
+    if tree is not None:
+        for (name, mode, sha) in object_store.iter_tree_contents(tree):
+            try:
+                (other_sha, other_mode) = lookup_entry(name)
+            except KeyError:
+                # Was removed
+                yield ((name, None), (mode, None), (sha, None))
+            else:
+                other_names.remove(name)
+                if (want_unchanged or other_sha != sha or other_mode != mode):
+                    yield ((name, name), (mode, other_mode), (sha, other_sha))
+
+    # Mention added files
+    for name in other_names:
+        try:
+            (other_sha, other_mode) = lookup_entry(name)
+        except KeyError:
+            pass
+        else:
+            yield ((None, name), (None, other_mode), (None, other_sha))
+
+
+def index_entry_from_stat(stat_val, hex_sha, flags, mode=None):
+    """Create a new index entry from a stat value.
+
+    :param stat_val: POSIX stat_result instance
+    :param hex_sha: Hex sha of the object
+    :param flags: Index flags
+    """
+    if mode is None:
+        mode = cleanup_mode(stat_val.st_mode)
+    return (stat_val.st_ctime, stat_val.st_mtime, stat_val.st_dev,
+            stat_val.st_ino, mode, stat_val.st_uid,
+            stat_val.st_gid, stat_val.st_size, hex_sha, flags)
+
+
+def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
+    """Build a file or symlink on disk based on a Git object.
+
+    :param blob: The git Blob object
+    :param mode: File mode
+    :param target_path: Path to write to
+    :param honor_filemode: An optional flag to honor core.filemode setting in
+        config file, default is core.filemode=True, change executable bit
+    :return: stat object for the file
+    """
+    try:
+        oldstat = os.lstat(target_path)
+    except OSError as e:
+        if e.errno == errno.ENOENT:
+            oldstat = None
+        else:
+            raise
+    contents = blob.as_raw_string()
+    if stat.S_ISLNK(mode):
+        # FIXME: This will fail on Windows. What should we do instead?
+        if oldstat:
+            os.unlink(target_path)
+        if sys.platform == 'win32' and sys.version_info[0] == 3:
+            # os.readlink on Python3 on Windows requires a unicode string.
+            # TODO(jelmer): Don't assume tree_encoding == fs_encoding
+            tree_encoding = sys.getfilesystemencoding()
+            contents = contents.decode(tree_encoding)
+            target_path = target_path.decode(tree_encoding)
+        os.symlink(contents, target_path)
+    else:
+        if oldstat is not None and oldstat.st_size == len(contents):
+            with open(target_path, 'rb') as f:
+                if f.read() == contents:
+                    return oldstat
+
+        with open(target_path, 'wb') as f:
+            # Write out file
+            f.write(contents)
+
+        if honor_filemode:
+            os.chmod(target_path, mode)
+
+    return os.lstat(target_path)
+
+
+INVALID_DOTNAMES = (b".git", b".", b"..", b"")
+
+
+def validate_path_element_default(element):
+    return element.lower() not in INVALID_DOTNAMES
+
+
+def validate_path_element_ntfs(element):
+    stripped = element.rstrip(b". ").lower()
+    if stripped in INVALID_DOTNAMES:
+        return False
+    if stripped == b"git~1":
+        return False
+    return True
+
+
+def validate_path(path, element_validator=validate_path_element_default):
+    """Default path validator that just checks for .git/."""
+    parts = path.split(b"/")
+    for p in parts:
+        if not element_validator(p):
+            return False
+    else:
+        return True
+
+
+def build_index_from_tree(root_path, index_path, object_store, tree_id,
+                          honor_filemode=True,
+                          validate_path_element=validate_path_element_default):
+    """Generate and materialize index from a tree
+
+    :param tree_id: Tree to materialize
+    :param root_path: Target dir for materialized index files
+    :param index_path: Target path for generated index
+    :param object_store: Non-empty object store holding tree contents
+    :param honor_filemode: An optional flag to honor core.filemode setting in
+        config file, default is core.filemode=True, change executable bit
+    :param validate_path_element: Function to validate path elements to check
+        out; default just refuses .git and .. directories.
+
+    :note: existing index is wiped and contents are not merged
+        in a working dir. Suitable only for fresh clones.
+    """
+
+    index = Index(index_path)
+    if not isinstance(root_path, bytes):
+        root_path = root_path.encode(sys.getfilesystemencoding())
+
+    for entry in object_store.iter_tree_contents(tree_id):
+        if not validate_path(entry.path, validate_path_element):
+            continue
+        full_path = _tree_to_fs_path(root_path, entry.path)
+
+        if not os.path.exists(os.path.dirname(full_path)):
+            os.makedirs(os.path.dirname(full_path))
+
+        # TODO(jelmer): Merge new index into working tree
+        if S_ISGITLINK(entry.mode):
+            if not os.path.isdir(full_path):
+                os.mkdir(full_path)
+            st = os.lstat(full_path)
+            # TODO(jelmer): record and return submodule paths
+        else:
+            obj = object_store[entry.sha]
+            st = build_file_from_blob(
+                obj, entry.mode, full_path, honor_filemode=honor_filemode)
+        # Add file to index
+        if not honor_filemode or S_ISGITLINK(entry.mode):
+            # we cannot use tuple slicing to build a new tuple,
+            # because on windows that will convert the times to
+            # longs, which causes errors further along
+            st_tuple = (entry.mode, st.st_ino, st.st_dev, st.st_nlink,
+                        st.st_uid, st.st_gid, st.st_size, st.st_atime,
+                        st.st_mtime, st.st_ctime)
+            st = st.__class__(st_tuple)
+        index[entry.path] = index_entry_from_stat(st, entry.sha, 0)
+
+    index.write()
+
+
+def blob_from_path_and_stat(fs_path, st):
+    """Create a blob from a path and a stat object.
+
+    :param fs_path: Full file system path to file
+    :param st: A stat object
+    :return: A `Blob` object
+    """
+    assert isinstance(fs_path, bytes)
+    blob = Blob()
+    if not stat.S_ISLNK(st.st_mode):
+        with open(fs_path, 'rb') as f:
+            blob.data = f.read()
+    else:
+        if sys.platform == 'win32' and sys.version_info[0] == 3:
+            # os.readlink on Python3 on Windows requires a unicode string.
+            # TODO(jelmer): Don't assume tree_encoding == fs_encoding
+            tree_encoding = sys.getfilesystemencoding()
+            fs_path = fs_path.decode(tree_encoding)
+            blob.data = os.readlink(fs_path).encode(tree_encoding)
+        else:
+            blob.data = os.readlink(fs_path)
+    return blob
+
+
+def get_unstaged_changes(index, root_path):
+    """Walk through an index and check for differences against working tree.
+
+    :param index: index to check
+    :param root_path: path in which to find files
+    :return: iterator over paths with unstaged changes
+    """
+    # For each entry in the index check the sha1 & ensure not staged
+    if not isinstance(root_path, bytes):
+        root_path = root_path.encode(sys.getfilesystemencoding())
+
+    for tree_path, entry in index.iteritems():
+        full_path = _tree_to_fs_path(root_path, tree_path)
+        try:
+            blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
+        except OSError as e:
+            if e.errno != errno.ENOENT:
+                raise
+            # The file was removed, so we assume that counts as
+            # different from whatever file used to exist.
+            yield tree_path
+        except IOError as e:
+            if e.errno != errno.EISDIR:
+                raise
+            # This is actually a directory
+            # full_path (not the in-repo tree_path) is the on-disk location
+            if os.path.exists(os.path.join(full_path, b'.git')):
+                # Submodule
+                from dulwich.errors import NotGitRepository
+                from dulwich.repo import Repo
+                try:
+                    if entry.sha != Repo(full_path).head():
+                        yield tree_path
+                except NotGitRepository:
+                    yield tree_path
+            else:
+                # The file was changed to a directory, so consider it removed.
+                yield tree_path
+        else:
+            if blob.id != entry.sha:
+                yield tree_path
+
+
+os_sep_bytes = os.sep.encode('ascii')
+
+
+def _tree_to_fs_path(root_path, tree_path):
+    """Convert a git tree path to a file system path.
+
+    :param root_path: Root filesystem path
+    :param tree_path: Git tree path as bytes
+
+    :return: File system path.
+    """
+    assert isinstance(tree_path, bytes)
+    if os_sep_bytes != b'/':
+        sep_corrected_path = tree_path.replace(b'/', os_sep_bytes)
+    else:
+        sep_corrected_path = tree_path
+    return os.path.join(root_path, sep_corrected_path)
+
+
+def _fs_to_tree_path(fs_path, fs_encoding=None):
+    """Convert a file system path to a git tree path.
+
+    :param fs_path: File system path.
+    :param fs_encoding: File system encoding
+
+    :return:  Git tree path as bytes
+    """
+    if fs_encoding is None:
+        fs_encoding = sys.getfilesystemencoding()
+    if not isinstance(fs_path, bytes):
+        fs_path_bytes = fs_path.encode(fs_encoding)
+    else:
+        fs_path_bytes = fs_path
+    if os_sep_bytes != b'/':
+        tree_path = fs_path_bytes.replace(os_sep_bytes, b'/')
+    else:
+        tree_path = fs_path_bytes
+    return tree_path

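A minimal sketch of reading an existing index with the Index class above;
the path assumes a standard repository layout:

    from dulwich.index import Index

    index = Index(".git/index")  # hypothetical checkout
    for path in index:
        entry = index[path]
        print(path, entry.sha, oct(entry.mode))
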
+ 70 - 0
dulwich/log_utils.py

@@ -0,0 +1,70 @@
+# log_utils.py -- Logging utilities for Dulwich
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Logging utilities for Dulwich.
+
+Any module that uses logging needs to do import-time initialization to set up
+the logging environment. Since Dulwich is also used as a library, clients may
+not want to see any logging output. In that case, we need to use a special
+handler to suppress spurious warnings like "No handlers could be found for
+logger dulwich.foo".
+
+For details on the _NullHandler approach, see:
+http://docs.python.org/library/logging.html#configuring-logging-for-a-library
+
+For many modules, the only function from the logging module they need is
+getLogger; this module exports that function for convenience. If a calling
+module needs something else, it can import the standard logging module
+directly.
+"""
+
+import logging
+import sys
+
+getLogger = logging.getLogger
+
+
+class _NullHandler(logging.Handler):
+    """No-op logging handler to avoid unexpected logging warnings."""
+
+    def emit(self, record):
+        pass
+
+
+_NULL_HANDLER = _NullHandler()
+_DULWICH_LOGGER = getLogger('dulwich')
+_DULWICH_LOGGER.addHandler(_NULL_HANDLER)
+
+
+def default_logging_config():
+    """Set up the default Dulwich loggers."""
+    remove_null_handler()
+    logging.basicConfig(level=logging.INFO, stream=sys.stderr,
+                        format='%(asctime)s %(levelname)s: %(message)s')
+
+
+def remove_null_handler():
+    """Remove the null handler from the Dulwich loggers.
+
+    If a caller wants to set up logging using something other than
+    default_logging_config, calling this function first is a minor optimization
+    to avoid the overhead of using the _NullHandler.
+    """
+    _DULWICH_LOGGER.removeHandler(_NULL_HANDLER)
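
A short sketch showing how a client opts back into log output (the logger name is illustrative):

    from dulwich import log_utils

    # Swap the null handler for a basicConfig() setup on stderr.
    log_utils.default_logging_config()

    logger = log_utils.getLogger('dulwich.example')  # illustrative name
    logger.info('Dulwich log output is now visible on stderr')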

+ 371 - 0
dulwich/lru_cache.py

@@ -0,0 +1,371 @@
+# lru_cache.py -- Simple LRU cache for dulwich
+# Copyright (C) 2006, 2008 Canonical Ltd
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""A simple least-recently-used (LRU) cache."""
+
+_null_key = object()
+
+
+class _LRUNode(object):
+    """This maintains the linked-list which is the lru internals."""
+
+    __slots__ = ('prev', 'next_key', 'key', 'value', 'cleanup', 'size')
+
+    def __init__(self, key, value, cleanup=None):
+        self.prev = None
+        self.next_key = _null_key
+        self.key = key
+        self.value = value
+        self.cleanup = cleanup
+        # TODO: We could compute this 'on-the-fly' like we used to and remove
+        #       one pointer from this object; we just need to decide whether
+        #       that actually costs us much of anything in normal usage.
+        self.size = None
+
+    def __repr__(self):
+        if self.prev is None:
+            prev_key = None
+        else:
+            prev_key = self.prev.key
+        return '%s(%r n:%r p:%r)' % (self.__class__.__name__, self.key,
+                                     self.next_key, prev_key)
+
+    def run_cleanup(self):
+        if self.cleanup is not None:
+            self.cleanup(self.key, self.value)
+        self.cleanup = None
+        # Just make sure to break any refcycles, etc
+        self.value = None
+
+
+class LRUCache(object):
+    """A class which manages a cache of entries, removing unused ones."""
+
+    def __init__(self, max_cache=100, after_cleanup_count=None):
+        self._cache = {}
+        # The "HEAD" of the lru linked list
+        self._most_recently_used = None
+        # The "TAIL" of the lru linked list
+        self._least_recently_used = None
+        self._update_max_cache(max_cache, after_cleanup_count)
+
+    def __contains__(self, key):
+        return key in self._cache
+
+    def __getitem__(self, key):
+        cache = self._cache
+        node = cache[key]
+        # Inlined from _record_access to decrease the overhead of __getitem__
+        # We also have more knowledge about structure when __getitem__
+        # succeeds: we know that self._most_recently_used must not be
+        # None, etc.
+        mru = self._most_recently_used
+        if node is mru:
+            # Nothing to do, this node is already at the head of the queue
+            return node.value
+        # Remove this node from the old location
+        node_prev = node.prev
+        next_key = node.next_key
+        # benchmarking shows that the lookup of _null_key in globals is faster
+        # than the attribute lookup for (node is self._least_recently_used)
+        if next_key is _null_key:
+            # 'node' is the _least_recently_used, because it doesn't have a
+            # 'next' item. So move the current lru to the previous node.
+            self._least_recently_used = node_prev
+        else:
+            node_next = cache[next_key]
+            node_next.prev = node_prev
+        node_prev.next_key = next_key
+        # Insert this node at the front of the list
+        node.next_key = mru.key
+        mru.prev = node
+        self._most_recently_used = node
+        node.prev = None
+        return node.value
+
+    def __len__(self):
+        return len(self._cache)
+
+    def _walk_lru(self):
+        """Walk the LRU list, only meant to be used in tests."""
+        node = self._most_recently_used
+        if node is not None:
+            if node.prev is not None:
+                raise AssertionError('the _most_recently_used entry is not'
+                                     ' supposed to have a previous entry'
+                                     ' %s' % (node,))
+        while node is not None:
+            if node.next_key is _null_key:
+                if node is not self._least_recently_used:
+                    raise AssertionError('only the last node should have'
+                                         ' no next value: %s' % (node,))
+                node_next = None
+            else:
+                node_next = self._cache[node.next_key]
+                if node_next.prev is not node:
+                    raise AssertionError('inconsistency found, node.next.prev'
+                                         ' != node: %s' % (node,))
+            if node.prev is None:
+                if node is not self._most_recently_used:
+                    raise AssertionError('only the _most_recently_used should'
+                                         ' not have a previous node: %s'
+                                         % (node,))
+            else:
+                if node.prev.next_key != node.key:
+                    raise AssertionError('inconsistency found, node.prev.next'
+                                         ' != node: %s' % (node,))
+            yield node
+            node = node_next
+
+    def add(self, key, value, cleanup=None):
+        """Add a new value to the cache.
+
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
+
+        :param key: The key to store it under
+        :param value: The object to store
+        :param cleanup: None or a function taking (key, value) to indicate
+                        'value' should be cleaned up.
+        """
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
+        if key in self._cache:
+            node = self._cache[key]
+            node.run_cleanup()
+            node.value = value
+            node.cleanup = cleanup
+        else:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        self._record_access(node)
+
+        if len(self._cache) > self._max_cache:
+            # Trigger the cleanup
+            self.cleanup()
+
+    def cache_size(self):
+        """Get the number of entries we will cache."""
+        return self._max_cache
+
+    def get(self, key, default=None):
+        node = self._cache.get(key, None)
+        if node is None:
+            return default
+        self._record_access(node)
+        return node.value
+
+    def keys(self):
+        """Get the list of keys currently cached.
+
+        Note that values returned here may not be available by the time you
+        request them later. This is simply meant as a peek into the current
+        state.
+
+        :return: An unordered list of keys that are currently cached.
+        """
+        return self._cache.keys()
+
+    def items(self):
+        """Get the key:value pairs as a dict."""
+        return dict((k, n.value) for k, n in self._cache.items())
+
+    def cleanup(self):
+        """Clear the cache until it shrinks to the requested size.
+
+        This does not completely wipe the cache, just makes sure it is under
+        the after_cleanup_count.
+        """
+        # Make sure the cache is shrunk to the correct size
+        while len(self._cache) > self._after_cleanup_count:
+            self._remove_lru()
+
+    def __setitem__(self, key, value):
+        """Add a value to the cache, there will be no cleanup function."""
+        self.add(key, value, cleanup=None)
+
+    def _record_access(self, node):
+        """Record that key was accessed."""
+        # Move 'node' to the front of the queue
+        if self._most_recently_used is None:
+            self._most_recently_used = node
+            self._least_recently_used = node
+            return
+        elif node is self._most_recently_used:
+            # Nothing to do, this node is already at the head of the queue
+            return
+        # Remove the node from its current position (updating the tail
+        # pointer if needed), then insert it at the front
+        # REMOVE
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # INSERT
+        node.next_key = self._most_recently_used.key
+        self._most_recently_used.prev = node
+        self._most_recently_used = node
+        node.prev = None
+
+    def _remove_node(self, node):
+        if node is self._least_recently_used:
+            self._least_recently_used = node.prev
+        self._cache.pop(node.key)
+        # If we have removed all entries, remove the head pointer as well
+        if self._least_recently_used is None:
+            self._most_recently_used = None
+        node.run_cleanup()
+        # Now remove this node from the linked list
+        if node.prev is not None:
+            node.prev.next_key = node.next_key
+        if node.next_key is not _null_key:
+            node_next = self._cache[node.next_key]
+            node_next.prev = node.prev
+        # And remove this node's pointers
+        node.prev = None
+        node.next_key = _null_key
+
+    def _remove_lru(self):
+        """Remove one entry from the lru, and handle consequences.
+
+        If there are no more references to the lru, then this entry should be
+        removed from the cache.
+        """
+        self._remove_node(self._least_recently_used)
+
+    def clear(self):
+        """Clear out all of the cache."""
+        # Clean up in LRU order
+        while self._cache:
+            self._remove_lru()
+
+    def resize(self, max_cache, after_cleanup_count=None):
+        """Change the number of entries that will be cached."""
+        self._update_max_cache(max_cache,
+                               after_cleanup_count=after_cleanup_count)
+
+    def _update_max_cache(self, max_cache, after_cleanup_count=None):
+        self._max_cache = max_cache
+        if after_cleanup_count is None:
+            self._after_cleanup_count = self._max_cache * 8 // 10
+        else:
+            self._after_cleanup_count = min(after_cleanup_count,
+                                            self._max_cache)
+        self.cleanup()
+
+
+class LRUSizeCache(LRUCache):
+    """An LRUCache that removes things based on the size of the values.
+
+    This differs in that it doesn't care how many actual items there are,
+    it just restricts the cache to be cleaned up after so much data is stored.
+
+    The size of items added will be computed using compute_size(value), which
+    defaults to len() if not supplied.
+    """
+
+    def __init__(self, max_size=1024*1024, after_cleanup_size=None,
+                 compute_size=None):
+        """Create a new LRUSizeCache.
+
+        :param max_size: The max number of bytes to store before we start
+            clearing out entries.
+        :param after_cleanup_size: After cleaning up, shrink everything to this
+            size.
+        :param compute_size: A function to compute the size of the values. We
+            use a function here, so that you can pass 'len' if you are just
+            using simple strings, or a more complex function if you are using
+            something like a list of strings, or even a custom object.
+            The function should take the form "compute_size(value) => integer".
+            If not supplied, it defaults to 'len()'
+        """
+        self._value_size = 0
+        self._compute_size = compute_size
+        if compute_size is None:
+            self._compute_size = len
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        LRUCache.__init__(self, max_cache=max(int(max_size/512), 1))
+
+    def add(self, key, value, cleanup=None):
+        """Add a new value to the cache.
+
+        Also, if the entry is ever removed from the cache, call
+        cleanup(key, value).
+
+        :param key: The key to store it under
+        :param value: The object to store
+        :param cleanup: None or a function taking (key, value) to indicate
+                        'value' should be cleaned up.
+        """
+        if key is _null_key:
+            raise ValueError('cannot use _null_key as a key')
+        node = self._cache.get(key, None)
+        value_len = self._compute_size(value)
+        if value_len >= self._after_cleanup_size:
+            # The new value is 'too big to fit', as it would fill up/overflow
+            # the cache all by itself
+            if node is not None:
+                # We won't be replacing the old node, so just remove it
+                self._remove_node(node)
+            if cleanup is not None:
+                cleanup(key, value)
+            return
+        if node is None:
+            node = _LRUNode(key, value, cleanup=cleanup)
+            self._cache[key] = node
+        else:
+            self._value_size -= node.size
+        node.size = value_len
+        self._value_size += value_len
+        self._record_access(node)
+
+        if self._value_size > self._max_size:
+            # Time to cleanup
+            self.cleanup()
+
+    def cleanup(self):
+        """Clear the cache until it shrinks to the requested size.
+
+        This does not completely wipe the cache, just makes sure it is under
+        the after_cleanup_size.
+        """
+        # Make sure the cache is shrunk to the correct size
+        while self._value_size > self._after_cleanup_size:
+            self._remove_lru()
+
+    def _remove_node(self, node):
+        self._value_size -= node.size
+        LRUCache._remove_node(self, node)
+
+    def resize(self, max_size, after_cleanup_size=None):
+        """Change the number of bytes that will be cached."""
+        self._update_max_size(max_size, after_cleanup_size=after_cleanup_size)
+        max_cache = max(int(max_size/512), 1)
+        self._update_max_cache(max_cache)
+
+    def _update_max_size(self, max_size, after_cleanup_size=None):
+        self._max_size = max_size
+        if after_cleanup_size is None:
+            self._after_cleanup_size = self._max_size * 8 // 10
+        else:
+            self._after_cleanup_size = min(after_cleanup_size, self._max_size)
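
A usage sketch for both cache flavours (keys, values and limits here are arbitrary):

    from dulwich.lru_cache import LRUCache, LRUSizeCache

    # Count-bounded cache: at most two entries are kept across cleanups.
    c = LRUCache(max_cache=2, after_cleanup_count=2)
    c['a'] = 1
    c['b'] = 2
    c['a']        # touch 'a' so that 'b' becomes the least recently used
    c['c'] = 3    # len > max_cache triggers cleanup(), evicting 'b'
    assert 'b' not in c and 'a' in c and 'c' in c

    # Size-bounded cache: eviction is driven by compute_size (len here).
    s = LRUSizeCache(max_size=10, compute_size=len)
    s.add(b'k1', b'aaaa')
    s.add(b'k2', b'bbbb')
    s.add(b'k3', b'cccc')  # total size 12 > max_size, so b'k1' is evicted
    assert b'k1' not in s and b'k2' in s and b'k3' in s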

+ 1128 - 0
dulwich/object_store.py

@@ -0,0 +1,1128 @@
+# object_store.py -- Object store for git objects
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#                         and others
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Git object store interfaces and implementation."""
+
+from io import BytesIO
+import errno
+from itertools import chain
+import os
+import stat
+import sys
+import tempfile
+import time
+
+from dulwich.diff_tree import (
+    tree_changes,
+    walk_trees,
+    )
+from dulwich.errors import (
+    NotTreeError,
+    )
+from dulwich.file import GitFile
+from dulwich.objects import (
+    Commit,
+    ShaFile,
+    Tag,
+    Tree,
+    ZERO_SHA,
+    hex_to_sha,
+    sha_to_hex,
+    hex_to_filename,
+    S_ISGITLINK,
+    object_class,
+    )
+from dulwich.pack import (
+    Pack,
+    PackData,
+    PackInflater,
+    iter_sha1,
+    write_pack_header,
+    write_pack_index_v2,
+    write_pack_object,
+    write_pack_objects,
+    compute_file_sha,
+    PackIndexer,
+    PackStreamCopier,
+    )
+
+INFODIR = 'info'
+PACKDIR = 'pack'
+
+
+class BaseObjectStore(object):
+    """Object store interface."""
+
+    def determine_wants_all(self, refs):
+        return [sha for (ref, sha) in refs.items()
+                if sha not in self and not ref.endswith(b"^{}") and
+                not sha == ZERO_SHA]
+
+    def iter_shas(self, shas):
+        """Iterate over the objects for the specified shas.
+
+        :param shas: Iterable object with SHAs
+        :return: Object iterator
+        """
+        return ObjectStoreIterator(self, shas)
+
+    def contains_loose(self, sha):
+        """Check if a particular object is present by SHA1 and is loose."""
+        raise NotImplementedError(self.contains_loose)
+
+    def contains_packed(self, sha):
+        """Check if a particular object is present by SHA1 and is packed."""
+        raise NotImplementedError(self.contains_packed)
+
+    def __contains__(self, sha):
+        """Check if a particular object is present by SHA1.
+
+        This method makes no distinction between loose and packed objects.
+        """
+        return self.contains_packed(sha) or self.contains_loose(sha)
+
+    @property
+    def packs(self):
+        """Iterable of pack objects."""
+        raise NotImplementedError
+
+    def get_raw(self, name):
+        """Obtain the raw text for an object.
+
+        :param name: sha for the object.
+        :return: tuple with numeric type and object contents.
+        """
+        raise NotImplementedError(self.get_raw)
+
+    def __getitem__(self, sha):
+        """Obtain an object by SHA1."""
+        type_num, uncomp = self.get_raw(sha)
+        return ShaFile.from_raw_string(type_num, uncomp, sha=sha)
+
+    def __iter__(self):
+        """Iterate over the SHAs that are present in this store."""
+        raise NotImplementedError(self.__iter__)
+
+    def add_object(self, obj):
+        """Add a single object to this object store.
+
+        """
+        raise NotImplementedError(self.add_object)
+
+    def add_objects(self, objects):
+        """Add a set of objects to this object store.
+
+        :param objects: Iterable over a list of (object, path) tuples
+        """
+        raise NotImplementedError(self.add_objects)
+
+    def tree_changes(self, source, target, want_unchanged=False):
+        """Find the differences between the contents of two trees
+
+        :param source: SHA1 of the source tree
+        :param target: SHA1 of the target tree
+        :param want_unchanged: Whether unchanged files should be reported
+        :return: Iterator over tuples with
+            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
+        """
+        for change in tree_changes(self, source, target,
+                                   want_unchanged=want_unchanged):
+            yield ((change.old.path, change.new.path),
+                   (change.old.mode, change.new.mode),
+                   (change.old.sha, change.new.sha))
+
+    def iter_tree_contents(self, tree_id, include_trees=False):
+        """Iterate the contents of a tree and all subtrees.
+
+        Iteration is depth-first pre-order, as in e.g. os.walk.
+
+        :param tree_id: SHA1 of the tree.
+        :param include_trees: If True, include tree objects in the iteration.
+        :return: Iterator over TreeEntry namedtuples for all the objects in a
+            tree.
+        """
+        for entry, _ in walk_trees(self, tree_id, None):
+            if not stat.S_ISDIR(entry.mode) or include_trees:
+                yield entry
+
+    def find_missing_objects(self, haves, wants, progress=None,
+                             get_tagged=None,
+                             get_parents=lambda commit: commit.parents):
+        """Find the missing objects required for a set of revisions.
+
+        :param haves: Iterable over SHAs already in common.
+        :param wants: Iterable over SHAs of objects to fetch.
+        :param progress: Simple progress function that will be called with
+            updated progress strings.
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
+        :param get_parents: Optional function for getting the parents of a
+            commit.
+        :return: Iterator over (sha, path) pairs.
+        """
+        finder = MissingObjectFinder(self, haves, wants, progress, get_tagged,
+                                     get_parents=get_parents)
+        return iter(finder.next, None)
+
+    def find_common_revisions(self, graphwalker):
+        """Find which revisions this store has in common using graphwalker.
+
+        :param graphwalker: A graphwalker object.
+        :return: List of SHAs that are in common
+        """
+        haves = []
+        sha = next(graphwalker)
+        while sha:
+            if sha in self:
+                haves.append(sha)
+                graphwalker.ack(sha)
+            sha = next(graphwalker)
+        return haves
+
+    def generate_pack_contents(self, have, want, progress=None):
+        """Iterate over the contents of a pack file.
+
+        :param have: List of SHA1s of objects that should not be sent
+        :param want: List of SHA1s of objects that should be sent
+        :param progress: Optional progress reporting method
+        """
+        return self.iter_shas(self.find_missing_objects(have, want, progress))
+
+    def peel_sha(self, sha):
+        """Peel all tags from a SHA.
+
+        :param sha: The object SHA to peel.
+        :return: The fully-peeled object, after peeling all intermediate
+            tags; if the original ref does not point to a tag, the object
+            it names is returned.
+        """
+        obj = self[sha]
+        obj_class = object_class(obj.type_name)
+        while obj_class is Tag:
+            obj_class, sha = obj.object
+            obj = self[sha]
+        return obj
+
+    def _collect_ancestors(self, heads, common=set(),
+                           get_parents=lambda commit: commit.parents):
+        """Collect all ancestors of heads up to (excluding) those in common.
+
+        :param heads: commits to start from
+        :param common: commits to end at, or empty set to walk repository
+            completely
+        :param get_parents: Optional function for getting the parents of a
+            commit.
+        :return: a tuple (A, B) where A - all commits reachable
+            from heads but not present in common, B - common (shared) elements
+            that are directly reachable from heads
+        """
+        bases = set()
+        commits = set()
+        queue = []
+        queue.extend(heads)
+        while queue:
+            e = queue.pop(0)
+            if e in common:
+                bases.add(e)
+            elif e not in commits:
+                commits.add(e)
+                cmt = self[e]
+                queue.extend(get_parents(cmt))
+        return (commits, bases)
+
+    def close(self):
+        """Close any files opened by this object store."""
+        # Default implementation is a NO-OP
+
+
+class PackBasedObjectStore(BaseObjectStore):
+
+    def __init__(self):
+        self._pack_cache = {}
+
+    @property
+    def alternates(self):
+        return []
+
+    def contains_packed(self, sha):
+        """Check if a particular object is present by SHA1 and is packed.
+
+        This does not check alternates.
+        """
+        for pack in self.packs:
+            if sha in pack:
+                return True
+        return False
+
+    def __contains__(self, sha):
+        """Check if a particular object is present by SHA1.
+
+        This method makes no distinction between loose and packed objects.
+        """
+        if self.contains_packed(sha) or self.contains_loose(sha):
+            return True
+        for alternate in self.alternates:
+            if sha in alternate:
+                return True
+        return False
+
+    def _pack_cache_stale(self):
+        """Check whether the pack cache is stale."""
+        raise NotImplementedError(self._pack_cache_stale)
+
+    def _add_known_pack(self, base_name, pack):
+        """Add a newly appeared pack to the cache by path.
+
+        """
+        self._pack_cache[base_name] = pack
+
+    def close(self):
+        pack_cache = self._pack_cache
+        self._pack_cache = {}
+        while pack_cache:
+            (name, pack) = pack_cache.popitem()
+            pack.close()
+
+    @property
+    def packs(self):
+        """List with pack objects."""
+        if self._pack_cache is None or self._pack_cache_stale():
+            self._update_pack_cache()
+
+        return self._pack_cache.values()
+
+    def _iter_alternate_objects(self):
+        """Iterate over the SHAs of all the objects in alternate stores."""
+        for alternate in self.alternates:
+            for alternate_object in alternate:
+                yield alternate_object
+
+    def _iter_loose_objects(self):
+        """Iterate over the SHAs of all loose objects."""
+        raise NotImplementedError(self._iter_loose_objects)
+
+    def _get_loose_object(self, sha):
+        raise NotImplementedError(self._get_loose_object)
+
+    def _remove_loose_object(self, sha):
+        raise NotImplementedError(self._remove_loose_object)
+
+    def pack_loose_objects(self):
+        """Pack loose objects.
+
+        :return: Number of objects packed
+        """
+        objects = set()
+        for sha in self._iter_loose_objects():
+            objects.add((self._get_loose_object(sha), None))
+        self.add_objects(list(objects))
+        for obj, path in objects:
+            self._remove_loose_object(obj.id)
+        return len(objects)
+
+    def __iter__(self):
+        """Iterate over the SHAs that are present in this store."""
+        iterables = (list(self.packs) + [self._iter_loose_objects()] +
+                     [self._iter_alternate_objects()])
+        return chain(*iterables)
+
+    def contains_loose(self, sha):
+        """Check if a particular object is present by SHA1 and is loose.
+
+        This does not check alternates.
+        """
+        return self._get_loose_object(sha) is not None
+
+    def get_raw(self, name):
+        """Obtain the raw text for an object.
+
+        :param name: sha for the object.
+        :return: tuple with numeric type and object contents.
+        """
+        if len(name) == 40:
+            sha = hex_to_sha(name)
+            hexsha = name
+        elif len(name) == 20:
+            sha = name
+            hexsha = None
+        else:
+            raise AssertionError("Invalid object name %r" % name)
+        for pack in self.packs:
+            try:
+                return pack.get_raw(sha)
+            except KeyError:
+                pass
+        if hexsha is None:
+            hexsha = sha_to_hex(name)
+        ret = self._get_loose_object(hexsha)
+        if ret is not None:
+            return ret.type_num, ret.as_raw_string()
+        for alternate in self.alternates:
+            try:
+                return alternate.get_raw(hexsha)
+            except KeyError:
+                pass
+        raise KeyError(hexsha)
+
+    def add_objects(self, objects):
+        """Add a set of objects to this object store.
+
+        :param objects: Iterable over (object, path) tuples, should support
+            __len__.
+        :return: Pack object of the objects written.
+        """
+        if len(objects) == 0:
+            # Don't bother writing an empty pack file
+            return
+        f, commit, abort = self.add_pack()
+        try:
+            write_pack_objects(f, objects)
+        except:
+            abort()
+            raise
+        else:
+            return commit()
+
+
+class DiskObjectStore(PackBasedObjectStore):
+    """Git-style object store that exists on disk."""
+
+    def __init__(self, path):
+        """Open an object store.
+
+        :param path: Path of the object store.
+        """
+        super(DiskObjectStore, self).__init__()
+        self.path = path
+        self.pack_dir = os.path.join(self.path, PACKDIR)
+        self._pack_cache_time = 0
+        self._pack_cache = {}
+        self._alternates = None
+
+    def __repr__(self):
+        return "<%s(%r)>" % (self.__class__.__name__, self.path)
+
+    @property
+    def alternates(self):
+        if self._alternates is not None:
+            return self._alternates
+        self._alternates = []
+        for path in self._read_alternate_paths():
+            self._alternates.append(DiskObjectStore(path))
+        return self._alternates
+
+    def _read_alternate_paths(self):
+        try:
+            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), 'rb')
+        except (OSError, IOError) as e:
+            if e.errno == errno.ENOENT:
+                return
+            raise
+        with f:
+            for l in f.readlines():
+                l = l.rstrip(b"\n")
+                if l[0] == b"#":
+                    continue
+                if os.path.isabs(l):
+                    yield l.decode(sys.getfilesystemencoding())
+                else:
+                    yield os.path.join(self.path, l).decode(
+                        sys.getfilesystemencoding())
+
+    def add_alternate_path(self, path):
+        """Add an alternate path to this object store.
+        """
+        try:
+            os.mkdir(os.path.join(self.path, INFODIR))
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        alternates_path = os.path.join(self.path, INFODIR, "alternates")
+        with GitFile(alternates_path, 'wb') as f:
+            try:
+                orig_f = open(alternates_path, 'rb')
+            except (OSError, IOError) as e:
+                if e.errno != errno.ENOENT:
+                    raise
+            else:
+                with orig_f:
+                    f.write(orig_f.read())
+            f.write(path.encode(sys.getfilesystemencoding()) + b"\n")
+
+        if not os.path.isabs(path):
+            path = os.path.join(self.path, path)
+        self.alternates.append(DiskObjectStore(path))
+
+    def _update_pack_cache(self):
+        try:
+            pack_dir_contents = os.listdir(self.pack_dir)
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                self._pack_cache_time = 0
+                self.close()
+                return
+            raise
+        self._pack_cache_time = max(
+                os.stat(self.pack_dir).st_mtime, time.time())
+        pack_files = set()
+        for name in pack_dir_contents:
+            if name.startswith("pack-") and name.endswith(".pack"):
+                # verify that idx exists first (otherwise the pack was not yet
+                # fully written)
+                idx_name = os.path.splitext(name)[0] + ".idx"
+                if idx_name in pack_dir_contents:
+                    pack_name = name[:-len(".pack")]
+                    pack_files.add(pack_name)
+
+        # Open newly appeared pack files
+        for f in pack_files:
+            if f not in self._pack_cache:
+                self._pack_cache[f] = Pack(os.path.join(self.pack_dir, f))
+        # Remove disappeared pack files
+        for f in set(self._pack_cache) - pack_files:
+            self._pack_cache.pop(f).close()
+
+    def _pack_cache_stale(self):
+        try:
+            return os.stat(self.pack_dir).st_mtime >= self._pack_cache_time
+        except OSError as e:
+            if e.errno == errno.ENOENT:
+                return True
+            raise
+
+    def _get_shafile_path(self, sha):
+        # Check from object dir
+        return hex_to_filename(self.path, sha)
+
+    def _iter_loose_objects(self):
+        for base in os.listdir(self.path):
+            if len(base) != 2:
+                continue
+            for rest in os.listdir(os.path.join(self.path, base)):
+                yield (base+rest).encode(sys.getfilesystemencoding())
+
+    def _get_loose_object(self, sha):
+        path = self._get_shafile_path(sha)
+        try:
+            return ShaFile.from_path(path)
+        except (OSError, IOError) as e:
+            if e.errno == errno.ENOENT:
+                return None
+            raise
+
+    def _remove_loose_object(self, sha):
+        os.remove(self._get_shafile_path(sha))
+
+    def _get_pack_basepath(self, entries):
+        suffix = iter_sha1(entry[0] for entry in entries)
+        # TODO: Handle self.pack_dir being bytes
+        suffix = suffix.decode('ascii')
+        return os.path.join(self.pack_dir, "pack-" + suffix)
+
+    def _complete_thin_pack(self, f, path, copier, indexer):
+        """Move a specific file containing a pack into the pack directory.
+
+        :note: The file should be on the same file system as the
+            packs directory.
+
+        :param f: Open file object for the pack.
+        :param path: Path to the pack file.
+        :param copier: A PackStreamCopier to use for writing pack data.
+        :param indexer: A PackIndexer for indexing the pack.
+        """
+        entries = list(indexer)
+
+        # Update the header with the new number of objects.
+        f.seek(0)
+        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+        # Must flush before reading (http://bugs.python.org/issue3207)
+        f.flush()
+
+        # Rescan the rest of the pack, computing the SHA with the new header.
+        new_sha = compute_file_sha(f, end_ofs=-20)
+
+        # Must reposition before writing (http://bugs.python.org/issue3207)
+        f.seek(0, os.SEEK_CUR)
+
+        # Complete the pack.
+        for ext_sha in indexer.ext_refs():
+            assert len(ext_sha) == 20
+            type_num, data = self.get_raw(ext_sha)
+            offset = f.tell()
+            crc32 = write_pack_object(f, type_num, data, sha=new_sha)
+            entries.append((ext_sha, offset, crc32))
+        pack_sha = new_sha.digest()
+        f.write(pack_sha)
+        f.close()
+
+        # Move the pack in.
+        entries.sort()
+        pack_base_name = self._get_pack_basepath(entries)
+        if sys.platform == 'win32':
+            try:
+                os.rename(path, pack_base_name + '.pack')
+            except WindowsError:
+                os.remove(pack_base_name + '.pack')
+                os.rename(path, pack_base_name + '.pack')
+        else:
+            os.rename(path, pack_base_name + '.pack')
+
+        # Write the index.
+        index_file = GitFile(pack_base_name + '.idx', 'wb')
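+        # GitFile.abort() is a no-op once close() has succeeded, so the
+        # finally clause below only discards the lockfile on error paths.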
+        try:
+            write_pack_index_v2(index_file, entries, pack_sha)
+            index_file.close()
+        finally:
+            index_file.abort()
+
+        # Add the pack to the store and return it.
+        final_pack = Pack(pack_base_name)
+        final_pack.check_length_and_checksum()
+        self._add_known_pack(pack_base_name, final_pack)
+        return final_pack
+
+    def add_thin_pack(self, read_all, read_some):
+        """Add a new thin pack to this object store.
+
+        Thin packs are packs that contain deltas with parents that exist
+        outside the pack. They should never be placed in the object store
+        directly, and always indexed and completed as they are copied.
+
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
+        :param read_some: Read function that returns at least one byte, but may
+            not return the number of bytes requested.
+        :return: A Pack object pointing at the now-completed thin pack in the
+            objects/pack directory.
+        """
+        fd, path = tempfile.mkstemp(dir=self.path, prefix='tmp_pack_')
+        with os.fdopen(fd, 'w+b') as f:
+            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
+            copier.verify()
+            return self._complete_thin_pack(f, path, copier, indexer)
+
+    def move_in_pack(self, path):
+        """Move a specific file containing a pack into the pack directory.
+
+        :note: The file should be on the same file system as the
+            packs directory.
+
+        :param path: Path to the pack file.
+        """
+        with PackData(path) as p:
+            entries = p.sorted_entries()
+            basename = self._get_pack_basepath(entries)
+            with GitFile(basename+".idx", "wb") as f:
+                write_pack_index_v2(f, entries, p.get_stored_checksum())
+        os.rename(path, basename + ".pack")
+        final_pack = Pack(basename)
+        self._add_known_pack(basename, final_pack)
+        return final_pack
+
+    def add_pack(self):
+        """Add a new pack to this object store.
+
+        :return: Fileobject to write to, a commit function to
+            call when the pack is finished and an abort
+            function.
+        """
+        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
+        f = os.fdopen(fd, 'wb')
+
+        def commit():
+            os.fsync(fd)
+            f.close()
+            if os.path.getsize(path) > 0:
+                return self.move_in_pack(path)
+            else:
+                os.remove(path)
+                return None
+
+        def abort():
+            f.close()
+            os.remove(path)
+        return f, commit, abort
+
+    def add_object(self, obj):
+        """Add a single object to this object store.
+
+        :param obj: Object to add
+        """
+        path = self._get_shafile_path(obj.id)
+        dir = os.path.dirname(path)
+        try:
+            os.mkdir(dir)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        if os.path.exists(path):
+            return  # Already there, no need to write again
+        with GitFile(path, 'wb') as f:
+            f.write(obj.as_legacy_object())
+
+    @classmethod
+    def init(cls, path):
+        try:
+            os.mkdir(path)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        os.mkdir(os.path.join(path, "info"))
+        os.mkdir(os.path.join(path, PACKDIR))
+        return cls(path)
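
A minimal sketch of DiskObjectStore usage against a throwaway directory (the temporary path is created ad hoc):

    import tempfile

    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob

    # init() lays out the info/ and pack/ subdirectories.
    store = DiskObjectStore.init(tempfile.mkdtemp())
    blob = Blob.from_string(b'example data')
    store.add_object(blob)
    assert store.contains_loose(blob.id)
    assert store.get_raw(blob.id) == (Blob.type_num, b'example data')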
+
+
+class MemoryObjectStore(BaseObjectStore):
+    """Object store that keeps all objects in memory."""
+
+    def __init__(self):
+        super(MemoryObjectStore, self).__init__()
+        self._data = {}
+
+    def _to_hexsha(self, sha):
+        if len(sha) == 40:
+            return sha
+        elif len(sha) == 20:
+            return sha_to_hex(sha)
+        else:
+            raise ValueError("Invalid sha %r" % (sha,))
+
+    def contains_loose(self, sha):
+        """Check if a particular object is present by SHA1 and is loose."""
+        return self._to_hexsha(sha) in self._data
+
+    def contains_packed(self, sha):
+        """Check if a particular object is present by SHA1 and is packed."""
+        return False
+
+    def __iter__(self):
+        """Iterate over the SHAs that are present in this store."""
+        return iter(self._data.keys())
+
+    @property
+    def packs(self):
+        """List with pack objects."""
+        return []
+
+    def get_raw(self, name):
+        """Obtain the raw text for an object.
+
+        :param name: sha for the object.
+        :return: tuple with numeric type and object contents.
+        """
+        obj = self[self._to_hexsha(name)]
+        return obj.type_num, obj.as_raw_string()
+
+    def __getitem__(self, name):
+        return self._data[self._to_hexsha(name)].copy()
+
+    def __delitem__(self, name):
+        """Delete an object from this store, for testing only."""
+        del self._data[self._to_hexsha(name)]
+
+    def add_object(self, obj):
+        """Add a single object to this object store.
+
+        """
+        self._data[obj.id] = obj.copy()
+
+    def add_objects(self, objects):
+        """Add a set of objects to this object store.
+
+        :param objects: Iterable over a list of (object, path) tuples
+        """
+        for obj, path in objects:
+            self.add_object(obj)
+
+    def add_pack(self):
+        """Add a new pack to this object store.
+
+        Because this object store doesn't support packs, we extract and add the
+        individual objects.
+
+        :return: Fileobject to write to, a commit function to call when the
+            pack is finished and an abort function.
+        """
+        f = BytesIO()
+
+        def commit():
+            p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
+            f.close()
+            for obj in PackInflater.for_pack_data(p, self.get_raw):
+                self.add_object(obj)
+
+        def abort():
+            pass
+        return f, commit, abort
+
+    def _complete_thin_pack(self, f, indexer):
+        """Complete a thin pack by adding external references.
+
+        :param f: Open file object for the pack.
+        :param indexer: A PackIndexer for indexing the pack.
+        """
+        entries = list(indexer)
+
+        # Update the header with the new number of objects.
+        f.seek(0)
+        write_pack_header(f, len(entries) + len(indexer.ext_refs()))
+
+        # Rescan the rest of the pack, computing the SHA with the new header.
+        new_sha = compute_file_sha(f, end_ofs=-20)
+
+        # Complete the pack.
+        for ext_sha in indexer.ext_refs():
+            assert len(ext_sha) == 20
+            type_num, data = self.get_raw(ext_sha)
+            write_pack_object(f, type_num, data, sha=new_sha)
+        pack_sha = new_sha.digest()
+        f.write(pack_sha)
+
+    def add_thin_pack(self, read_all, read_some):
+        """Add a new thin pack to this object store.
+
+        Thin packs are packs that contain deltas with parents that exist
+        outside the pack. Because this object store doesn't support packs, we
+        extract and add the individual objects.
+
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
+        :param read_some: Read function that returns at least one byte, but may
+            not return the number of bytes requested.
+        """
+        f, commit, abort = self.add_pack()
+        try:
+            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
+            copier = PackStreamCopier(read_all, read_some, f,
+                                      delta_iter=indexer)
+            copier.verify()
+            self._complete_thin_pack(f, indexer)
+        except:
+            abort()
+            raise
+        else:
+            commit()
+
+
+class ObjectImporter(object):
+    """Interface for importing objects."""
+
+    def __init__(self, count):
+        """Create a new ObjectImporter.
+
+        :param count: Number of objects that are going to be imported.
+        """
+        self.count = count
+
+    def add_object(self, object):
+        """Add an object."""
+        raise NotImplementedError(self.add_object)
+
+    def finish(self, object):
+        """Finish the import and write objects to disk."""
+        raise NotImplementedError(self.finish)
+
+
+class ObjectIterator(object):
+    """Interface for iterating over objects."""
+
+    def iterobjects(self):
+        raise NotImplementedError(self.iterobjects)
+
+
+class ObjectStoreIterator(ObjectIterator):
+    """ObjectIterator that works on top of an ObjectStore."""
+
+    def __init__(self, store, sha_iter):
+        """Create a new ObjectIterator.
+
+        :param store: Object store to retrieve from
+        :param sha_iter: Iterator over (sha, path) tuples
+        """
+        self.store = store
+        self.sha_iter = sha_iter
+        self._shas = []
+
+    def __iter__(self):
+        """Yield tuple with next object and path."""
+        for sha, path in self.itershas():
+            yield self.store[sha], path
+
+    def iterobjects(self):
+        """Iterate over just the objects."""
+        for o, path in self:
+            yield o
+
+    def itershas(self):
+        """Iterate over the SHAs."""
+        for sha in self._shas:
+            yield sha
+        for sha in self.sha_iter:
+            self._shas.append(sha)
+            yield sha
+
+    def __contains__(self, needle):
+        """Check if an object is present.
+
+        :note: This checks if the object is present in
+            the underlying object store, not if it would
+            be yielded by the iterator.
+
+        :param needle: SHA1 of the object to check for
+        """
+        return needle in self.store
+
+    def __getitem__(self, key):
+        """Find an object by SHA1.
+
+        :note: This retrieves the object from the underlying
+            object store. It will also succeed if the object would
+            not be returned by the iterator.
+        """
+        return self.store[key]
+
+    def __len__(self):
+        """Return the number of objects."""
+        return len(list(self.itershas()))
+
+
+def tree_lookup_path(lookup_obj, root_sha, path):
+    """Look up an object in a Git tree.
+
+    :param lookup_obj: Callback for retrieving object by SHA1
+    :param root_sha: SHA1 of the root tree
+    :param path: Path to lookup
+    :return: A tuple of (mode, SHA) of the resulting path.
+    """
+    tree = lookup_obj(root_sha)
+    if not isinstance(tree, Tree):
+        raise NotTreeError(root_sha)
+    return tree.lookup_path(lookup_obj, path)
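
A small sketch of tree_lookup_path against an in-memory store (the file name and mode are illustrative):

    from dulwich.object_store import MemoryObjectStore, tree_lookup_path
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b'content')
    tree = Tree()
    tree.add(b'a.txt', 0o100644, blob.id)
    store.add_object(blob)
    store.add_object(tree)

    # The lookup callback is any sha -> object function; the store's own
    # __getitem__ suffices here.
    mode, sha = tree_lookup_path(store.__getitem__, tree.id, b'a.txt')
    assert (mode, sha) == (0o100644, blob.id)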
+
+
+def _collect_filetree_revs(obj_store, tree_sha, kset):
+    """Collect SHA1s of files and directories for specified tree.
+
+    :param obj_store: Object store to get objects by SHA from
+    :param tree_sha: tree reference to walk
+    :param kset: set to fill with references to files and directories
+    """
+    filetree = obj_store[tree_sha]
+    for name, mode, sha in filetree.iteritems():
+        if not S_ISGITLINK(mode) and sha not in kset:
+            kset.add(sha)
+            if stat.S_ISDIR(mode):
+                _collect_filetree_revs(obj_store, sha, kset)
+
+
+def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
+    """Split object id list into three lists with commit, tag, and other SHAs.
+
+    Commits referenced by tags are included in the commits list as well.
+    Only SHA1s known in this repository will get through, and unless the
+    ignore_unknown argument is True, a KeyError is raised for any SHA1
+    missing from the repository.
+
+    :param obj_store: Object store to get objects by SHA1 from
+    :param lst: Collection of commit and tag SHAs
+    :param ignore_unknown: True to skip SHA1 missing in the repository
+        silently.
+    :return: A tuple of (commits, tags, others) SHA1s
+    """
+    commits = set()
+    tags = set()
+    others = set()
+    for e in lst:
+        try:
+            o = obj_store[e]
+        except KeyError:
+            if not ignore_unknown:
+                raise
+        else:
+            if isinstance(o, Commit):
+                commits.add(e)
+            elif isinstance(o, Tag):
+                tags.add(e)
+                tagged = o.object[1]
+                c, t, o = _split_commits_and_tags(
+                    obj_store, [tagged], ignore_unknown=ignore_unknown)
+                commits |= c
+                tags |= t
+                others |= o
+            else:
+                others.add(e)
+    return (commits, tags, others)
+
+
+class MissingObjectFinder(object):
+    """Find the objects missing from another object store.
+
+    :param object_store: Object store containing at least all objects to be
+        sent
+    :param haves: SHA1s of commits not to send (already present in target)
+    :param wants: SHA1s of commits to send
+    :param progress: Optional function to report progress to.
+    :param get_tagged: Function that returns a dict of pointed-to sha -> tag
+        sha for including tags.
+    :param get_parents: Optional function for getting the parents of a commit.
+    """
+
+    def __init__(self, object_store, haves, wants, progress=None,
+                 get_tagged=None, get_parents=lambda commit: commit.parents):
+        self.object_store = object_store
+        self._get_parents = get_parents
+        # process Commits and Tags differently
+        # Note: haves may list commits/tags not available locally, and such
+        # SHAs are filtered out by _split_commits_and_tags; wants, however,
+        # must list only known SHAs, or _split_commits_and_tags fails with
+        # a KeyError.
+        have_commits, have_tags, have_others = (
+            _split_commits_and_tags(object_store, haves, True))
+        want_commits, want_tags, want_others = (
+            _split_commits_and_tags(object_store, wants, False))
+        # all_ancestors is a set of commits that shall not be sent
+        # (complete repository up to 'haves')
+        all_ancestors = object_store._collect_ancestors(
+            have_commits, get_parents=self._get_parents)[0]
+        # missing_commits - complete set of commits between haves and wants
+        # common_commits - commits from all_ancestors we encounter while
+        # traversing the parent hierarchy of wants
+        missing_commits, common_commits = object_store._collect_ancestors(
+            want_commits, all_ancestors, get_parents=self._get_parents)
+        self.sha_done = set()
+        # Now, fill sha_done with commits and revisions of
+        # files and directories known to be present both locally
+        # and on the target, so these commits and files
+        # won't get selected for fetch.
+        for h in common_commits:
+            self.sha_done.add(h)
+            cmt = object_store[h]
+            _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
+        # record tags we have as visited, too
+        for t in have_tags:
+            self.sha_done.add(t)
+
+        missing_tags = want_tags.difference(have_tags)
+        missing_others = want_others.difference(have_others)
+        # in fact, what we 'want' is commits, tags, and others
+        # we've found missing
+        wants = missing_commits.union(missing_tags)
+        wants = wants.union(missing_others)
+
+        self.objects_to_send = set([(w, None, False) for w in wants])
+
+        if progress is None:
+            self.progress = lambda x: None
+        else:
+            self.progress = progress
+        self._tagged = get_tagged() if get_tagged else {}
+
+    def add_todo(self, entries):
+        self.objects_to_send.update([e for e in entries
+                                     if not e[0] in self.sha_done])
+
+    def next(self):
+        while True:
+            if not self.objects_to_send:
+                return None
+            (sha, name, leaf) = self.objects_to_send.pop()
+            if sha not in self.sha_done:
+                break
+        if not leaf:
+            o = self.object_store[sha]
+            if isinstance(o, Commit):
+                self.add_todo([(o.tree, "", False)])
+            elif isinstance(o, Tree):
+                self.add_todo([(s, n, not stat.S_ISDIR(m))
+                               for n, m, s in o.iteritems()
+                               if not S_ISGITLINK(m)])
+            elif isinstance(o, Tag):
+                self.add_todo([(o.object[1], None, False)])
+        if sha in self._tagged:
+            self.add_todo([(self._tagged[sha], None, True)])
+        self.sha_done.add(sha)
+        self.progress(("counting objects: %d\r" %
+                       len(self.sha_done)).encode('ascii'))
+        return (sha, name)
+
+    __next__ = next
+
+
+class ObjectStoreGraphWalker(object):
+    """Graph walker that finds what commits are missing from an object store.
+
+    :ivar heads: Revisions without descendants in the local repo
+    :ivar get_parents: Function to retrieve parents in the local repo
+    """
+
+    def __init__(self, local_heads, get_parents):
+        """Create a new instance.
+
+        :param local_heads: Heads to start search with
+        :param get_parents: Function for finding the parents of a SHA1.
+        """
+        self.heads = set(local_heads)
+        self.get_parents = get_parents
+        self.parents = {}
+
+    def ack(self, sha):
+        """Ack that a revision and its ancestors are present in the source."""
+        if len(sha) != 40:
+            raise ValueError("unexpected sha %r received" % sha)
+        ancestors = set([sha])
+
+        # stop if we run out of heads to remove
+        while self.heads:
+            for a in ancestors:
+                if a in self.heads:
+                    self.heads.remove(a)
+
+            # collect all ancestors
+            new_ancestors = set()
+            for a in ancestors:
+                ps = self.parents.get(a)
+                if ps is not None:
+                    new_ancestors.update(ps)
+                self.parents[a] = None
+
+            # no more ancestors; stop
+            if not new_ancestors:
+                break
+
+            ancestors = new_ancestors
+
+    def next(self):
+        """Iterate over ancestors of heads in the target."""
+        if self.heads:
+            ret = self.heads.pop()
+            ps = self.get_parents(ret)
+            self.parents[ret] = ps
+            self.heads.update(
+                [p for p in ps if p not in self.parents])
+            return ret
+        return None
+
+    __next__ = next
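
A sketch tying MemoryObjectStore and ObjectStoreGraphWalker together (the author details and commit message are placeholders):

    from dulwich.object_store import MemoryObjectStore, ObjectStoreGraphWalker
    from dulwich.objects import Blob, Commit, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b'hello')
    tree = Tree()
    tree.add(b'hello.txt', 0o100644, blob.id)
    commit = Commit()
    commit.tree = tree.id
    commit.author = commit.committer = b'Example <example@example.com>'
    commit.author_time = commit.commit_time = 0
    commit.author_timezone = commit.commit_timezone = 0
    commit.message = b'initial commit'
    store.add_objects([(blob, None), (tree, None), (commit, None)])

    # The walker announces local heads; ack() marks history as shared.
    walker = ObjectStoreGraphWalker(
        [commit.id], lambda sha: store[sha].parents)
    assert next(walker) == commit.id
    walker.ack(commit.id)
    assert next(walker) is None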

+ 1336 - 0
dulwich/objects.py

@@ -0,0 +1,1336 @@
+# objects.py -- Access to base git objects
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Access to base git objects."""
+
+import binascii
+from io import BytesIO
+from collections import namedtuple
+import os
+import posixpath
+import stat
+import warnings
+import zlib
+from hashlib import sha1
+
+from dulwich.errors import (
+    ChecksumMismatch,
+    NotBlobError,
+    NotCommitError,
+    NotTagError,
+    NotTreeError,
+    ObjectFormatException,
+    )
+from dulwich.file import GitFile
+
+
+ZERO_SHA = b'0' * 40
+
+# Header fields for commits
+_TREE_HEADER = b'tree'
+_PARENT_HEADER = b'parent'
+_AUTHOR_HEADER = b'author'
+_COMMITTER_HEADER = b'committer'
+_ENCODING_HEADER = b'encoding'
+_MERGETAG_HEADER = b'mergetag'
+_GPGSIG_HEADER = b'gpgsig'
+
+# Header fields for objects
+_OBJECT_HEADER = b'object'
+_TYPE_HEADER = b'type'
+_TAG_HEADER = b'tag'
+_TAGGER_HEADER = b'tagger'
+
+
+S_IFGITLINK = 0o160000
+
+
+def S_ISGITLINK(m):
+    """Check if a mode indicates a submodule.
+
+    :param m: Mode to check
+    :return: a ``boolean``
+    """
+    return (stat.S_IFMT(m) == S_IFGITLINK)
+
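+# e.g. S_ISGITLINK(0o160000) is True, while a regular file mode such as
+# 0o100644 is not.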
+
+def _decompress(string):
+    dcomp = zlib.decompressobj()
+    dcomped = dcomp.decompress(string)
+    dcomped += dcomp.flush()
+    return dcomped
+
+
+def sha_to_hex(sha):
+    """Takes a string and returns the hex of the sha within"""
+    hexsha = binascii.hexlify(sha)
+    assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
+    return hexsha
+
+
+def hex_to_sha(hex):
+    """Takes a hex sha and returns a binary sha"""
+    assert len(hex) == 40, "Incorrect length of hexsha: %s" % hex
+    try:
+        return binascii.unhexlify(hex)
+    except TypeError as exc:
+        if not isinstance(hex, bytes):
+            raise
+        raise ValueError(exc.args[0])
+
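+# e.g. hex_to_sha(b'0' * 40) == b'\x00' * 20; sha_to_hex is its inverse.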
+
+def valid_hexsha(hex):
+    if len(hex) != 40:
+        return False
+    try:
+        binascii.unhexlify(hex)
+    except (TypeError, binascii.Error):
+        return False
+    else:
+        return True
+
+
+def hex_to_filename(path, hex):
+    """Takes a hex sha and returns its filename relative to the given path."""
+    # os.path.join accepts bytes or unicode, but all args must be of the same
+    # type. Make sure that hex which is expected to be bytes, is the same type
+    # as path.
+    if getattr(path, 'encode', None) is not None:
+        hex = hex.decode('ascii')
+    dir = hex[:2]
+    file = hex[2:]
+    # Check from object dir
+    return os.path.join(path, dir, file)
+
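+# e.g. hex_to_filename('/objects', b'ab' * 20) == '/objects/ab/' + 'ab' * 19:
+# the first two hex digits become the fan-out directory.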
+
+def filename_to_hex(filename):
+    """Takes an object filename and returns its corresponding hex sha."""
+    # grab the last (up to) two path components
+    names = filename.rsplit(os.path.sep, 2)[-2:]
+    errmsg = "Invalid object filename: %s" % filename
+    assert len(names) == 2, errmsg
+    base, rest = names
+    assert len(base) == 2 and len(rest) == 38, errmsg
+    hex = (base + rest).encode('ascii')
+    hex_to_sha(hex)
+    return hex
+
+
+def object_header(num_type, length):
+    """Return an object header for the given numeric type and text length."""
+    return (object_class(num_type).type_name +
+            b' ' + str(length).encode('ascii') + b'\0')
+
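+# e.g. object_header(3, 5) == b'blob 5\x00' (3 is Blob.type_num, defined
+# below).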
+
+def serializable_property(name, docstring=None):
+    """A property that helps tracking whether serialization is necessary.
+    """
+    def set(obj, value):
+        setattr(obj, "_"+name, value)
+        obj._needs_serialization = True
+
+    def get(obj):
+        return getattr(obj, "_"+name)
+    return property(get, set, doc=docstring)
+
+
+def object_class(type):
+    """Get the object class corresponding to the given type.
+
+    :param type: Either a type name string or a numeric type.
+    :return: The ShaFile subclass corresponding to the given type, or None if
+        type is not a valid type name/number.
+    """
+    return _TYPE_MAP.get(type, None)
+
+
+def check_hexsha(hex, error_msg):
+    """Check if a string is a valid hex sha string.
+
+    :param hex: Hex string to check
+    :param error_msg: Error message to use in exception
+    :raise ObjectFormatException: Raised when the string is not valid
+    """
+    if not valid_hexsha(hex):
+        raise ObjectFormatException("%s %s" % (error_msg, hex))
+
+
+def check_identity(identity, error_msg):
+    """Check if the specified identity is valid.
+
+    This will raise an exception if the identity is not valid.
+
+    :param identity: Identity string
+    :param error_msg: Error message to use in exception
+    """
+    email_start = identity.find(b'<')
+    email_end = identity.find(b'>')
+    if (email_start < 0 or email_end < 0 or email_end <= email_start
+            or identity.find(b'<', email_start + 1) >= 0
+            or identity.find(b'>', email_end + 1) >= 0
+            or not identity.endswith(b'>')):
+        raise ObjectFormatException(error_msg)
+
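+# e.g. check_identity(b'Jane Doe <jane@example.com>', "invalid id") passes;
+# any text after the closing b'>' raises ObjectFormatException.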
+
+def git_line(*items):
+    """Formats items into a space sepreated line."""
+    return b' '.join(items) + b'\n'
+
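+# e.g. git_line(b'tree', b'0' * 40) == b'tree ' + b'0' * 40 + b'\n'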
+
+class FixedSha(object):
+    """SHA object that behaves like hashlib's but is given a fixed value."""
+
+    __slots__ = ('_hexsha', '_sha')
+
+    def __init__(self, hexsha):
+        if getattr(hexsha, 'encode', None) is not None:
+            hexsha = hexsha.encode('ascii')
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
+        self._hexsha = hexsha
+        self._sha = hex_to_sha(hexsha)
+
+    def digest(self):
+        """Return the raw SHA digest."""
+        return self._sha
+
+    def hexdigest(self):
+        """Return the hex SHA digest."""
+        return self._hexsha.decode('ascii')
+
+
+class ShaFile(object):
+    """A git SHA file."""
+
+    __slots__ = ('_chunked_text', '_sha', '_needs_serialization')
+
+    @staticmethod
+    def _parse_legacy_object_header(magic, f):
+        """Parse a legacy object, creating it but not reading the file."""
+        bufsize = 1024
+        decomp = zlib.decompressobj()
+        header = decomp.decompress(magic)
+        start = 0
+        end = -1
+        while end < 0:
+            extra = f.read(bufsize)
+            header += decomp.decompress(extra)
+            magic += extra
+            end = header.find(b'\0', start)
+            start = len(header)
+        header = header[:end]
+        type_name, size = header.split(b' ', 1)
+        size = int(size)  # sanity check
+        obj_class = object_class(type_name)
+        if not obj_class:
+            raise ObjectFormatException("Not a known type: %s" % type_name)
+        return obj_class()
+
+    def _parse_legacy_object(self, map):
+        """Parse a legacy object, setting the raw string."""
+        text = _decompress(map)
+        header_end = text.find(b'\0')
+        if header_end < 0:
+            raise ObjectFormatException("Invalid object header, no \\0")
+        self.set_raw_string(text[header_end+1:])
+
+    def as_legacy_object_chunks(self):
+        """Return chunks representing the object in the experimental format.
+
+        :return: List of strings
+        """
+        compobj = zlib.compressobj()
+        yield compobj.compress(self._header())
+        for chunk in self.as_raw_chunks():
+            yield compobj.compress(chunk)
+        yield compobj.flush()
+
+    def as_legacy_object(self):
+        """Return string representing the object in the experimental format.
+        """
+        return b''.join(self.as_legacy_object_chunks())
+
+    def as_raw_chunks(self):
+        """Return chunks with serialization of the object.
+
+        :return: List of strings, not necessarily one per line
+        """
+        if self._needs_serialization:
+            self._sha = None
+            self._chunked_text = self._serialize()
+            self._needs_serialization = False
+        return self._chunked_text
+
+    def as_raw_string(self):
+        """Return raw string with serialization of the object.
+
+        :return: String object
+        """
+        return b''.join(self.as_raw_chunks())
+
+    def __str__(self):
+        """Return raw string serialization of this object."""
+        return self.as_raw_string()
+
+    def __hash__(self):
+        """Return unique hash for this object."""
+        return hash(self.id)
+
+    def as_pretty_string(self):
+        """Return a string representing this object, fit for display."""
+        return self.as_raw_string()
+
+    def set_raw_string(self, text, sha=None):
+        """Set the contents of this object from a serialized string."""
+        if not isinstance(text, bytes):
+            raise TypeError('Expected bytes for text, got %r' % text)
+        self.set_raw_chunks([text], sha)
+
+    def set_raw_chunks(self, chunks, sha=None):
+        """Set the contents of this object from a list of chunks."""
+        self._chunked_text = chunks
+        self._deserialize(chunks)
+        if sha is None:
+            self._sha = None
+        else:
+            self._sha = FixedSha(sha)
+        self._needs_serialization = False
+
+    @staticmethod
+    def _parse_object_header(magic, f):
+        """Parse a new style object, creating it but not reading the file."""
+        num_type = (ord(magic[0:1]) >> 4) & 7
+        obj_class = object_class(num_type)
+        if not obj_class:
+            raise ObjectFormatException("Not a known type %d" % num_type)
+        return obj_class()
+
+    def _parse_object(self, map):
+        """Parse a new style object, setting self._text."""
+        # skip type and size; type must have already been determined, and
+        # we trust zlib to fail if it's otherwise corrupted
+        byte = ord(map[0:1])
+        used = 1
+        while (byte & 0x80) != 0:
+            byte = ord(map[used:used+1])
+            used += 1
+        raw = map[used:]
+        self.set_raw_string(_decompress(raw))
+
+    @classmethod
+    def _is_legacy_object(cls, magic):
+        b0 = ord(magic[0:1])
+        b1 = ord(magic[1:2])
+        word = (b0 << 8) + b1
+        return (b0 & 0x8F) == 0x08 and (word % 31) == 0
+
+    @classmethod
+    def _parse_file(cls, f):
+        map = f.read()
+        if cls._is_legacy_object(map):
+            obj = cls._parse_legacy_object_header(map, f)
+            obj._parse_legacy_object(map)
+        else:
+            obj = cls._parse_object_header(map, f)
+            obj._parse_object(map)
+        return obj
+
+    def __init__(self):
+        """Don't call this directly"""
+        self._sha = None
+        self._chunked_text = []
+        self._needs_serialization = True
+
+    def _deserialize(self, chunks):
+        raise NotImplementedError(self._deserialize)
+
+    def _serialize(self):
+        raise NotImplementedError(self._serialize)
+
+    @classmethod
+    def from_path(cls, path):
+        """Open a SHA file from disk."""
+        with GitFile(path, 'rb') as f:
+            return cls.from_file(f)
+
+    @classmethod
+    def from_file(cls, f):
+        """Get the contents of a SHA file on disk."""
+        try:
+            obj = cls._parse_file(f)
+            obj._sha = None
+            return obj
+        except (IndexError, ValueError):
+            raise ObjectFormatException("invalid object header")
+
+    @staticmethod
+    def from_raw_string(type_num, string, sha=None):
+        """Creates an object of the indicated type from the raw string given.
+
+        :param type_num: The numeric type of the object.
+        :param string: The raw uncompressed contents.
+        :param sha: Optional known sha for the object
+        """
+        obj = object_class(type_num)()
+        obj.set_raw_string(string, sha)
+        return obj
+
+    @staticmethod
+    def from_raw_chunks(type_num, chunks, sha=None):
+        """Creates an object of the indicated type from the raw chunks given.
+
+        :param type_num: The numeric type of the object.
+        :param chunks: An iterable of the raw uncompressed contents.
+        :param sha: Optional known sha for the object
+        """
+        obj = object_class(type_num)()
+        obj.set_raw_chunks(chunks, sha)
+        return obj
+
+    @classmethod
+    def from_string(cls, string):
+        """Create a ShaFile from a string."""
+        obj = cls()
+        obj.set_raw_string(string)
+        return obj
+
+    def _check_has_member(self, member, error_msg):
+        """Check that the object has a given member variable.
+
+        :param member: the member variable to check for
+        :param error_msg: the message for an error if the member is missing
+        :raise ObjectFormatException: with the given error_msg if member is
+            missing or is None
+        """
+        if getattr(self, member, None) is None:
+            raise ObjectFormatException(error_msg)
+
+    def check(self):
+        """Check this object for internal consistency.
+
+        :raise ObjectFormatException: if the object is malformed in some way
+        :raise ChecksumMismatch: if the object was created with a SHA that does
+            not match its contents
+        """
+        # TODO: if we find that error-checking during object parsing is a
+        # performance bottleneck, those checks should be moved to the class's
+        # check() method during optimization so we can still check the object
+        # when necessary.
+        old_sha = self.id
+        try:
+            self._deserialize(self.as_raw_chunks())
+            self._sha = None
+            new_sha = self.id
+        except Exception as e:
+            raise ObjectFormatException(e)
+        if old_sha != new_sha:
+            raise ChecksumMismatch(new_sha, old_sha)
+
+    def _header(self):
+        return object_header(self.type, self.raw_length())
+
+    def raw_length(self):
+        """Returns the length of the raw string of this object."""
+        ret = 0
+        for chunk in self.as_raw_chunks():
+            ret += len(chunk)
+        return ret
+
+    def sha(self):
+        """The SHA1 object that is the name of this object."""
+        if self._sha is None or self._needs_serialization:
+            # this is a local because as_raw_chunks() overwrites self._sha
+            new_sha = sha1()
+            new_sha.update(self._header())
+            for chunk in self.as_raw_chunks():
+                new_sha.update(chunk)
+            self._sha = new_sha
+        return self._sha
+
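+    # e.g. Blob.from_string(b'hello').sha().hexdigest() equals
+    # sha1(b'blob 5\x00hello').hexdigest(): the _header() bytes followed by
+    # the raw contents.
+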
+    def copy(self):
+        """Create a new copy of this SHA1 object from its raw string"""
+        obj_class = object_class(self.get_type())
+        return obj_class.from_raw_string(
+            self.get_type(),
+            self.as_raw_string(),
+            self.id)
+
+    @property
+    def id(self):
+        """The hex SHA of this object."""
+        return self.sha().hexdigest().encode('ascii')
+
+    def get_type(self):
+        """Return the type number for this object class."""
+        return self.type_num
+
+    def set_type(self, type):
+        """Set the type number for this object class."""
+        self.type_num = type
+
+    # DEPRECATED: use type_num or type_name as needed.
+    type = property(get_type, set_type)
+
+    def __repr__(self):
+        return "<%s %s>" % (self.__class__.__name__, self.id)
+
+    def __ne__(self, other):
+        return not isinstance(other, ShaFile) or self.id != other.id
+
+    def __eq__(self, other):
+        """Return True if the SHAs of the two objects match.
+
+        It doesn't make sense to talk about an order on ShaFiles, so we don't
+        override the rich comparison methods (__le__, etc.).
+        """
+        return isinstance(other, ShaFile) and self.id == other.id
+
+    def __lt__(self, other):
+        if not isinstance(other, ShaFile):
+            raise TypeError
+        return self.id < other.id
+
+    def __le__(self, other):
+        if not isinstance(other, ShaFile):
+            raise TypeError
+        return self.id <= other.id
+
+    def __cmp__(self, other):
+        if not isinstance(other, ShaFile):
+            raise TypeError
+        return cmp(self.id, other.id)  # noqa: F821
+
+
+class Blob(ShaFile):
+    """A Git Blob object."""
+
+    __slots__ = ()
+
+    type_name = b'blob'
+    type_num = 3
+
+    def __init__(self):
+        super(Blob, self).__init__()
+        self._chunked_text = []
+        self._needs_serialization = False
+
+    def _get_data(self):
+        return self.as_raw_string()
+
+    def _set_data(self, data):
+        self.set_raw_string(data)
+
+    data = property(_get_data, _set_data,
+                    "The text contained within the blob object.")
+
+    def _get_chunked(self):
+        return self._chunked_text
+
+    def _set_chunked(self, chunks):
+        self._chunked_text = chunks
+
+    def _serialize(self):
+        return self._chunked_text
+
+    def _deserialize(self, chunks):
+        self._chunked_text = chunks
+
+    chunked = property(
+        _get_chunked, _set_chunked,
+        "The text within the blob object, as chunks (not necessarily lines).")
+
+    @classmethod
+    def from_path(cls, path):
+        blob = ShaFile.from_path(path)
+        if not isinstance(blob, cls):
+            raise NotBlobError(path)
+        return blob
+
+    def check(self):
+        """Check this object for internal consistency.
+
+        :raise ObjectFormatException: if the object is malformed in some way
+        """
+        super(Blob, self).check()
+
+    def splitlines(self):
+        """Return list of lines in this blob.
+
+        This preserves the original line endings.
+        """
+        chunks = self.chunked
+        if not chunks:
+            return []
+        if len(chunks) == 1:
+            return chunks[0].splitlines(True)
+        remaining = None
+        ret = []
+        for chunk in chunks:
+            lines = chunk.splitlines(True)
+            if len(lines) > 1:
+                ret.append((remaining or b"") + lines[0])
+                ret.extend(lines[1:-1])
+                remaining = lines[-1]
+            elif len(lines) == 1:
+                if remaining is None:
+                    remaining = lines.pop()
+                else:
+                    remaining += lines.pop()
+        if remaining is not None:
+            ret.append(remaining)
+        return ret
+
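+# Chunk boundaries need not align with line endings; e.g. (sketch):
+#
+#     b = Blob()
+#     b.chunked = [b'fir', b'st\nsecond\nthi', b'rd']
+#     b.splitlines()  # -> [b'first\n', b'second\n', b'third']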
+
+def _parse_message(chunks):
+    """Parse a message with a list of fields and a body.
+
+    :param chunks: the raw chunks of the tag or commit object.
+    :return: iterator of tuples of (field, value), one per header line, in the
+        order read from the text, possibly including duplicates. Includes a
+        field named None for the freeform tag/commit text.
+    """
+    f = BytesIO(b''.join(chunks))
+    k = None
+    v = ""
+    eof = False
+
+    def _strip_last_newline(value):
+        """Strip the last newline from value"""
+        if value and value.endswith(b'\n'):
+            return value[:-1]
+        return value
+
+    # Parse the headers
+    #
+    # Headers can contain newlines; continuation lines are indented with a
+    # space. We store the latest key as 'k', and the accumulated value as 'v'.
+    for l in f:
+        if l.startswith(b' '):
+            # Indented continuation of the previous line
+            v += l[1:]
+        else:
+            if k is not None:
+                # We parsed a new header, return its value
+                yield (k, _strip_last_newline(v))
+            if l == b'\n':
+                # Empty line indicates end of headers
+                break
+            (k, v) = l.split(b' ', 1)
+
+    else:
+        # We reached end of file before the headers ended. We still need to
+        # return the previous header, then we need to return a None field for
+        # the text.
+        eof = True
+        if k is not None:
+            yield (k, _strip_last_newline(v))
+        yield (None, None)
+
+    if not eof:
+        # We didn't reach the end of file while parsing headers. We can return
+        # the rest of the file as a message.
+        yield (None, f.read())
+
+    f.close()
+
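+# e.g. a minimal commit-like text parses as (sketch):
+#
+#     list(_parse_message([b'tree ' + b'0' * 40 + b'\n', b'\n', b'msg\n']))
+#     # -> [(b'tree', b'0' * 40), (None, b'msg\n')]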
+
+class Tag(ShaFile):
+    """A Git Tag object."""
+
+    type_name = b'tag'
+    type_num = 4
+
+    __slots__ = ('_tag_timezone_neg_utc', '_name', '_object_sha',
+                 '_object_class', '_tag_time', '_tag_timezone',
+                 '_tagger', '_message')
+
+    def __init__(self):
+        super(Tag, self).__init__()
+        self._tagger = None
+        self._tag_time = None
+        self._tag_timezone = None
+        self._tag_timezone_neg_utc = False
+
+    @classmethod
+    def from_path(cls, filename):
+        tag = ShaFile.from_path(filename)
+        if not isinstance(tag, cls):
+            raise NotTagError(filename)
+        return tag
+
+    def check(self):
+        """Check this object for internal consistency.
+
+        :raise ObjectFormatException: if the object is malformed in some way
+        """
+        super(Tag, self).check()
+        self._check_has_member("_object_sha", "missing object sha")
+        self._check_has_member("_object_class", "missing object type")
+        self._check_has_member("_name", "missing tag name")
+
+        if not self._name:
+            raise ObjectFormatException("empty tag name")
+
+        check_hexsha(self._object_sha, "invalid object sha")
+
+        if getattr(self, "_tagger", None):
+            check_identity(self._tagger, "invalid tagger")
+
+        last = None
+        for field, _ in _parse_message(self._chunked_text):
+            if field == _OBJECT_HEADER and last is not None:
+                raise ObjectFormatException("unexpected object")
+            elif field == _TYPE_HEADER and last != _OBJECT_HEADER:
+                raise ObjectFormatException("unexpected type")
+            elif field == _TAG_HEADER and last != _TYPE_HEADER:
+                raise ObjectFormatException("unexpected tag name")
+            elif field == _TAGGER_HEADER and last != _TAG_HEADER:
+                raise ObjectFormatException("unexpected tagger")
+            last = field
+
+    def _serialize(self):
+        chunks = []
+        chunks.append(git_line(_OBJECT_HEADER, self._object_sha))
+        chunks.append(git_line(_TYPE_HEADER, self._object_class.type_name))
+        chunks.append(git_line(_TAG_HEADER, self._name))
+        if self._tagger:
+            if self._tag_time is None:
+                chunks.append(git_line(_TAGGER_HEADER, self._tagger))
+            else:
+                chunks.append(git_line(
+                    _TAGGER_HEADER, self._tagger,
+                    str(self._tag_time).encode('ascii'),
+                    format_timezone(
+                        self._tag_timezone, self._tag_timezone_neg_utc)))
+        if self._message is not None:
+            chunks.append(b'\n')  # To close headers
+            chunks.append(self._message)
+        return chunks
+
+    def _deserialize(self, chunks):
+        """Grab the metadata attached to the tag"""
+        self._tagger = None
+        self._tag_time = None
+        self._tag_timezone = None
+        self._tag_timezone_neg_utc = False
+        for field, value in _parse_message(chunks):
+            if field == _OBJECT_HEADER:
+                self._object_sha = value
+            elif field == _TYPE_HEADER:
+                obj_class = object_class(value)
+                if not obj_class:
+                    raise ObjectFormatException("Not a known type: %s" % value)
+                self._object_class = obj_class
+            elif field == _TAG_HEADER:
+                self._name = value
+            elif field == _TAGGER_HEADER:
+                try:
+                    sep = value.index(b'> ')
+                except ValueError:
+                    self._tagger = value
+                    self._tag_time = None
+                    self._tag_timezone = None
+                    self._tag_timezone_neg_utc = False
+                else:
+                    self._tagger = value[0:sep+1]
+                    try:
+                        (timetext, timezonetext) = (
+                                value[sep+2:].rsplit(b' ', 1))
+                        self._tag_time = int(timetext)
+                        self._tag_timezone, self._tag_timezone_neg_utc = (
+                                parse_timezone(timezonetext))
+                    except ValueError as e:
+                        raise ObjectFormatException(e)
+            elif field is None:
+                self._message = value
+            else:
+                raise ObjectFormatException("Unknown field %s" % field)
+
+    def _get_object(self):
+        """Get the object pointed to by this tag.
+
+        :return: tuple of (object class, sha).
+        """
+        return (self._object_class, self._object_sha)
+
+    def _set_object(self, value):
+        (self._object_class, self._object_sha) = value
+        self._needs_serialization = True
+
+    object = property(_get_object, _set_object)
+
+    name = serializable_property("name", "The name of this tag")
+    tagger = serializable_property(
+            "tagger",
+            "Returns the name of the person who created this tag")
+    tag_time = serializable_property(
+            "tag_time",
+            "The creation timestamp of the tag.  As the number of seconds "
+            "since the epoch")
+    tag_timezone = serializable_property(
+            "tag_timezone",
+            "The timezone that tag_time is in.")
+    message = serializable_property(
+            "message", "The message attached to this tag")
+
+
+class TreeEntry(namedtuple('TreeEntry', ['path', 'mode', 'sha'])):
+    """Named tuple encapsulating a single tree entry."""
+
+    def in_path(self, path):
+        """Return a copy of this entry with the given path prepended."""
+        if not isinstance(self.path, bytes):
+            raise TypeError('Expected bytes for path, got %r' % self.path)
+        return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
+
+
+def parse_tree(text, strict=False):
+    """Parse a tree text.
+
+    :param text: Serialized text to parse
+    :return: iterator of tuples of (name, mode, sha)
+    :raise ObjectFormatException: if the object was malformed in some way
+    """
+    count = 0
+    l = len(text)
+    while count < l:
+        mode_end = text.index(b' ', count)
+        mode_text = text[count:mode_end]
+        if strict and mode_text.startswith(b'0'):
+            raise ObjectFormatException("Invalid mode '%s'" % mode_text)
+        try:
+            mode = int(mode_text, 8)
+        except ValueError:
+            raise ObjectFormatException("Invalid mode '%s'" % mode_text)
+        name_end = text.index(b'\0', mode_end)
+        name = text[mode_end+1:name_end]
+        count = name_end+21
+        sha = text[name_end+1:count]
+        if len(sha) != 20:
+            raise ObjectFormatException("Sha has invalid length")
+        hexsha = sha_to_hex(sha)
+        yield (name, mode, hexsha)
+
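+# e.g. list(parse_tree(b'100644 foo\x00' + b'\x01' * 20))
+# -> [(b'foo', 0o100644, b'01' * 20)]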
+
+def serialize_tree(items):
+    """Serialize the items in a tree to a text.
+
+    :param items: Sorted iterable over (name, mode, sha) tuples
+    :return: Serialized tree text as chunks
+    """
+    for name, mode, hexsha in items:
+        yield (("%04o" % mode).encode('ascii') + b' ' + name +
+               b'\0' + hex_to_sha(hexsha))
+
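+# serialize_tree inverts parse_tree for canonical input, e.g.
+# b''.join(serialize_tree(parse_tree(t))) == t for the entry above.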
+
+def sorted_tree_items(entries, name_order):
+    """Iterate over a tree entries dictionary.
+
+    :param name_order: If True, iterate entries in order of their name. If
+        False, iterate entries in tree order, that is, treat subtree entries as
+        having '/' appended.
+    :param entries: Dictionary mapping names to (mode, sha) tuples
+    :return: Iterator over (name, mode, hexsha)
+    """
+    key_func = name_order and key_entry_name_order or key_entry
+    for name, entry in sorted(entries.items(), key=key_func):
+        mode, hexsha = entry
+        # Stricter type checks than normal to mirror checks in the C version.
+        mode = int(mode)
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for SHA, got %r' % hexsha)
+        yield TreeEntry(name, mode, hexsha)
+
+
+def key_entry(entry):
+    """Sort key for tree entry.
+
+    :param entry: (name, value) tuple
+    """
+    (name, value) = entry
+    if stat.S_ISDIR(value[0]):
+        name += b'/'
+    return name
+
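+# e.g. for entries b'a.c' (file), b'a' (subtree) and b'a0c' (file), tree
+# order is a.c, a, a0c (the subtree sorts as b'a/'), while name order is
+# a, a.c, a0c.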
+
+def key_entry_name_order(entry):
+    """Sort key for tree entry in name order."""
+    return entry[0]
+
+
+def pretty_format_tree_entry(name, mode, hexsha, encoding="utf-8"):
+    """Pretty format tree entry.
+
+    :param name: Name of the directory entry
+    :param mode: Mode of entry
+    :param hexsha: Hexsha of the referenced object
+    :return: string describing the tree entry
+    """
+    if mode & stat.S_IFDIR:
+        kind = "tree"
+    else:
+        kind = "blob"
+    return "%04o %s %s\t%s\n" % (
+            mode, kind, hexsha.decode('ascii'),
+            name.decode(encoding, 'replace'))
+
+
+class Tree(ShaFile):
+    """A Git tree object"""
+
+    type_name = b'tree'
+    type_num = 2
+
+    __slots__ = ('_entries',)
+
+    def __init__(self):
+        super(Tree, self).__init__()
+        self._entries = {}
+
+    @classmethod
+    def from_path(cls, filename):
+        tree = ShaFile.from_path(filename)
+        if not isinstance(tree, cls):
+            raise NotTreeError(filename)
+        return tree
+
+    def __contains__(self, name):
+        return name in self._entries
+
+    def __getitem__(self, name):
+        return self._entries[name]
+
+    def __setitem__(self, name, value):
+        """Set a tree entry by name.
+
+        :param name: The name of the entry, as a string.
+        :param value: A tuple of (mode, hexsha), where mode is the mode of the
+            entry as an integral type and hexsha is the hex SHA of the entry as
+            a string.
+        """
+        mode, hexsha = value
+        self._entries[name] = (mode, hexsha)
+        self._needs_serialization = True
+
+    def __delitem__(self, name):
+        del self._entries[name]
+        self._needs_serialization = True
+
+    def __len__(self):
+        return len(self._entries)
+
+    def __iter__(self):
+        return iter(self._entries)
+
+    def add(self, name, mode, hexsha):
+        """Add an entry to the tree.
+
+        :param name: The name of the entry, as a string.
+        :param mode: The mode of the entry as an integral type. Not all
+            possible modes are supported by git; see check() for details.
+        :param hexsha: The hex SHA of the entry as a string.
+        """
+        if isinstance(name, int) and isinstance(mode, bytes):
+            (name, mode) = (mode, name)
+            warnings.warn(
+                "Please use Tree.add(name, mode, hexsha)",
+                category=DeprecationWarning, stacklevel=2)
+        self._entries[name] = mode, hexsha
+        self._needs_serialization = True
+
+    def iteritems(self, name_order=False):
+        """Iterate over entries.
+
+        :param name_order: If True, iterate in name order instead of tree
+            order.
+        :return: Iterator over (name, mode, sha) tuples
+        """
+        return sorted_tree_items(self._entries, name_order)
+
+    def items(self):
+        """Return the sorted entries in this tree.
+
+        :return: List with (name, mode, sha) tuples
+        """
+        return list(self.iteritems())
+
+    def _deserialize(self, chunks):
+        """Grab the entries in the tree"""
+        try:
+            parsed_entries = parse_tree(b''.join(chunks))
+        except ValueError as e:
+            raise ObjectFormatException(e)
+        # TODO: list comprehension is for efficiency in the common (small)
+        # case; if memory efficiency in the large case is a concern, use a
+        # genexp.
+        self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
+
+    def check(self):
+        """Check this object for internal consistency.
+
+        :raise ObjectFormatException: if the object is malformed in some way
+        """
+        super(Tree, self).check()
+        last = None
+        allowed_modes = (stat.S_IFREG | 0o755, stat.S_IFREG | 0o644,
+                         stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
+                         # TODO: optionally exclude as in git fsck --strict
+                         stat.S_IFREG | 0o664)
+        for name, mode, sha in parse_tree(b''.join(self._chunked_text),
+                                          True):
+            check_hexsha(sha, 'invalid sha %s' % sha)
+            if b'/' in name or name in (b'', b'.', b'..'):
+                raise ObjectFormatException('invalid name %s' % name)
+
+            if mode not in allowed_modes:
+                raise ObjectFormatException('invalid mode %06o' % mode)
+
+            entry = (name, (mode, sha))
+            if last:
+                if key_entry(last) > key_entry(entry):
+                    raise ObjectFormatException('entries not sorted')
+                if name == last[0]:
+                    raise ObjectFormatException('duplicate entry %s' % name)
+            last = entry
+
+    def _serialize(self):
+        return list(serialize_tree(self.iteritems()))
+
+    def as_pretty_string(self):
+        text = []
+        for name, mode, hexsha in self.iteritems():
+            text.append(pretty_format_tree_entry(name, mode, hexsha))
+        return "".join(text)
+
+    def lookup_path(self, lookup_obj, path):
+        """Look up an object in a Git tree.
+
+        :param lookup_obj: Callback for retrieving object by SHA1
+        :param path: Path to lookup
+        :return: A tuple of (mode, SHA) of the resulting path.
+        """
+        parts = path.split(b'/')
+        sha = self.id
+        mode = None
+        for p in parts:
+            if not p:
+                continue
+            obj = lookup_obj(sha)
+            if not isinstance(obj, Tree):
+                raise NotTreeError(sha)
+            mode, sha = obj[p]
+        return mode, sha
+
+
+def parse_timezone(text):
+    """Parse a timezone text fragment (e.g. '+0100').
+
+    :param text: Text to parse.
+    :return: Tuple with timezone as seconds difference to UTC
+        and a boolean indicating whether this was a UTC timezone
+        prefixed with a negative sign (-0000).
+    """
+    # cgit parses the first character as the sign, and the rest
+    #  as an integer (using strtol), which could also be negative.
+    #  We do the same for compatibility. See #697828.
+    if text[0] not in b'+-':
+        raise ValueError("Timezone must start with + or - (%(text)s)" % vars())
+    sign = text[:1]
+    offset = int(text[1:])
+    if sign == b'-':
+        offset = -offset
+    unnecessary_negative_timezone = (offset >= 0 and sign == b'-')
+    signum = (offset < 0) and -1 or 1
+    offset = abs(offset)
+    hours = offset // 100
+    minutes = offset % 100
+    return (signum * (hours * 3600 + minutes * 60),
+            unnecessary_negative_timezone)
+
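+# e.g. parse_timezone(b'-0700') == (-25200, False), while
+# parse_timezone(b'-0000') == (0, True) records the negative-UTC quirk.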
+
+def format_timezone(offset, unnecessary_negative_timezone=False):
+    """Format a timezone for Git serialization.
+
+    :param offset: Timezone offset as seconds difference to UTC
+    :param unnecessary_negative_timezone: Whether to use a minus sign for
+        UTC or positive timezones (-0000 and -0700 rather than +0000 / +0700).
+    """
+    if offset % 60 != 0:
+        raise ValueError("Unable to handle non-minute offset.")
+    if offset < 0 or unnecessary_negative_timezone:
+        sign = '-'
+        offset = -offset
+    else:
+        sign = '+'
+    return ('%c%02d%02d' %
+            (sign, offset // 3600, (offset // 60) % 60)).encode('ascii')
+
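+# format_timezone inverts parse_timezone, e.g. format_timezone(-25200) ==
+# b'-0700' and format_timezone(0, True) == b'-0000'.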
+
+def parse_commit(chunks):
+    """Parse a commit object from chunks.
+
+    :param chunks: Chunks to parse
+    :return: Tuple of (tree, parents, author_info, commit_info,
+        encoding, mergetag, gpgsig, message, extra)
+    """
+    parents = []
+    extra = []
+    tree = None
+    author_info = (None, None, (None, None))
+    commit_info = (None, None, (None, None))
+    encoding = None
+    mergetag = []
+    message = None
+    gpgsig = None
+
+    for field, value in _parse_message(chunks):
+        # TODO(jelmer): Enforce ordering
+        if field == _TREE_HEADER:
+            tree = value
+        elif field == _PARENT_HEADER:
+            parents.append(value)
+        elif field == _AUTHOR_HEADER:
+            author, timetext, timezonetext = value.rsplit(b' ', 2)
+            author_time = int(timetext)
+            author_info = (author, author_time, parse_timezone(timezonetext))
+        elif field == _COMMITTER_HEADER:
+            committer, timetext, timezonetext = value.rsplit(b' ', 2)
+            commit_time = int(timetext)
+            commit_info = (
+                    committer, commit_time, parse_timezone(timezonetext))
+        elif field == _ENCODING_HEADER:
+            encoding = value
+        elif field == _MERGETAG_HEADER:
+            mergetag.append(Tag.from_string(value + b'\n'))
+        elif field == _GPGSIG_HEADER:
+            gpgsig = value
+        elif field is None:
+            message = value
+        else:
+            extra.append((field, value))
+    return (tree, parents, author_info, commit_info, encoding, mergetag,
+            gpgsig, message, extra)
+
+
+class Commit(ShaFile):
+    """A git commit object"""
+
+    type_name = b'commit'
+    type_num = 1
+
+    __slots__ = ('_parents', '_encoding', '_extra', '_author_timezone_neg_utc',
+                 '_commit_timezone_neg_utc', '_commit_time',
+                 '_author_time', '_author_timezone', '_commit_timezone',
+                 '_author', '_committer', '_tree', '_message',
+                 '_mergetag', '_gpgsig')
+
+    def __init__(self):
+        super(Commit, self).__init__()
+        self._parents = []
+        self._encoding = None
+        self._mergetag = []
+        self._gpgsig = None
+        self._extra = []
+        self._author_timezone_neg_utc = False
+        self._commit_timezone_neg_utc = False
+
+    @classmethod
+    def from_path(cls, path):
+        commit = ShaFile.from_path(path)
+        if not isinstance(commit, cls):
+            raise NotCommitError(path)
+        return commit
+
+    def _deserialize(self, chunks):
+        (self._tree, self._parents, author_info, commit_info, self._encoding,
+         self._mergetag, self._gpgsig, self._message, self._extra) = (
+                        parse_commit(chunks))
+        (self._author, self._author_time,
+         (self._author_timezone, self._author_timezone_neg_utc)) = author_info
+        (self._committer, self._commit_time,
+         (self._commit_timezone, self._commit_timezone_neg_utc)) = commit_info
+
+    def check(self):
+        """Check this object for internal consistency.
+
+        :raise ObjectFormatException: if the object is malformed in some way
+        """
+        super(Commit, self).check()
+        self._check_has_member("_tree", "missing tree")
+        self._check_has_member("_author", "missing author")
+        self._check_has_member("_committer", "missing committer")
+        # times are currently checked when set
+
+        for parent in self._parents:
+            check_hexsha(parent, "invalid parent sha")
+        check_hexsha(self._tree, "invalid tree sha")
+
+        check_identity(self._author, "invalid author")
+        check_identity(self._committer, "invalid committer")
+
+        last = None
+        for field, _ in _parse_message(self._chunked_text):
+            if field == _TREE_HEADER and last is not None:
+                raise ObjectFormatException("unexpected tree")
+            elif field == _PARENT_HEADER and last not in (_PARENT_HEADER,
+                                                          _TREE_HEADER):
+                raise ObjectFormatException("unexpected parent")
+            elif field == _AUTHOR_HEADER and last not in (_TREE_HEADER,
+                                                          _PARENT_HEADER):
+                raise ObjectFormatException("unexpected author")
+            elif field == _COMMITTER_HEADER and last != _AUTHOR_HEADER:
+                raise ObjectFormatException("unexpected committer")
+            elif field == _ENCODING_HEADER and last != _COMMITTER_HEADER:
+                raise ObjectFormatException("unexpected encoding")
+            last = field
+
+        # TODO: optionally check for duplicate parents
+
+    def _serialize(self):
+        chunks = []
+        tree_bytes = (
+                self._tree.id if isinstance(self._tree, Tree) else self._tree)
+        chunks.append(git_line(_TREE_HEADER, tree_bytes))
+        for p in self._parents:
+            chunks.append(git_line(_PARENT_HEADER, p))
+        chunks.append(git_line(
+            _AUTHOR_HEADER, self._author,
+            str(self._author_time).encode('ascii'),
+            format_timezone(
+                    self._author_timezone, self._author_timezone_neg_utc)))
+        chunks.append(git_line(
+            _COMMITTER_HEADER, self._committer,
+            str(self._commit_time).encode('ascii'),
+            format_timezone(self._commit_timezone,
+                            self._commit_timezone_neg_utc)))
+        if self.encoding:
+            chunks.append(git_line(_ENCODING_HEADER, self.encoding))
+        for mergetag in self.mergetag:
+            mergetag_chunks = mergetag.as_raw_string().split(b'\n')
+
+            chunks.append(git_line(_MERGETAG_HEADER, mergetag_chunks[0]))
+            # Embedded extra header needs leading space
+            for chunk in mergetag_chunks[1:]:
+                chunks.append(b' ' + chunk + b'\n')
+
+            # No trailing empty line
+            if chunks[-1].endswith(b' \n'):
+                chunks[-1] = chunks[-1][:-2]
+        for k, v in self.extra:
+            if b'\n' in k or b'\n' in v:
+                raise AssertionError(
+                    "newline in extra data: %r -> %r" % (k, v))
+            chunks.append(git_line(k, v))
+        if self.gpgsig:
+            sig_chunks = self.gpgsig.split(b'\n')
+            chunks.append(git_line(_GPGSIG_HEADER, sig_chunks[0]))
+            for chunk in sig_chunks[1:]:
+                chunks.append(git_line(b'', chunk))
+        chunks.append(b'\n')  # There must be a new line after the headers
+        chunks.append(self._message)
+        return chunks
+
+    tree = serializable_property(
+        "tree", "Tree that is the state of this commit")
+
+    def _get_parents(self):
+        """Return a list of parents of this commit."""
+        return self._parents
+
+    def _set_parents(self, value):
+        """Set a list of parents of this commit."""
+        self._needs_serialization = True
+        self._parents = value
+
+    parents = property(_get_parents, _set_parents,
+                       doc="Parents of this commit, by their SHA1.")
+
+    def _get_extra(self):
+        """Return extra settings of this commit."""
+        return self._extra
+
+    extra = property(
+        _get_extra,
+        doc="Extra header fields not understood (presumably added in a "
+            "newer version of git). Kept verbatim so the object can "
+            "be correctly reserialized. For private commit metadata, use "
+            "pseudo-headers in Commit.message, rather than this field.")
+
+    author = serializable_property(
+        "author",
+        "The name of the author of the commit")
+
+    committer = serializable_property(
+        "committer",
+        "The name of the committer of the commit")
+
+    message = serializable_property(
+        "message", "The commit message")
+
+    commit_time = serializable_property(
+        "commit_time",
+        "The timestamp of the commit. As the number of seconds since the "
+        "epoch.")
+
+    commit_timezone = serializable_property(
+        "commit_timezone",
+        "The zone the commit time is in")
+
+    author_time = serializable_property(
+        "author_time",
+        "The timestamp the commit was written. As the number of "
+        "seconds since the epoch.")
+
+    author_timezone = serializable_property(
+        "author_timezone", "Returns the zone the author time is in.")
+
+    encoding = serializable_property(
+        "encoding", "Encoding of the commit message.")
+
+    mergetag = serializable_property(
+        "mergetag", "Associated signed tag.")
+
+    gpgsig = serializable_property(
+        "gpgsig", "GPG Signature.")
+
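+# Sketch of building a minimal commit by hand (hypothetical values):
+#
+#     c = Commit()
+#     c.tree = b'0' * 40               # normally the sha of a real Tree
+#     c.author = c.committer = b'Jane <jane@example.com>'
+#     c.author_time = c.commit_time = 1500000000
+#     c.author_timezone = c.commit_timezone = 0
+#     c.message = b'initial\n'
+#     c.id                             # hex sha of the serialized commit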
+
+OBJECT_CLASSES = (
+    Commit,
+    Tree,
+    Blob,
+    Tag,
+    )
+
+_TYPE_MAP = {}
+
+for cls in OBJECT_CLASSES:
+    _TYPE_MAP[cls.type_name] = cls
+    _TYPE_MAP[cls.type_num] = cls
+
+
+# Hold on to the pure-python implementations for testing
+_parse_tree_py = parse_tree
+_sorted_tree_items_py = sorted_tree_items
+try:
+    # Try to import C versions
+    from dulwich._objects import parse_tree, sorted_tree_items
+except ImportError:
+    pass

+ 178 - 0
dulwich/objectspec.py

@@ -0,0 +1,178 @@
+# objectspec.py -- Object specification
+# Copyright (C) 2014 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Object specification."""
+
+
+def to_bytes(text):
+    if getattr(text, "encode", None) is not None:
+        text = text.encode('ascii')
+    return text
+
+
+def parse_object(repo, objectish):
+    """Parse a string referring to an object.
+
+    :param repo: A `Repo` object
+    :param objectish: A string referring to an object
+    :return: A git object
+    :raise KeyError: If the object can not be found
+    """
+    objectish = to_bytes(objectish)
+    return repo[objectish]
+
+
+def parse_tree(repo, treeish):
+    """Parse a string referring to a tree.
+
+    :param repo: A `Repo` object
+    :param treeish: A string referring to a tree
+    :return: A git object
+    :raise KeyError: If the object can not be found
+    """
+    treeish = to_bytes(treeish)
+    o = repo[treeish]
+    if o.type_name == b"commit":
+        return repo[o.tree]
+    return o
+
+
+def parse_ref(container, refspec):
+    """Parse a string referring to a reference.
+
+    :param container: A RefsContainer object
+    :param refspec: A string referring to a ref
+    :return: A ref
+    :raise KeyError: If the ref can not be found
+    """
+    refspec = to_bytes(refspec)
+    possible_refs = [
+        refspec,
+        b"refs/" + refspec,
+        b"refs/tags/" + refspec,
+        b"refs/heads/" + refspec,
+        b"refs/remotes/" + refspec,
+        b"refs/remotes/" + refspec + b"/HEAD"
+    ]
+    for ref in possible_refs:
+        if ref in container:
+            return ref
+    else:
+        raise KeyError(refspec)
+
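+# e.g. parse_ref({b'refs/heads/master': b'...'}, b'master') returns
+# b'refs/heads/master': the candidate expansions are tried in order.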
+
+def parse_reftuple(lh_container, rh_container, refspec):
+    """Parse a reftuple spec.
+
+    :param lh_container: A RefsContainer object
+    :param rh_container: A RefsContainer object
+    :param refspec: A string
+    :return: A tuple with left and right ref
+    :raise KeyError: If one of the refs can not be found
+    """
+    if refspec.startswith(b"+"):
+        force = True
+        refspec = refspec[1:]
+    else:
+        force = False
+    refspec = to_bytes(refspec)
+    if b":" in refspec:
+        (lh, rh) = refspec.split(b":")
+    else:
+        lh = rh = refspec
+    if lh == b"":
+        lh = None
+    else:
+        lh = parse_ref(lh_container, lh)
+    if rh == b"":
+        rh = None
+    else:
+        try:
+            rh = parse_ref(rh_container, rh)
+        except KeyError:
+            # TODO: check force?
+            if b"/" not in rh:
+                rh = b"refs/heads/" + rh
+    return (lh, rh, force)
+
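+# e.g. with refs = {b'refs/heads/master': b'...'}:
+#     parse_reftuple(refs, refs, b'+master:master')
+#     # -> (b'refs/heads/master', b'refs/heads/master', True)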
+
+def parse_reftuples(lh_container, rh_container, refspecs):
+    """Parse a list of reftuple specs to a list of reftuples.
+
+    :param lh_container: A RefsContainer object
+    :param rh_container: A RefsContainer object
+    :param refspecs: A list of refspecs or a string
+    :return: A list of refs
+    :raise KeyError: If one of the refs can not be found
+    """
+    if not isinstance(refspecs, list):
+        refspecs = [refspecs]
+    ret = []
+    # TODO: Support * in refspecs
+    for refspec in refspecs:
+        ret.append(parse_reftuple(lh_container, rh_container, refspec))
+    return ret
+
+
+def parse_refs(container, refspecs):
+    """Parse a list of refspecs to a list of refs.
+
+    :param container: A RefsContainer object
+    :param refspecs: A list of refspecs or a string
+    :return: A list of refs
+    :raise KeyError: If one of the refs can not be found
+    """
+    # TODO: Support * in refspecs
+    if not isinstance(refspecs, list):
+        refspecs = [refspecs]
+    ret = []
+    for refspec in refspecs:
+        ret.append(parse_ref(container, refspec))
+    return ret
+
+
+def parse_commit_range(repo, committishs):
+    """Parse a string referring to a range of commits.
+
+    :param repo: A `Repo` object
+    :param committishs: A string referring to a range of commits.
+    :return: An iterator over `Commit` objects
+    :raise KeyError: When the reference commits can not be found
+    :raise ValueError: If the range can not be parsed
+    """
+    committishs = to_bytes(committishs)
+    # TODO(jelmer): Support more than a single commit..
+    return iter([parse_commit(repo, committishs)])
+
+
+def parse_commit(repo, committish):
+    """Parse a string referring to a single commit.
+
+    :param repo: A `Repo` object
+    :param committish: A string referring to a single commit.
+    :return: A Commit object
+    :raise KeyError: When the reference commits can not be found
+    :raise ValueError: If the range can not be parsed
+    """
+    committish = to_bytes(committish)
+    return repo[committish]  # For now..
+
+
+# TODO: parse_path_in_tree(), which handles e.g. v1.0:Documentation

+ 1967 - 0
dulwich/pack.py

@@ -0,0 +1,1967 @@
+# pack.py -- For dealing with packed git objects.
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Classes for dealing with packed git objects.
+
+A pack is a compact representation of a bunch of objects, stored
+using deltas where possible.
+
+They have two parts, the pack file, which stores the data, and an index
+that tells you where the data is.
+
+To find an object you look in all of the index files until you find a
+match for the object name. You then use the offset obtained from the index
+as a pointer into the corresponding pack file.
+"""
+
+from collections import defaultdict
+
+import binascii
+from io import BytesIO, UnsupportedOperation
+from collections import (
+    deque,
+    )
+import difflib
+import struct
+
+from itertools import chain
+try:
+    from itertools import imap, izip
+except ImportError:
+    # Python3
+    imap = map
+    izip = zip
+
+import os
+import sys
+
+from hashlib import sha1
+from os import (
+    SEEK_CUR,
+    SEEK_END,
+    )
+from struct import unpack_from
+import zlib
+
+try:
+    import mmap
+except ImportError:
+    has_mmap = False
+else:
+    has_mmap = True
+
+# For some reason the above try/except fails to set has_mmap = False for
+# Plan 9
+if sys.platform == 'Plan9':
+    has_mmap = False
+
+from dulwich.errors import (  # noqa: E402
+    ApplyDeltaError,
+    ChecksumMismatch,
+    )
+from dulwich.file import GitFile  # noqa: E402
+from dulwich.lru_cache import (  # noqa: E402
+    LRUSizeCache,
+    )
+from dulwich.objects import (  # noqa: E402
+    ShaFile,
+    hex_to_sha,
+    sha_to_hex,
+    object_header,
+    )
+
+
+OFS_DELTA = 6
+REF_DELTA = 7
+
+DELTA_TYPES = (OFS_DELTA, REF_DELTA)
+
+
+DEFAULT_PACK_DELTA_WINDOW_SIZE = 10
+
+
+def take_msb_bytes(read, crc32=None):
+    """Read bytes marked with most significant bit.
+
+    :param read: Read function
+    """
+    ret = []
+    while len(ret) == 0 or ret[-1] & 0x80:
+        b = read(1)
+        if crc32 is not None:
+            crc32 = binascii.crc32(b, crc32)
+        ret.append(ord(b[:1]))
+    return ret, crc32
+
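+# e.g. take_msb_bytes(BytesIO(b'\x91\x2e').read) == ([0x91, 0x2e], None):
+# reading stops at the first byte without the 0x80 continuation bit.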
+
+class UnpackedObject(object):
+    """Class encapsulating an object unpacked from a pack file.
+
+    These objects should only be created from within unpack_object. Most
+    members start out as empty and are filled in at various points by
+    read_zlib_chunks, unpack_object, DeltaChainIterator, etc.
+
+    End users of this object should take care that the function they're getting
+    this object from is guaranteed to set the members they need.
+    """
+
+    __slots__ = [
+      'offset',         # Offset in its pack.
+      '_sha',           # Cached binary SHA.
+      'obj_type_num',   # Type of this object.
+      'obj_chunks',     # Decompressed and delta-resolved chunks.
+      'pack_type_num',  # Type of this object in the pack (may be a delta).
+      'delta_base',     # Delta base offset or SHA.
+      'comp_chunks',    # Compressed object chunks.
+      'decomp_chunks',  # Decompressed object chunks.
+      'decomp_len',     # Decompressed length of this object.
+      'crc32',          # CRC32.
+      ]
+
+    # TODO(dborowitz): read_zlib_chunks and unpack_object could very well be
+    # methods of this object.
+    def __init__(self, pack_type_num, delta_base, decomp_len, crc32):
+        self.offset = None
+        self._sha = None
+        self.pack_type_num = pack_type_num
+        self.delta_base = delta_base
+        self.comp_chunks = None
+        self.decomp_chunks = []
+        self.decomp_len = decomp_len
+        self.crc32 = crc32
+
+        if pack_type_num in DELTA_TYPES:
+            self.obj_type_num = None
+            self.obj_chunks = None
+        else:
+            self.obj_type_num = pack_type_num
+            self.obj_chunks = self.decomp_chunks
+            self.delta_base = delta_base
+
+    def sha(self):
+        """Return the binary SHA of this object."""
+        if self._sha is None:
+            self._sha = obj_sha(self.obj_type_num, self.obj_chunks)
+        return self._sha
+
+    def sha_file(self):
+        """Return a ShaFile from this object."""
+        return ShaFile.from_raw_chunks(self.obj_type_num, self.obj_chunks)
+
+    # Only provided for backwards compatibility with code that expects either
+    # chunks or a delta tuple.
+    def _obj(self):
+        """Return the decompressed chunks, or (delta base, delta chunks)."""
+        if self.pack_type_num in DELTA_TYPES:
+            return (self.delta_base, self.decomp_chunks)
+        else:
+            return self.decomp_chunks
+
+    def __eq__(self, other):
+        if not isinstance(other, UnpackedObject):
+            return False
+        for slot in self.__slots__:
+            if getattr(self, slot) != getattr(other, slot):
+                return False
+        return True
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __repr__(self):
+        data = ['%s=%r' % (s, getattr(self, s)) for s in self.__slots__]
+        return '%s(%s)' % (self.__class__.__name__, ', '.join(data))
+
+
+_ZLIB_BUFSIZE = 4096
+
+
+def read_zlib_chunks(read_some, unpacked, include_comp=False,
+                     buffer_size=_ZLIB_BUFSIZE):
+    """Read zlib data from a buffer.
+
+    This function requires that the buffer have additional data following the
+    compressed data, which is guaranteed to be the case for git pack files.
+
+    :param read_some: Read function that returns at least one byte, but may
+        return less than the requested size.
+    :param unpacked: An UnpackedObject to write result data to. If its crc32
+        attr is not None, the CRC32 of the compressed bytes will be computed
+        using this starting CRC32.
+        After this function, will have the following attrs set:
+        * comp_chunks    (if include_comp is True)
+        * decomp_chunks
+        * decomp_len
+        * crc32
+    :param include_comp: If True, include compressed data in the result.
+    :param buffer_size: Size of the read buffer.
+    :return: Leftover unused data from the decompression.
+    :raise zlib.error: if a decompression error occurred.
+    """
+    if unpacked.decomp_len < 0:
+        raise ValueError('expected non-negative zlib data stream size')
+    decomp_obj = zlib.decompressobj()
+
+    comp_chunks = []
+    decomp_chunks = unpacked.decomp_chunks
+    decomp_len = 0
+    crc32 = unpacked.crc32
+
+    while True:
+        add = read_some(buffer_size)
+        if not add:
+            raise zlib.error('EOF before end of zlib stream')
+        comp_chunks.append(add)
+        decomp = decomp_obj.decompress(add)
+        decomp_len += len(decomp)
+        decomp_chunks.append(decomp)
+        unused = decomp_obj.unused_data
+        if unused:
+            left = len(unused)
+            if crc32 is not None:
+                crc32 = binascii.crc32(add[:-left], crc32)
+            if include_comp:
+                comp_chunks[-1] = add[:-left]
+            break
+        elif crc32 is not None:
+            crc32 = binascii.crc32(add, crc32)
+    if crc32 is not None:
+        crc32 &= 0xffffffff
+
+    if decomp_len != unpacked.decomp_len:
+        raise zlib.error('decompressed data does not match expected size')
+
+    unpacked.crc32 = crc32
+    if include_comp:
+        unpacked.comp_chunks = comp_chunks
+    return unused
+
+
+def iter_sha1(iter):
+    """Return the hexdigest of the SHA1 over a set of names.
+
+    :param iter: Iterator over string objects
+    :return: 40-byte hex sha1 digest
+    """
+    sha = sha1()
+    for name in iter:
+        sha.update(name)
+    return sha.hexdigest().encode('ascii')
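+
+# Example (editor's illustration): the canonical SHA-1 of b'abc' as a hex
+# digest over a single-element iterable.
+#
+#     assert iter_sha1([b'abc']) == b'a9993e364706816aba3e25717850c26c9cd0d89d'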
+
+
+def load_pack_index(path):
+    """Load an index file by path.
+
+    :param path: Path to the index file
+    :return: A PackIndex loaded from the given path
+    """
+    with GitFile(path, 'rb') as f:
+        return load_pack_index_file(path, f)
+
+
+def _load_file_contents(f, size=None):
+    try:
+        fd = f.fileno()
+    except (UnsupportedOperation, AttributeError):
+        fd = None
+    # Attempt to use mmap if possible
+    if fd is not None:
+        if size is None:
+            size = os.fstat(fd).st_size
+        if has_mmap:
+            try:
+                contents = mmap.mmap(fd, size, access=mmap.ACCESS_READ)
+            except mmap.error:
+                # Perhaps a socket?
+                pass
+            else:
+                return contents, size
+    contents = f.read()
+    size = len(contents)
+    return contents, size
+
+
+def load_pack_index_file(path, f):
+    """Load an index file from a file-like object.
+
+    :param path: Path for the index file
+    :param f: File-like object
+    :return: A PackIndex loaded from the given file
+    """
+    contents, size = _load_file_contents(f)
+    if contents[:4] == b'\377tOc':
+        version = struct.unpack(b'>L', contents[4:8])[0]
+        if version == 2:
+            return PackIndex2(
+                path, file=f, contents=contents, size=size)
+        else:
+            raise KeyError('Unknown pack index format %d' % version)
+    else:
+        return PackIndex1(path, file=f, contents=contents, size=size)
+
+
+def bisect_find_sha(start, end, sha, unpack_name):
+    """Find a SHA in a data blob with sorted SHAs.
+
+    :param start: Start index of range to search
+    :param end: End index of range to search
+    :param sha: Sha to find
+    :param unpack_name: Callback to retrieve SHA by index
+    :return: Index of the SHA, or None if it wasn't found
+    """
+    assert start <= end
+    while start <= end:
+        i = (start + end) // 2
+        file_sha = unpack_name(i)
+        if file_sha < sha:
+            start = i + 1
+        elif file_sha > sha:
+            end = i - 1
+        else:
+            return i
+    return None
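+
+# A minimal sketch (editor's illustration): unpack_name may be any
+# callable mapping an index to a sha; here a plain sorted list suffices.
+#
+#     names = [b'aaa', b'bbb', b'ccc']
+#     get = names.__getitem__
+#     assert bisect_find_sha(0, len(names) - 1, b'bbb', get) == 1
+#     assert bisect_find_sha(0, len(names) - 1, b'zzz', get) is None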
+
+
+class PackIndex(object):
+    """An index into a packfile.
+
+    Given the SHA id of an object, a pack index can tell you the location
+    in the packfile of that object, if the pack contains it.
+    """
+
+    def __eq__(self, other):
+        if not isinstance(other, PackIndex):
+            return False
+
+        for (name1, _, _), (name2, _, _) in izip(self.iterentries(),
+                                                 other.iterentries()):
+            if name1 != name2:
+                return False
+        return True
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def __len__(self):
+        """Return the number of entries in this pack index."""
+        raise NotImplementedError(self.__len__)
+
+    def __iter__(self):
+        """Iterate over the SHAs in this pack."""
+        return imap(sha_to_hex, self._itersha())
+
+    def iterentries(self):
+        """Iterate over the entries in this pack index.
+
+        :return: iterator over tuples with object name, offset in packfile and
+            crc32 checksum.
+        """
+        raise NotImplementedError(self.iterentries)
+
+    def get_pack_checksum(self):
+        """Return the SHA1 checksum stored for the corresponding packfile.
+
+        :return: 20-byte binary digest
+        """
+        raise NotImplementedError(self.get_pack_checksum)
+
+    def object_index(self, sha):
+        """Return the offset into the corresponding packfile for the object.
+
+        Given the name of an object it will return the offset that object
+        lives at within the corresponding pack file. If the pack file doesn't
+        have the object then a KeyError is raised.
+        """
+        if len(sha) == 40:
+            sha = hex_to_sha(sha)
+        return self._object_index(sha)
+
+    def _object_index(self, sha):
+        """See object_index.
+
+        :param sha: A *binary* SHA string (20 bytes long).
+        """
+        raise NotImplementedError(self._object_index)
+
+    def objects_sha1(self):
+        """Return the hex SHA1 over all the shas of all objects in this pack.
+
+        :note: This is used for the filename of the pack.
+        """
+        return iter_sha1(self._itersha())
+
+    def _itersha(self):
+        """Yield all the SHA1's of the objects in the index, sorted."""
+        raise NotImplementedError(self._itersha)
+
+
+class MemoryPackIndex(PackIndex):
+    """Pack index that is stored entirely in memory."""
+
+    def __init__(self, entries, pack_checksum=None):
+        """Create a new MemoryPackIndex.
+
+        :param entries: Sequence of name, idx, crc32 (sorted)
+        :param pack_checksum: Optional pack checksum
+        """
+        self._by_sha = {}
+        for name, idx, crc32 in entries:
+            self._by_sha[name] = idx
+        self._entries = entries
+        self._pack_checksum = pack_checksum
+
+    def get_pack_checksum(self):
+        return self._pack_checksum
+
+    def __len__(self):
+        return len(self._entries)
+
+    def _object_index(self, sha):
+        return self._by_sha[sha]
+
+    def _itersha(self):
+        return iter(self._by_sha)
+
+    def iterentries(self):
+        return iter(self._entries)
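+
+# A small usage sketch (editor's illustration): lookups resolve through
+# the name -> offset dict built in __init__.
+#
+#     idx = MemoryPackIndex([(b'\x01' * 20, 42, None)])
+#     assert idx.object_index(b'\x01' * 20) == 42
+#     assert len(idx) == 1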
+
+
+class FilePackIndex(PackIndex):
+    """Pack index that is based on a file.
+
+    To do a lookup it opens the file and reads the fan-out table: 256
+    4-byte entries indexed by the first byte of a sha id. The entry for a
+    given byte is the end of the group of shas sharing that first byte;
+    the entry for the preceding byte is the start of that group. Within a
+    group the entries are sorted by sha id, so those start and end offsets
+    bound a bisection search for the sha.
+    """
+
+    def __init__(self, filename, file=None, contents=None, size=None):
+        """Create a pack index object.
+
+        Provide it with the name of the index file to consider, and it will map
+        it whenever required.
+        """
+        self._filename = filename
+        # Take the size now, so it can be checked each time we map the file to
+        # ensure that it hasn't changed.
+        if file is None:
+            self._file = GitFile(filename, 'rb')
+        else:
+            self._file = file
+        if contents is None:
+            self._contents, self._size = _load_file_contents(self._file, size)
+        else:
+            self._contents, self._size = (contents, size)
+
+    def __eq__(self, other):
+        # Quick optimization:
+        if (isinstance(other, FilePackIndex) and
+                self._fan_out_table != other._fan_out_table):
+            return False
+
+        return super(FilePackIndex, self).__eq__(other)
+
+    def close(self):
+        self._file.close()
+        if getattr(self._contents, "close", None) is not None:
+            self._contents.close()
+
+    def __len__(self):
+        """Return the number of entries in this pack index."""
+        return self._fan_out_table[-1]
+
+    def _unpack_entry(self, i):
+        """Unpack the i-th entry in the index file.
+
+        :return: Tuple with object name (SHA), offset in pack file and CRC32
+            checksum (if known).
+        """
+        raise NotImplementedError(self._unpack_entry)
+
+    def _unpack_name(self, i):
+        """Unpack the i-th name from the index file."""
+        raise NotImplementedError(self._unpack_name)
+
+    def _unpack_offset(self, i):
+        """Unpack the i-th object offset from the index file."""
+        raise NotImplementedError(self._unpack_offset)
+
+    def _unpack_crc32_checksum(self, i):
+        """Unpack the crc32 checksum for the ith object from the index file.
+        """
+        raise NotImplementedError(self._unpack_crc32_checksum)
+
+    def _itersha(self):
+        for i in range(len(self)):
+            yield self._unpack_name(i)
+
+    def iterentries(self):
+        """Iterate over the entries in this pack index.
+
+        :return: iterator over tuples with object name, offset in packfile and
+            crc32 checksum.
+        """
+        for i in range(len(self)):
+            yield self._unpack_entry(i)
+
+    def _read_fan_out_table(self, start_offset):
+        ret = []
+        for i in range(0x100):
+            fanout_entry = self._contents[
+                start_offset+i*4:start_offset+(i+1)*4]
+            ret.append(struct.unpack('>L', fanout_entry)[0])
+        return ret
+
+    def check(self):
+        """Check that the stored checksum matches the actual checksum."""
+        actual = self.calculate_checksum()
+        stored = self.get_stored_checksum()
+        if actual != stored:
+            raise ChecksumMismatch(stored, actual)
+
+    def calculate_checksum(self):
+        """Calculate the SHA1 checksum over this pack index.
+
+        :return: 20-byte binary digest
+        """
+        return sha1(self._contents[:-20]).digest()
+
+    def get_pack_checksum(self):
+        """Return the SHA1 checksum stored for the corresponding packfile.
+
+        :return: 20-byte binary digest
+        """
+        return bytes(self._contents[-40:-20])
+
+    def get_stored_checksum(self):
+        """Return the SHA1 checksum stored for this index.
+
+        :return: 20-byte binary digest
+        """
+        return bytes(self._contents[-20:])
+
+    def _object_index(self, sha):
+        """See object_index.
+
+        :param sha: A *binary* SHA string (20 bytes long).
+        """
+        assert len(sha) == 20
+        idx = ord(sha[:1])
+        if idx == 0:
+            start = 0
+        else:
+            start = self._fan_out_table[idx-1]
+        end = self._fan_out_table[idx]
+        i = bisect_find_sha(start, end, sha, self._unpack_name)
+        if i is None:
+            raise KeyError(sha)
+        return self._unpack_offset(i)
+
+
+class PackIndex1(FilePackIndex):
+    """Version 1 Pack Index file."""
+
+    def __init__(self, filename, file=None, contents=None, size=None):
+        super(PackIndex1, self).__init__(filename, file, contents, size)
+        self.version = 1
+        self._fan_out_table = self._read_fan_out_table(0)
+
+    def _unpack_entry(self, i):
+        (offset, name) = unpack_from('>L20s', self._contents,
+                                     (0x100 * 4) + (i * 24))
+        return (name, offset, None)
+
+    def _unpack_name(self, i):
+        offset = (0x100 * 4) + (i * 24) + 4
+        return self._contents[offset:offset+20]
+
+    def _unpack_offset(self, i):
+        offset = (0x100 * 4) + (i * 24)
+        return unpack_from('>L', self._contents, offset)[0]
+
+    def _unpack_crc32_checksum(self, i):
+        # Not stored in v1 index files
+        return None
+
+
+class PackIndex2(FilePackIndex):
+    """Version 2 Pack Index file."""
+
+    def __init__(self, filename, file=None, contents=None, size=None):
+        super(PackIndex2, self).__init__(filename, file, contents, size)
+        if self._contents[:4] != b'\377tOc':
+            raise AssertionError('Not a v2 pack index file')
+        (self.version, ) = unpack_from(b'>L', self._contents, 4)
+        if self.version != 2:
+            raise AssertionError('Version was %d' % self.version)
+        self._fan_out_table = self._read_fan_out_table(8)
+        self._name_table_offset = 8 + 0x100 * 4
+        self._crc32_table_offset = self._name_table_offset + 20 * len(self)
+        self._pack_offset_table_offset = (self._crc32_table_offset +
+                                          4 * len(self))
+        self._pack_offset_largetable_offset = (
+            self._pack_offset_table_offset + 4 * len(self))
+
+    def _unpack_entry(self, i):
+        return (self._unpack_name(i), self._unpack_offset(i),
+                self._unpack_crc32_checksum(i))
+
+    def _unpack_name(self, i):
+        offset = self._name_table_offset + i * 20
+        return self._contents[offset:offset+20]
+
+    def _unpack_offset(self, i):
+        offset = self._pack_offset_table_offset + i * 4
+        offset = unpack_from('>L', self._contents, offset)[0]
+        if offset & (2**31):
+            offset = (
+                self._pack_offset_largetable_offset +
+                (offset & (2 ** 31 - 1)) * 8)
+            offset = unpack_from('>Q', self._contents, offset)[0]
+        return offset
+
+    def _unpack_crc32_checksum(self, i):
+        return unpack_from('>L', self._contents,
+                           self._crc32_table_offset + i * 4)[0]
+
+
+def read_pack_header(read):
+    """Read the header of a pack file.
+
+    :param read: Read function
+    :return: Tuple of (pack version, number of objects). If no data is
+        available to read, returns (None, None).
+    """
+    header = read(12)
+    if not header:
+        return None, None
+    if header[:4] != b'PACK':
+        raise AssertionError('Invalid pack header %r' % header)
+    (version,) = unpack_from(b'>L', header, 4)
+    if version not in (2, 3):
+        raise AssertionError('Version was %d' % version)
+    (num_objects,) = unpack_from(b'>L', header, 8)
+    return (version, num_objects)
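+
+# A minimal sketch (editor's illustration): the header of a version-2
+# pack holding three objects.
+#
+#     header = b'PACK' + struct.pack('>L', 2) + struct.pack('>L', 3)
+#     assert read_pack_header(BytesIO(header).read) == (2, 3)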
+
+
+def chunks_length(chunks):
+    if isinstance(chunks, bytes):
+        return len(chunks)
+    else:
+        return sum(imap(len, chunks))
+
+
+def unpack_object(read_all, read_some=None, compute_crc32=False,
+                  include_comp=False, zlib_bufsize=_ZLIB_BUFSIZE):
+    """Unpack a Git object.
+
+    :param read_all: Read function that blocks until the number of requested
+        bytes are read.
+    :param read_some: Read function that returns at least one byte, but may not
+        return the number of bytes requested.
+    :param compute_crc32: If True, compute the CRC32 of the compressed data. If
+        False, the returned CRC32 will be None.
+    :param include_comp: If True, include compressed data in the result.
+    :param zlib_bufsize: An optional buffer size for zlib operations.
+    :return: A tuple of (unpacked, unused), where unused is the unused data
+        leftover from decompression, and unpacked is an UnpackedObject with
+        the following attrs set:
+
+        * obj_chunks     (for non-delta types)
+        * pack_type_num
+        * delta_base     (for delta types)
+        * comp_chunks    (if include_comp is True)
+        * decomp_chunks
+        * decomp_len
+        * crc32          (if compute_crc32 is True)
+    """
+    if read_some is None:
+        read_some = read_all
+    if compute_crc32:
+        crc32 = 0
+    else:
+        crc32 = None
+
+    bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
+    type_num = (bytes[0] >> 4) & 0x07
+    size = bytes[0] & 0x0f
+    for i, byte in enumerate(bytes[1:]):
+        size += (byte & 0x7f) << ((i * 7) + 4)
+
+    raw_base = len(bytes)
+    if type_num == OFS_DELTA:
+        bytes, crc32 = take_msb_bytes(read_all, crc32=crc32)
+        raw_base += len(bytes)
+        if bytes[-1] & 0x80:
+            raise AssertionError('MSB set on final byte of delta base offset')
+        delta_base_offset = bytes[0] & 0x7f
+        for byte in bytes[1:]:
+            delta_base_offset += 1
+            delta_base_offset <<= 7
+            delta_base_offset += (byte & 0x7f)
+        delta_base = delta_base_offset
+    elif type_num == REF_DELTA:
+        delta_base = read_all(20)
+        if compute_crc32:
+            crc32 = binascii.crc32(delta_base, crc32)
+        raw_base += 20
+    else:
+        delta_base = None
+
+    unpacked = UnpackedObject(type_num, delta_base, size, crc32)
+    unused = read_zlib_chunks(read_some, unpacked, buffer_size=zlib_bufsize,
+                              include_comp=include_comp)
+    return unpacked, unused
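+
+# A round-trip sketch (editor's illustration), using write_pack_object
+# (defined further down) to produce a full blob entry. A real pack always
+# has data after each object (at minimum the SHA-1 trailer), which
+# read_zlib_chunks relies on, hence the padding here.
+#
+#     buf = BytesIO()
+#     write_pack_object(buf, 3, b'hello')  # 3 is the blob type number
+#     buf.write(b'\x00' * 20)              # stand-in for trailing data
+#     buf.seek(0)
+#     unpacked, unused = unpack_object(buf.read)
+#     assert b''.join(unpacked.obj_chunks) == b'hello'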
+
+
+def _compute_object_size(value):
+    """Compute the size of a unresolved object for use with LRUSizeCache."""
+    (num, obj) = value
+    if num in DELTA_TYPES:
+        return chunks_length(obj[1])
+    return chunks_length(obj)
+
+
+class PackStreamReader(object):
+    """Class to read a pack stream.
+
+    The pack is read from a ReceivableProtocol using read() or recv() as
+    appropriate.
+    """
+
+    def __init__(self, read_all, read_some=None, zlib_bufsize=_ZLIB_BUFSIZE):
+        self.read_all = read_all
+        if read_some is None:
+            self.read_some = read_all
+        else:
+            self.read_some = read_some
+        self.sha = sha1()
+        self._offset = 0
+        self._rbuf = BytesIO()
+        # trailer is a deque to avoid memory allocation on small reads
+        self._trailer = deque()
+        self._zlib_bufsize = zlib_bufsize
+
+    def _read(self, read, size):
+        """Read up to size bytes using the given callback.
+
+        As a side effect, update the verifier's hash (excluding the last 20
+        bytes read).
+
+        :param read: The read callback to read from.
+        :param size: The maximum number of bytes to read; the particular
+            behavior is callback-specific.
+        """
+        data = read(size)
+
+        # maintain a trailer of the last 20 bytes we've read
+        n = len(data)
+        self._offset += n
+        tn = len(self._trailer)
+        if n >= 20:
+            to_pop = tn
+            to_add = 20
+        else:
+            to_pop = max(n + tn - 20, 0)
+            to_add = n
+        self.sha.update(
+            bytes(bytearray([self._trailer.popleft() for _ in range(to_pop)])))
+        self._trailer.extend(data[-to_add:])
+
+        # hash everything but the trailer
+        self.sha.update(data[:-to_add])
+        return data
+
+    def _buf_len(self):
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, SEEK_END)
+        end = buf.tell()
+        buf.seek(start)
+        return end - start
+
+    @property
+    def offset(self):
+        return self._offset - self._buf_len()
+
+    def read(self, size):
+        """Read, blocking until size bytes are read."""
+        buf_len = self._buf_len()
+        if buf_len >= size:
+            return self._rbuf.read(size)
+        buf_data = self._rbuf.read()
+        self._rbuf = BytesIO()
+        return buf_data + self._read(self.read_all, size - buf_len)
+
+    def recv(self, size):
+        """Read up to size bytes, blocking until one byte is read."""
+        buf_len = self._buf_len()
+        if buf_len:
+            data = self._rbuf.read(size)
+            if size >= buf_len:
+                self._rbuf = BytesIO()
+            return data
+        return self._read(self.read_some, size)
+
+    def __len__(self):
+        return self._num_objects
+
+    def read_objects(self, compute_crc32=False):
+        """Read the objects in this pack file.
+
+        :param compute_crc32: If True, compute the CRC32 of the compressed
+            data. If False, the returned CRC32 will be None.
+        :return: Iterator over UnpackedObjects with the following members set:
+            offset
+            obj_type_num
+            obj_chunks (for non-delta types)
+            delta_base (for delta types)
+            decomp_chunks
+            decomp_len
+            crc32 (if compute_crc32 is True)
+        :raise ChecksumMismatch: if the checksum of the pack contents does not
+            match the checksum in the pack trailer.
+        :raise zlib.error: if an error occurred during zlib decompression.
+        :raise IOError: if an error occurred writing to the output file.
+        """
+        pack_version, self._num_objects = read_pack_header(self.read)
+        if pack_version is None:
+            return
+
+        for i in range(self._num_objects):
+            offset = self.offset
+            unpacked, unused = unpack_object(
+              self.read, read_some=self.recv, compute_crc32=compute_crc32,
+              zlib_bufsize=self._zlib_bufsize)
+            unpacked.offset = offset
+
+            # prepend any unused data to current read buffer
+            buf = BytesIO()
+            buf.write(unused)
+            buf.write(self._rbuf.read())
+            buf.seek(0)
+            self._rbuf = buf
+
+            yield unpacked
+
+        if self._buf_len() < 20:
+            # Fewer than 20 buffered bytes means part of the trailer is
+            # still on the wire. Read all 20 trailer bytes: N come from
+            # the read buffer and (20 - N) come from the wire, so that the
+            # complete trailer ends up in self._trailer.
+            self.read(20)
+
+        pack_sha = bytearray(self._trailer)
+        if pack_sha != self.sha.digest():
+            raise ChecksumMismatch(sha_to_hex(pack_sha), self.sha.hexdigest())
+
+
+class PackStreamCopier(PackStreamReader):
+    """Class to verify a pack stream as it is being read.
+
+    The pack is read from a ReceivableProtocol using read() or recv() as
+    appropriate and written out to the given file-like object.
+    """
+
+    def __init__(self, read_all, read_some, outfile, delta_iter=None):
+        """Initialize the copier.
+
+        :param read_all: Read function that blocks until the number of
+            requested bytes are read.
+        :param read_some: Read function that returns at least one byte, but may
+            not return the number of bytes requested.
+        :param outfile: File-like object to write output through.
+        :param delta_iter: Optional DeltaChainIterator to record deltas as we
+            read them.
+        """
+        super(PackStreamCopier, self).__init__(read_all, read_some=read_some)
+        self.outfile = outfile
+        self._delta_iter = delta_iter
+
+    def _read(self, read, size):
+        """Read data from the read callback and write it to the file."""
+        data = super(PackStreamCopier, self)._read(read, size)
+        self.outfile.write(data)
+        return data
+
+    def verify(self):
+        """Verify a pack stream and write it to the output file.
+
+        See PackStreamReader.read_objects for a list of exceptions this may
+        throw.
+        """
+        if self._delta_iter:
+            for unpacked in self.read_objects():
+                self._delta_iter.record(unpacked)
+        else:
+            for _ in self.read_objects():
+                pass
+
+
+def obj_sha(type, chunks):
+    """Compute the SHA for a numeric type and object chunks."""
+    sha = sha1()
+    sha.update(object_header(type, chunks_length(chunks)))
+    if isinstance(chunks, bytes):
+        sha.update(chunks)
+    else:
+        for chunk in chunks:
+            sha.update(chunk)
+    return sha.digest()
+
+
+def compute_file_sha(f, start_ofs=0, end_ofs=0, buffer_size=1 << 16):
+    """Hash a portion of a file into a new SHA.
+
+    :param f: A file-like object to read from that supports seek().
+    :param start_ofs: The offset in the file to start reading at.
+    :param end_ofs: The offset in the file to end reading at, relative to the
+        end of the file.
+    :param buffer_size: A buffer size for reading.
+    :return: A new SHA object updated with data read from the file.
+    """
+    sha = sha1()
+    f.seek(0, SEEK_END)
+    length = f.tell()
+    if (end_ofs < 0 and length + end_ofs < start_ofs) or end_ofs > length:
+        raise AssertionError(
+            "Attempt to read beyond file length. "
+            "start_ofs: %d, end_ofs: %d, file length: %d" % (
+                start_ofs, end_ofs, length))
+    todo = length + end_ofs - start_ofs
+    f.seek(start_ofs)
+    while todo:
+        data = f.read(min(todo, buffer_size))
+        sha.update(data)
+        todo -= len(data)
+    return sha
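+
+# A minimal sketch (editor's illustration): hash the middle four bytes of
+# a six-byte buffer, skipping one byte at each end.
+#
+#     got = compute_file_sha(BytesIO(b'foobar'), start_ofs=1, end_ofs=-1)
+#     assert got.digest() == sha1(b'ooba').digest()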
+
+
+class PackData(object):
+    """The data contained in a packfile.
+
+    Pack files can be accessed both sequentially for exploding a pack, and
+    directly with the help of an index to retrieve a specific object.
+
+    The objects within are either complete or a delta against another.
+
+    Each object has a variable-length header: if the MSB of a byte is set,
+    the subsequent byte is still part of the header. In the first byte the
+    three bits after the MSB are the type, which tells you the type of
+    object and whether it is a delta; the low 4 bits are the lowest bits
+    of the size. In each subsequent byte the low 7 bits are the next more
+    significant bits of the size, i.e. the last byte of the header
+    contains the most significant bits of the size.
+
+    For complete objects the data is stored as zlib-deflated data. The
+    size in the header is the uncompressed object size, so to decompress
+    you just keep feeding data to zlib until you get an object back, or it
+    errors on bad data. This is done here by giving it the complete buffer
+    from the start of the deflated object onwards. This is bad, but until
+    I get mmap sorted out it will have to do.
+
+    Currently no integrity checks are done. Also, no attempt is made to
+    detect the delta case or a request for an object at the wrong
+    position; either will just raise a zlib error or KeyError.
+    """
+
+    def __init__(self, filename, file=None, size=None):
+        """Create a PackData object representing the pack in the given filename.
+
+        The file must exist and stay readable until the object is disposed of.
+        It must also stay the same size. It will be mapped whenever needed.
+
+        Currently there is a restriction on the size of the pack as the python
+        mmap implementation is flawed.
+        """
+        self._filename = filename
+        self._size = size
+        self._header_size = 12
+        if file is None:
+            self._file = GitFile(self._filename, 'rb')
+        else:
+            self._file = file
+        (version, self._num_objects) = read_pack_header(self._file.read)
+        self._offset_cache = LRUSizeCache(
+            1024*1024*20, compute_size=_compute_object_size)
+        self.pack = None
+
+    @property
+    def filename(self):
+        return os.path.basename(self._filename)
+
+    @classmethod
+    def from_file(cls, file, size):
+        return cls(str(file), file=file, size=size)
+
+    @classmethod
+    def from_path(cls, path):
+        return cls(filename=path)
+
+    def close(self):
+        self._file.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def _get_size(self):
+        if self._size is not None:
+            return self._size
+        self._size = os.path.getsize(self._filename)
+        if self._size < self._header_size:
+            errmsg = ('%s is too small for a packfile (%d < %d)' %
+                      (self._filename, self._size, self._header_size))
+            raise AssertionError(errmsg)
+        return self._size
+
+    def __len__(self):
+        """Returns the number of objects in this pack."""
+        return self._num_objects
+
+    def calculate_checksum(self):
+        """Calculate the checksum for this pack.
+
+        :return: 20-byte binary SHA1 digest
+        """
+        return compute_file_sha(self._file, end_ofs=-20).digest()
+
+    def get_ref(self, sha):
+        """Get the object for a ref SHA, only looking in this pack."""
+        # TODO: cache these results
+        if self.pack is None:
+            raise KeyError(sha)
+        try:
+            offset = self.pack.index.object_index(sha)
+        except KeyError:
+            offset = None
+        if offset:
+            type, obj = self.get_object_at(offset)
+        elif self.pack is not None and self.pack.resolve_ext_ref:
+            type, obj = self.pack.resolve_ext_ref(sha)
+        else:
+            raise KeyError(sha)
+        return offset, type, obj
+
+    def resolve_object(self, offset, type, obj, get_ref=None):
+        """Resolve an object, possibly resolving deltas when necessary.
+
+        :return: Tuple with object type and contents.
+        """
+        # Walk down the delta chain, building a stack of deltas to reach
+        # the requested object.
+        base_offset = offset
+        base_type = type
+        base_obj = obj
+        delta_stack = []
+        while base_type in DELTA_TYPES:
+            prev_offset = base_offset
+            if get_ref is None:
+                get_ref = self.get_ref
+            if base_type == OFS_DELTA:
+                (delta_offset, delta) = base_obj
+                # TODO: clean up asserts and replace with nicer error messages
+                base_offset = base_offset - delta_offset
+                base_type, base_obj = self.get_object_at(base_offset)
+                assert isinstance(base_type, int)
+            elif base_type == REF_DELTA:
+                (basename, delta) = base_obj
+                assert isinstance(basename, bytes) and len(basename) == 20
+                base_offset, base_type, base_obj = get_ref(basename)
+                assert isinstance(base_type, int)
+            delta_stack.append((prev_offset, base_type, delta))
+
+        # Now grab the base object (mustn't be a delta) and apply the
+        # deltas all the way up the stack.
+        chunks = base_obj
+        for prev_offset, delta_type, delta in reversed(delta_stack):
+            chunks = apply_delta(chunks, delta)
+            # TODO(dborowitz): This can result in poor performance if
+            # large base objects are separated from deltas in the pack.
+            # We should reorganize so that we apply deltas to all
+            # objects in a chain one after the other to optimize cache
+            # performance.
+            if prev_offset is not None:
+                self._offset_cache[prev_offset] = base_type, chunks
+        return base_type, chunks
+
+    def iterobjects(self, progress=None, compute_crc32=True):
+        self._file.seek(self._header_size)
+        for i in range(1, self._num_objects + 1):
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
+              self._file.read, compute_crc32=compute_crc32)
+            if progress is not None:
+                progress(i, self._num_objects)
+            yield (offset, unpacked.pack_type_num, unpacked._obj(),
+                   unpacked.crc32)
+            # Back up over unused data.
+            self._file.seek(-len(unused), SEEK_CUR)
+
+    def _iter_unpacked(self):
+        # TODO(dborowitz): Merge this with iterobjects, if we can change its
+        # return type.
+        self._file.seek(self._header_size)
+
+        if self._num_objects is None:
+            return
+
+        for _ in range(self._num_objects):
+            offset = self._file.tell()
+            unpacked, unused = unpack_object(
+              self._file.read, compute_crc32=False)
+            unpacked.offset = offset
+            yield unpacked
+            # Back up over unused data.
+            self._file.seek(-len(unused), SEEK_CUR)
+
+    def iterentries(self, progress=None):
+        """Yield entries summarizing the contents of this pack.
+
+        :param progress: Progress function, called with current and total
+            object count.
+        :return: iterator of tuples with (sha, offset, crc32)
+        """
+        num_objects = self._num_objects
+        resolve_ext_ref = (
+            self.pack.resolve_ext_ref if self.pack is not None else None)
+        indexer = PackIndexer.for_pack_data(
+            self, resolve_ext_ref=resolve_ext_ref)
+        for i, result in enumerate(indexer):
+            if progress is not None:
+                progress(i, num_objects)
+            yield result
+
+    def sorted_entries(self, progress=None):
+        """Return entries in this pack, sorted by SHA.
+
+        :param progress: Progress function, called with current and total
+            object count
+        :return: List of tuples with (sha, offset, crc32)
+        """
+        ret = sorted(self.iterentries(progress=progress))
+        return ret
+
+    def create_index_v1(self, filename, progress=None):
+        """Create a version 1 file for this data file.
+
+        :param filename: Index filename.
+        :param progress: Progress report function
+        :return: Checksum of index file
+        """
+        entries = self.sorted_entries(progress=progress)
+        with GitFile(filename, 'wb') as f:
+            return write_pack_index_v1(f, entries, self.calculate_checksum())
+
+    def create_index_v2(self, filename, progress=None):
+        """Create a version 2 index file for this data file.
+
+        :param filename: Index filename.
+        :param progress: Progress report function
+        :return: Checksum of index file
+        """
+        entries = self.sorted_entries(progress=progress)
+        with GitFile(filename, 'wb') as f:
+            return write_pack_index_v2(f, entries, self.calculate_checksum())
+
+    def create_index(self, filename, progress=None, version=2):
+        """Create an index file for this data file.
+
+        :param filename: Index filename.
+        :param progress: Progress report function
+        :param version: Index format version to write (1 or 2)
+        :return: Checksum of index file
+        """
+        if version == 1:
+            return self.create_index_v1(filename, progress)
+        elif version == 2:
+            return self.create_index_v2(filename, progress)
+        else:
+            raise ValueError('unknown index format %d' % version)
+
+    def get_stored_checksum(self):
+        """Return the expected checksum stored in this pack."""
+        self._file.seek(-20, SEEK_END)
+        return self._file.read(20)
+
+    def check(self):
+        """Check the consistency of this pack."""
+        actual = self.calculate_checksum()
+        stored = self.get_stored_checksum()
+        if actual != stored:
+            raise ChecksumMismatch(stored, actual)
+
+    def get_object_at(self, offset):
+        """Given an offset in to the packfile return the object that is there.
+
+        Using the associated index the location of an object can be looked up,
+        and then the packfile can be asked directly for that object using this
+        function.
+        """
+        try:
+            return self._offset_cache[offset]
+        except KeyError:
+            pass
+        assert offset >= self._header_size
+        self._file.seek(offset)
+        unpacked, _ = unpack_object(self._file.read)
+        return (unpacked.pack_type_num, unpacked._obj())
+
+
+class DeltaChainIterator(object):
+    """Abstract iterator over pack data based on delta chains.
+
+    Each object in the pack is guaranteed to be inflated exactly once,
+    regardless of how many objects reference it as a delta base. As a result,
+    memory usage is proportional to the length of the longest delta chain.
+
+    Subclasses can override _result to define the result type of the iterator.
+    By default, results are UnpackedObjects with the following members set:
+
+    * offset
+    * obj_type_num
+    * obj_chunks
+    * pack_type_num
+    * delta_base     (for delta types)
+    * comp_chunks    (if _include_comp is True)
+    * decomp_chunks
+    * decomp_len
+    * crc32          (if _compute_crc32 is True)
+    """
+
+    _compute_crc32 = False
+    _include_comp = False
+
+    def __init__(self, file_obj, resolve_ext_ref=None):
+        self._file = file_obj
+        self._resolve_ext_ref = resolve_ext_ref
+        self._pending_ofs = defaultdict(list)
+        self._pending_ref = defaultdict(list)
+        self._full_ofs = []
+        self._shas = {}
+        self._ext_refs = []
+
+    @classmethod
+    def for_pack_data(cls, pack_data, resolve_ext_ref=None):
+        walker = cls(None, resolve_ext_ref=resolve_ext_ref)
+        walker.set_pack_data(pack_data)
+        for unpacked in pack_data._iter_unpacked():
+            walker.record(unpacked)
+        return walker
+
+    def record(self, unpacked):
+        type_num = unpacked.pack_type_num
+        offset = unpacked.offset
+        if type_num == OFS_DELTA:
+            base_offset = offset - unpacked.delta_base
+            self._pending_ofs[base_offset].append(offset)
+        elif type_num == REF_DELTA:
+            self._pending_ref[unpacked.delta_base].append(offset)
+        else:
+            self._full_ofs.append((offset, type_num))
+
+    def set_pack_data(self, pack_data):
+        self._file = pack_data._file
+
+    def _walk_all_chains(self):
+        for offset, type_num in self._full_ofs:
+            for result in self._follow_chain(offset, type_num, None):
+                yield result
+        for result in self._walk_ref_chains():
+            yield result
+        assert not self._pending_ofs
+
+    def _ensure_no_pending(self):
+        if self._pending_ref:
+            raise KeyError([sha_to_hex(s) for s in self._pending_ref])
+
+    def _walk_ref_chains(self):
+        if not self._resolve_ext_ref:
+            self._ensure_no_pending()
+            return
+
+        for base_sha, pending in sorted(self._pending_ref.items()):
+            if base_sha not in self._pending_ref:
+                continue
+            try:
+                type_num, chunks = self._resolve_ext_ref(base_sha)
+            except KeyError:
+                # Not an external ref, but may depend on one. Either it will
+                # get popped via a _follow_chain call, or we will raise an
+                # error below.
+                continue
+            self._ext_refs.append(base_sha)
+            self._pending_ref.pop(base_sha)
+            for new_offset in pending:
+                for result in self._follow_chain(new_offset, type_num, chunks):
+                    yield result
+
+        self._ensure_no_pending()
+
+    def _result(self, unpacked):
+        return unpacked
+
+    def _resolve_object(self, offset, obj_type_num, base_chunks):
+        self._file.seek(offset)
+        unpacked, _ = unpack_object(
+          self._file.read, include_comp=self._include_comp,
+          compute_crc32=self._compute_crc32)
+        unpacked.offset = offset
+        if base_chunks is None:
+            assert unpacked.pack_type_num == obj_type_num
+        else:
+            assert unpacked.pack_type_num in DELTA_TYPES
+            unpacked.obj_type_num = obj_type_num
+            unpacked.obj_chunks = apply_delta(base_chunks,
+                                              unpacked.decomp_chunks)
+        return unpacked
+
+    def _follow_chain(self, offset, obj_type_num, base_chunks):
+        # Unlike PackData.get_object_at, there is no need to cache offsets as
+        # this approach by design inflates each object exactly once.
+        todo = [(offset, obj_type_num, base_chunks)]
+        for offset, obj_type_num, base_chunks in todo:
+            unpacked = self._resolve_object(offset, obj_type_num, base_chunks)
+            yield self._result(unpacked)
+
+            unblocked = chain(self._pending_ofs.pop(unpacked.offset, []),
+                              self._pending_ref.pop(unpacked.sha(), []))
+            todo.extend(
+                (new_offset, unpacked.obj_type_num, unpacked.obj_chunks)
+                for new_offset in unblocked)
+
+    def __iter__(self):
+        return self._walk_all_chains()
+
+    def ext_refs(self):
+        return self._ext_refs
+
+
+class PackIndexer(DeltaChainIterator):
+    """Delta chain iterator that yields index entries."""
+
+    _compute_crc32 = True
+
+    def _result(self, unpacked):
+        return unpacked.sha(), unpacked.offset, unpacked.crc32
+
+
+class PackInflater(DeltaChainIterator):
+    """Delta chain iterator that yields ShaFile objects."""
+
+    def _result(self, unpacked):
+        return unpacked.sha_file()
+
+
+class SHA1Reader(object):
+    """Wrapper for file-like object that remembers the SHA1 of its data."""
+
+    def __init__(self, f):
+        self.f = f
+        self.sha1 = sha1(b'')
+
+    def read(self, num=None):
+        data = self.f.read(num)
+        self.sha1.update(data)
+        return data
+
+    def check_sha(self):
+        stored = self.f.read(20)
+        if stored != self.sha1.digest():
+            raise ChecksumMismatch(self.sha1.hexdigest(), sha_to_hex(stored))
+
+    def close(self):
+        return self.f.close()
+
+    def tell(self):
+        return self.f.tell()
+
+
+class SHA1Writer(object):
+    """Wrapper for file-like object that remembers the SHA1 of its data."""
+
+    def __init__(self, f):
+        self.f = f
+        self.length = 0
+        self.sha1 = sha1(b'')
+
+    def write(self, data):
+        self.sha1.update(data)
+        self.f.write(data)
+        self.length += len(data)
+
+    def write_sha(self):
+        sha = self.sha1.digest()
+        assert len(sha) == 20
+        self.f.write(sha)
+        self.length += len(sha)
+        return sha
+
+    def close(self):
+        sha = self.write_sha()
+        self.f.close()
+        return sha
+
+    def offset(self):
+        return self.length
+
+    def tell(self):
+        return self.f.tell()
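+
+# A round-trip sketch (editor's illustration): SHA1Writer appends a
+# 20-byte SHA-1 trailer over everything written, and SHA1Reader
+# recomputes the hash while reading and checks it against that trailer.
+#
+#     buf = BytesIO()
+#     writer = SHA1Writer(buf)
+#     writer.write(b'some data')
+#     writer.write_sha()
+#     buf.seek(0)
+#     reader = SHA1Reader(buf)
+#     reader.read(9)      # the payload
+#     reader.check_sha()  # raises ChecksumMismatch if corrupted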
+
+
+def pack_object_header(type_num, delta_base, size):
+    """Create a pack object header for the given object info.
+
+    :param type_num: Numeric type of the object.
+    :param delta_base: Delta base offset or ref, or None for whole objects.
+    :param size: Uncompressed object size.
+    :return: A header for a packed object.
+    """
+    header = []
+    c = (type_num << 4) | (size & 15)
+    size >>= 4
+    while size:
+        header.append(c | 0x80)
+        c = size & 0x7f
+        size >>= 7
+    header.append(c)
+    if type_num == OFS_DELTA:
+        ret = [delta_base & 0x7f]
+        delta_base >>= 7
+        while delta_base:
+            delta_base -= 1
+            ret.insert(0, 0x80 | (delta_base & 0x7f))
+            delta_base >>= 7
+        header.extend(ret)
+    elif type_num == REF_DELTA:
+        assert len(delta_base) == 20
+        header += delta_base
+    return bytearray(header)
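+
+# A minimal sketch (editor's illustration): a blob (type 3) of 100 bytes.
+# 0xb4 is continuation bit | type 3 | low size nibble (4); 0x06 supplies
+# size bits 4-10, giving 4 + (6 << 4) = 100.
+#
+#     assert bytes(pack_object_header(3, None, 100)) == b'\xb4\x06'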
+
+
+def write_pack_object(f, type, object, sha=None):
+    """Write pack object to a file.
+
+    :param f: File to write to
+    :param type: Numeric type of the object
+    :param object: Object to write
+    :param sha: Optional SHA-1 object to update with the bytes written
+    :return: CRC32 checksum of the data written
+    """
+    if type in DELTA_TYPES:
+        delta_base, object = object
+    else:
+        delta_base = None
+    header = bytes(pack_object_header(type, delta_base, len(object)))
+    comp_data = zlib.compress(object)
+    crc32 = 0
+    for data in (header, comp_data):
+        f.write(data)
+        if sha is not None:
+            sha.update(data)
+        crc32 = binascii.crc32(data, crc32)
+    return crc32 & 0xffffffff
+
+
+def write_pack(filename, objects, deltify=None, delta_window_size=None):
+    """Write a new pack data file.
+
+    :param filename: Path to the new pack file (without .pack extension)
+    :param objects: Iterable of (object, path) tuples to write.
+        Should provide __len__
+    :param deltify: Whether to deltify pack objects
+    :param delta_window_size: Delta window size
+    :return: Tuple with checksum of pack file and index file
+    """
+    with GitFile(filename + '.pack', 'wb') as f:
+        entries, data_sum = write_pack_objects(
+            f, objects, delta_window_size=delta_window_size, deltify=deltify)
+    entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
+    with GitFile(filename + '.idx', 'wb') as f:
+        return data_sum, write_pack_index_v2(f, entries, data_sum)
+
+
+def write_pack_header(f, num_objects):
+    """Write a pack header for the given number of objects."""
+    f.write(b'PACK')                          # Pack header
+    f.write(struct.pack(b'>L', 2))            # Pack version
+    f.write(struct.pack(b'>L', num_objects))  # Number of objects in pack
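+
+# A round-trip sketch (editor's illustration): the 12-byte header written
+# here parses back through read_pack_header above.
+#
+#     buf = BytesIO()
+#     write_pack_header(buf, 3)
+#     buf.seek(0)
+#     assert read_pack_header(buf.read) == (2, 3)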
+
+
+def deltify_pack_objects(objects, window_size=None):
+    """Generate deltas for pack objects.
+
+    :param objects: An iterable of (object, path) tuples to deltify.
+    :param window_size: Window size; None for default
+    :return: Iterator over type_num, object id, delta_base, content
+        delta_base is None for full text entries
+    """
+    if window_size is None:
+        window_size = DEFAULT_PACK_DELTA_WINDOW_SIZE
+    # Build a list of objects ordered by the magic Linus heuristic.
+    # This helps us find good candidates to diff against.
+    magic = []
+    for obj, path in objects:
+        magic.append((obj.type_num, path, -obj.raw_length(), obj))
+    magic.sort()
+
+    possible_bases = deque()
+
+    for type_num, path, neg_length, o in magic:
+        raw = o.as_raw_string()
+        winner = raw
+        winner_base = None
+        for base in possible_bases:
+            if base.type_num != type_num:
+                continue
+            delta = create_delta(base.as_raw_string(), raw)
+            if len(delta) < len(winner):
+                winner_base = base.sha().digest()
+                winner = delta
+        yield type_num, o.sha().digest(), winner_base, winner
+        possible_bases.appendleft(o)
+        while len(possible_bases) > window_size:
+            possible_bases.pop()
+
+
+def write_pack_objects(f, objects, delta_window_size=None, deltify=False):
+    """Write a new pack data file.
+
+    :param f: File to write to
+    :param objects: Iterable of (object, path) tuples to write.
+        Should provide __len__
+    :param delta_window_size: Sliding window size for searching for deltas;
+        set to None for the default window size.
+    :param deltify: Whether to deltify objects
+    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
+    """
+    if deltify:
+        pack_contents = deltify_pack_objects(objects, delta_window_size)
+    else:
+        pack_contents = (
+            (o.type_num, o.sha().digest(), None, o.as_raw_string())
+            for (o, path) in objects)
+
+    return write_pack_data(f, len(objects), pack_contents)
+
+
+def write_pack_data(f, num_records, records):
+    """Write a new pack data file.
+
+    :param f: File to write to
+    :param num_records: Number of records
+    :param records: Iterator over type_num, object_id, delta_base, raw
+    :return: Dict mapping id -> (offset, crc32 checksum), pack checksum
+    """
+    # Write the pack
+    entries = {}
+    f = SHA1Writer(f)
+    write_pack_header(f, num_records)
+    for type_num, object_id, delta_base, raw in records:
+        offset = f.offset()
+        if delta_base is not None:
+            try:
+                base_offset, base_crc32 = entries[delta_base]
+            except KeyError:
+                type_num = REF_DELTA
+                raw = (delta_base, raw)
+            else:
+                type_num = OFS_DELTA
+                raw = (offset - base_offset, raw)
+        crc32 = write_pack_object(f, type_num, raw)
+        entries[object_id] = (offset, crc32)
+    return entries, f.write_sha()
+
+
+def write_pack_index_v1(f, entries, pack_checksum):
+    """Write a new pack index file.
+
+    :param f: A file-like object to write to
+    :param entries: List of tuples with object name (sha), offset_in_pack,
+        and crc32_checksum.
+    :param pack_checksum: Checksum of the pack file.
+    :return: The SHA of the written index file
+    """
+    f = SHA1Writer(f)
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[:1])] += 1
+    # Fan-out table
+    for i in range(0x100):
+        f.write(struct.pack('>L', fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        if not (offset <= 0xffffffff):
+            raise TypeError("pack format 1 only supports offsets < 4 GiB")
+        f.write(struct.pack('>L20s', offset, name))
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    return f.write_sha()
+
+
+def _delta_encode_size(size):
+    ret = bytearray()
+    c = size & 0x7f
+    size >>= 7
+    while size:
+        ret.append(c | 0x80)
+        c = size & 0x7f
+        size >>= 7
+    ret.append(c)
+    return ret
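+
+# A minimal sketch (editor's illustration): 200 is encoded little-endian
+# in 7-bit groups, a continuation byte with the low seven bits (0xc8)
+# followed by a final byte with the remaining bit (0x01).
+#
+#     assert bytes(_delta_encode_size(200)) == b'\xc8\x01'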
+
+
+# The length of delta compression copy operations in version 2 packs is limited
+# to 64K.  To copy more, we use several copy operations.  Version 3 packs allow
+# 24-bit lengths in copy operations, but we always make version 2 packs.
+_MAX_COPY_LEN = 0xffff
+
+
+def _encode_copy_operation(start, length):
+    scratch = []
+    op = 0x80
+    for i in range(4):
+        if start & 0xff << i*8:
+            scratch.append((start >> i*8) & 0xff)
+            op |= 1 << i
+    for i in range(2):
+        if length & 0xff << i*8:
+            scratch.append((length >> i*8) & 0xff)
+            op |= 1 << (4+i)
+    return bytearray([op] + scratch)
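+
+# A minimal sketch (editor's illustration): copy 5 bytes from offset
+# 0x1000. The op byte 0x92 sets 0x80 (copy), bit 1 (offset byte 1
+# present) and bit 4 (length byte 0 present); the operands follow in
+# that order.
+#
+#     assert bytes(_encode_copy_operation(0x1000, 5)) == b'\x92\x10\x05'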
+
+
+def create_delta(base_buf, target_buf):
+    """Use python difflib to work out how to transform base_buf to target_buf.
+
+    :param base_buf: Base buffer
+    :param target_buf: Target buffer
+    """
+    assert isinstance(base_buf, bytes)
+    assert isinstance(target_buf, bytes)
+    out_buf = bytearray()
+    # write delta header
+    out_buf += _delta_encode_size(len(base_buf))
+    out_buf += _delta_encode_size(len(target_buf))
+    # write out delta opcodes
+    seq = difflib.SequenceMatcher(a=base_buf, b=target_buf)
+    for opcode, i1, i2, j1, j2 in seq.get_opcodes():
+        # Git patch opcodes don't care about deletes!
+        # if opcode == 'replace' or opcode == 'delete':
+        #    pass
+        if opcode == 'equal':
+            # If they are equal, unpacker will use data from base_buf
+            # Write out an opcode that says what range to use
+            copy_start = i1
+            copy_len = i2 - i1
+            while copy_len > 0:
+                to_copy = min(copy_len, _MAX_COPY_LEN)
+                out_buf += _encode_copy_operation(copy_start, to_copy)
+                copy_start += to_copy
+                copy_len -= to_copy
+        if opcode == 'replace' or opcode == 'insert':
+            # If we are replacing a range or adding one, then we just
+            # output it to the stream (prefixed by its size)
+            s = j2 - j1
+            o = j1
+            while s > 127:
+                out_buf.append(127)
+                out_buf += bytearray(target_buf[o:o+127])
+                s -= 127
+                o += 127
+            out_buf.append(s)
+            out_buf += bytearray(target_buf[o:o+s])
+    return bytes(out_buf)
+
+
+def apply_delta(src_buf, delta):
+    """Based on the similar function in git's patch-delta.c.
+
+    :param src_buf: Source buffer
+    :param delta: Delta instructions
+    """
+    if not isinstance(src_buf, bytes):
+        src_buf = b''.join(src_buf)
+    if not isinstance(delta, bytes):
+        delta = b''.join(delta)
+    out = []
+    index = 0
+    delta_length = len(delta)
+
+    def get_delta_header_size(delta, index):
+        size = 0
+        i = 0
+        while delta:
+            cmd = ord(delta[index:index+1])
+            index += 1
+            size |= (cmd & ~0x80) << i
+            i += 7
+            if not cmd & 0x80:
+                break
+        return size, index
+    src_size, index = get_delta_header_size(delta, index)
+    dest_size, index = get_delta_header_size(delta, index)
+    assert src_size == len(src_buf), '%d vs %d' % (src_size, len(src_buf))
+    while index < delta_length:
+        cmd = ord(delta[index:index+1])
+        index += 1
+        if cmd & 0x80:
+            cp_off = 0
+            for i in range(4):
+                if cmd & (1 << i):
+                    x = ord(delta[index:index+1])
+                    index += 1
+                    cp_off |= x << (i * 8)
+            cp_size = 0
+            # Version 3 packs can contain copy sizes larger than 64K.
+            for i in range(3):
+                if cmd & (1 << (4+i)):
+                    x = ord(delta[index:index+1])
+                    index += 1
+                    cp_size |= x << (i * 8)
+            if cp_size == 0:
+                cp_size = 0x10000
+            if (cp_off + cp_size < cp_size or
+                    cp_off + cp_size > src_size or
+                    cp_size > dest_size):
+                break
+            out.append(src_buf[cp_off:cp_off+cp_size])
+        elif cmd != 0:
+            out.append(delta[index:index+cmd])
+            index += cmd
+        else:
+            raise ApplyDeltaError('Invalid opcode 0')
+
+    if index != delta_length:
+        raise ApplyDeltaError('delta not empty: %r' % delta[index:])
+
+    if dest_size != chunks_length(out):
+        raise ApplyDeltaError('dest size incorrect')
+
+    return out
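+
+# A round-trip sketch (editor's illustration): a delta produced by
+# create_delta above rebuilds the target when applied to the base.
+#
+#     base = b'the quick brown fox jumps over the lazy dog'
+#     target = b'the quick red fox jumps over the lazy dog'
+#     delta = create_delta(base, target)
+#     assert b''.join(apply_delta(base, delta)) == target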
+
+
+def write_pack_index_v2(f, entries, pack_checksum):
+    """Write a new pack index file.
+
+    :param f: File-like object to write to
+    :param entries: List of tuples with object name (sha), offset_in_pack, and
+        crc32_checksum.
+    :param pack_checksum: Checksum of the pack file.
+    :return: The SHA of the index file written
+    """
+    f = SHA1Writer(f)
+    f.write(b'\377tOc')  # Magic!
+    f.write(struct.pack('>L', 2))
+    fan_out_table = defaultdict(lambda: 0)
+    for (name, offset, entry_checksum) in entries:
+        fan_out_table[ord(name[:1])] += 1
+    # Fan-out table
+    largetable = []
+    for i in range(0x100):
+        f.write(struct.pack(b'>L', fan_out_table[i]))
+        fan_out_table[i+1] += fan_out_table[i]
+    for (name, offset, entry_checksum) in entries:
+        f.write(name)
+    for (name, offset, entry_checksum) in entries:
+        f.write(struct.pack(b'>L', entry_checksum))
+    for (name, offset, entry_checksum) in entries:
+        if offset < 2**31:
+            f.write(struct.pack(b'>L', offset))
+        else:
+            f.write(struct.pack(b'>L', 2**31 + len(largetable)))
+            largetable.append(offset)
+    for offset in largetable:
+        f.write(struct.pack(b'>Q', offset))
+    assert len(pack_checksum) == 20
+    f.write(pack_checksum)
+    return f.write_sha()
+
+
+write_pack_index = write_pack_index_v2
+
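+# Usage sketch (hypothetical names): `entries` is a list of
+# (sha, offset, crc32) tuples sorted by SHA, and the checksum is the
+# 20-byte SHA-1 trailer of the matching .pack file, e.g. as returned by
+# PackData.get_stored_checksum():
+#
+#     with open('pack-deadbeef.idx', 'wb') as f:
+#         write_pack_index(f, entries, pack_data.get_stored_checksum())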
+
+class Pack(object):
+    """A Git pack object."""
+
+    def __init__(self, basename, resolve_ext_ref=None):
+        self._basename = basename
+        self._data = None
+        self._idx = None
+        self._idx_path = self._basename + '.idx'
+        self._data_path = self._basename + '.pack'
+        self._data_load = lambda: PackData(self._data_path)
+        self._idx_load = lambda: load_pack_index(self._idx_path)
+        self.resolve_ext_ref = resolve_ext_ref
+
+    @classmethod
+    def from_lazy_objects(cls, data_fn, idx_fn):
+        """Create a new pack object from callables to load pack data and
+        index objects."""
+        ret = cls('')
+        ret._data_load = data_fn
+        ret._idx_load = idx_fn
+        return ret
+
+    @classmethod
+    def from_objects(cls, data, idx):
+        """Create a new pack object from pack data and index objects."""
+        ret = cls('')
+        ret._data_load = lambda: data
+        ret._idx_load = lambda: idx
+        return ret
+
+    def name(self):
+        """The SHA over the SHAs of the objects in this pack."""
+        return self.index.objects_sha1()
+
+    @property
+    def data(self):
+        """The pack data object being used."""
+        if self._data is None:
+            self._data = self._data_load()
+            self._data.pack = self
+            self.check_length_and_checksum()
+        return self._data
+
+    @property
+    def index(self):
+        """The index being used.
+
+        :note: This may be an in-memory index
+        """
+        if self._idx is None:
+            self._idx = self._idx_load()
+        return self._idx
+
+    def close(self):
+        if self._data is not None:
+            self._data.close()
+        if self._idx is not None:
+            self._idx.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def __eq__(self, other):
+        return isinstance(self, type(other)) and self.index == other.index
+
+    def __len__(self):
+        """Number of entries in this pack."""
+        return len(self.index)
+
+    def __repr__(self):
+        return '%s(%r)' % (self.__class__.__name__, self._basename)
+
+    def __iter__(self):
+        """Iterate over all the sha1s of the objects in this pack."""
+        return iter(self.index)
+
+    def check_length_and_checksum(self):
+        """Sanity check the length and checksum of the pack index and data."""
+        assert len(self.index) == len(self.data)
+        idx_stored_checksum = self.index.get_pack_checksum()
+        data_stored_checksum = self.data.get_stored_checksum()
+        if idx_stored_checksum != data_stored_checksum:
+            raise ChecksumMismatch(sha_to_hex(idx_stored_checksum),
+                                   sha_to_hex(data_stored_checksum))
+
+    def check(self):
+        """Check the integrity of this pack.
+
+        :raise ChecksumMismatch: if a checksum for the index or data is wrong
+        """
+        self.index.check()
+        self.data.check()
+        for obj in self.iterobjects():
+            obj.check()
+        # TODO: object connectivity checks
+
+    def get_stored_checksum(self):
+        return self.data.get_stored_checksum()
+
+    def __contains__(self, sha1):
+        """Check whether this pack contains a particular SHA1."""
+        try:
+            self.index.object_index(sha1)
+            return True
+        except KeyError:
+            return False
+
+    def get_raw(self, sha1):
+        offset = self.index.object_index(sha1)
+        obj_type, obj = self.data.get_object_at(offset)
+        type_num, chunks = self.data.resolve_object(offset, obj_type, obj)
+        return type_num, b''.join(chunks)
+
+    def __getitem__(self, sha1):
+        """Retrieve the specified SHA1."""
+        type, uncomp = self.get_raw(sha1)
+        return ShaFile.from_raw_string(type, uncomp, sha=sha1)
+
+    def iterobjects(self):
+        """Iterate over the objects in this pack."""
+        return iter(PackInflater.for_pack_data(
+            self.data, resolve_ext_ref=self.resolve_ext_ref))
+
+    def pack_tuples(self):
+        """Provide an iterable for use with write_pack_objects.
+
+        :return: Object that can iterate over (object, path) tuples
+            and provides __len__
+        """
+        class PackTupleIterable(object):
+
+            def __init__(self, pack):
+                self.pack = pack
+
+            def __len__(self):
+                return len(self.pack)
+
+            def __iter__(self):
+                return ((o, None) for o in self.pack.iterobjects())
+
+        return PackTupleIterable(self)
+
+    def keep(self, msg=None):
+        """Add a .keep file for the pack, preventing git from garbage collecting it.
+
+        :param msg: A message written inside the .keep file; can be used later
+            to determine whether or not a .keep file is obsolete.
+        :return: The path of the .keep file, as a string.
+        """
+        keepfile_name = '%s.keep' % self._basename
+        with GitFile(keepfile_name, 'wb') as keepfile:
+            if msg:
+                keepfile.write(msg)
+                keepfile.write(b'\n')
+        return keepfile_name
+
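+# Usage sketch (hypothetical path): a Pack is addressed by the basename
+# shared by its .pack/.idx pair and supports containment checks and raw
+# object access by hex or binary SHA:
+#
+#     with Pack('objects/pack/pack-deadbeef') as p:
+#         if sha in p:
+#             type_num, raw = p.get_raw(sha)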
+
+try:
+    from dulwich._pack import apply_delta, bisect_find_sha  # noqa: F811
+except ImportError:
+    pass

+ 317 - 0
dulwich/patch.py

@@ -0,0 +1,317 @@
+# patch.py -- For dealing with packed-style patches.
+# Copyright (C) 2009-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Classes for dealing with git am-style patches.
+
+These patches are basically unified diffs with some extra metadata tacked
+on.
+"""
+
+from difflib import SequenceMatcher
+import email.parser
+import time
+
+from dulwich.objects import (
+    Blob,
+    Commit,
+    S_ISGITLINK,
+    )
+
+FIRST_FEW_BYTES = 8000
+
+
+def write_commit_patch(f, commit, contents, progress, version=None,
+                       encoding=None):
+    """Write a individual file patch.
+
+    :param commit: Commit object
+    :param progress: Tuple with current patch number and total.
+    :return: tuple with filename and contents
+    """
+    encoding = encoding or getattr(f, "encoding", "ascii")
+    if isinstance(contents, str):
+        contents = contents.encode(encoding)
+    (num, total) = progress
+    f.write(b"From " + commit.id + b" " +
+            time.ctime(commit.commit_time).encode(encoding) + b"\n")
+    f.write(b"From: " + commit.author + b"\n")
+    f.write(b"Date: " +
+            time.strftime("%a, %d %b %Y %H:%M:%S %Z").encode(encoding) + b"\n")
+    f.write(("Subject: [PATCH %d/%d] " % (num, total)).encode(encoding) +
+            commit.message + b"\n")
+    f.write(b"\n")
+    f.write(b"---\n")
+    try:
+        import subprocess
+        p = subprocess.Popen(["diffstat"], stdout=subprocess.PIPE,
+                             stdin=subprocess.PIPE)
+    except (ImportError, OSError):
+        pass  # diffstat not available?
+    else:
+        (diffstat, _) = p.communicate(contents)
+        f.write(diffstat)
+        f.write(b"\n")
+    f.write(contents)
+    f.write(b"-- \n")
+    if version is None:
+        from dulwich import __version__ as dulwich_version
+        f.write(b"Dulwich %d.%d.%d\n" % dulwich_version)
+    else:
+        f.write(version.encode(encoding) + b"\n")
+
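+# Usage sketch (the Commit object and diff bytes are assumed to exist):
+# format a single commit as patch 1 of 1, git-format-patch style:
+#
+#     with open('0001-summary.patch', 'wb') as f:
+#         write_commit_patch(f, commit, diff_contents, (1, 1))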
+
+def get_summary(commit):
+    """Determine the summary line for use in a filename.
+
+    :param commit: Commit
+    :return: Summary string
+    """
+    return commit.message.splitlines()[0].replace(b" ", b"-")
+
+
+def unified_diff(a, b, fromfile, tofile, n=3):
+    """difflib.unified_diff that doesn't write any dates or trailing spaces.
+
+    Based on the same function in Python2.6.5-rc2's difflib.py
+    """
+    started = False
+    for group in SequenceMatcher(None, a, b).get_grouped_opcodes(n):
+        if not started:
+            yield b'--- ' + fromfile + b'\n'
+            yield b'+++ ' + tofile + b'\n'
+            started = True
+        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+        sizes = "@@ -%d,%d +%d,%d @@\n" % (i1+1, i2-i1, j1+1, j2-j1)
+        yield sizes.encode('ascii')
+        for tag, i1, i2, j1, j2 in group:
+            if tag == 'equal':
+                for line in a[i1:i2]:
+                    yield b' ' + line
+                continue
+            if tag == 'replace' or tag == 'delete':
+                for line in a[i1:i2]:
+                    if not line[-1:] == b'\n':
+                        line += b'\n\\ No newline at end of file\n'
+                    yield b'-' + line
+            if tag == 'replace' or tag == 'insert':
+                for line in b[j1:j2]:
+                    if not line[-1:] == b'\n':
+                        line += b'\n\\ No newline at end of file\n'
+                    yield b'+' + line
+
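+# Example: unlike difflib.unified_diff, this operates on lists of
+# newline-terminated byte lines and emits no dates:
+#
+#     out = b''.join(unified_diff([b'old\n'], [b'new\n'], b'a/f', b'b/f'))
+#     assert out == b'--- a/f\n+++ b/f\n@@ -1,1 +1,1 @@\n-old\n+new\n'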
+
+def is_binary(content):
+    """See if the first few bytes contain any null characters.
+
+    :param content: Bytestring to check for binary content
+    """
+    return b'\0' in content[:FIRST_FEW_BYTES]
+
+
+def shortid(hexsha):
+    if hexsha is None:
+        return b"0" * 7
+    else:
+        return hexsha[:7]
+
+
+def patch_filename(p, root):
+    if p is None:
+        return b"/dev/null"
+    else:
+        return root + b"/" + p
+
+
+def write_object_diff(f, store, old_file, new_file, diff_binary=False):
+    """Write the diff for an object.
+
+    :param f: File-like object to write to
+    :param store: Store to retrieve objects from, if necessary
+    :param old_file: (path, mode, hexsha) tuple
+    :param new_file: (path, mode, hexsha) tuple
+    :param diff_binary: Whether to diff files even if they
+        are considered binary files by is_binary().
+
+    :note: the tuple elements should be None for nonexistent files
+    """
+    (old_path, old_mode, old_id) = old_file
+    (new_path, new_mode, new_id) = new_file
+    old_path = patch_filename(old_path, b"a")
+    new_path = patch_filename(new_path, b"b")
+
+    def content(mode, hexsha):
+        if hexsha is None:
+            return Blob.from_string(b'')
+        elif S_ISGITLINK(mode):
+            return Blob.from_string(b"Submodule commit " + hexsha + b"\n")
+        else:
+            return store[hexsha]
+
+    def lines(content):
+        if not content:
+            return []
+        else:
+            return content.splitlines()
+    f.writelines(gen_diff_header(
+        (old_path, new_path), (old_mode, new_mode), (old_id, new_id)))
+    old_content = content(old_mode, old_id)
+    new_content = content(new_mode, new_id)
+    if not diff_binary and (
+            is_binary(old_content.data) or is_binary(new_content.data)):
+        f.write(b"Binary files " + old_path + b" and " + new_path +
+                b" differ\n")
+    else:
+        f.writelines(unified_diff(lines(old_content), lines(new_content),
+                     old_path, new_path))
+
+
+# TODO(jelmer): Support writing unicode, rather than bytes.
+def gen_diff_header(paths, modes, shas):
+    """Write a blob diff header.
+
+    :param paths: Tuple with old and new path
+    :param modes: Tuple with old and new modes
+    :param shas: Tuple with old and new shas
+    """
+    (old_path, new_path) = paths
+    (old_mode, new_mode) = modes
+    (old_sha, new_sha) = shas
+    yield b"diff --git " + old_path + b" " + new_path + b"\n"
+    if old_mode != new_mode:
+        if new_mode is not None:
+            if old_mode is not None:
+                yield ("old mode %o\n" % old_mode).encode('ascii')
+            yield ("new mode %o\n" % new_mode).encode('ascii')
+        else:
+            yield ("deleted mode %o\n" % old_mode).encode('ascii')
+    yield b"index " + shortid(old_sha) + b".." + shortid(new_sha)
+    if new_mode is not None:
+        yield (" %o" % new_mode).encode('ascii')
+    yield b"\n"
+
+
+# TODO(jelmer): Support writing unicode, rather than bytes.
+def write_blob_diff(f, old_file, new_file):
+    """Write blob diff.
+
+    :param f: File-like object to write to
+    :param old_file: (path, mode, hexsha) tuple (None if nonexisting)
+    :param new_file: (path, mode, hexsha) tuple (None if nonexisting)
+
+    :note: The use of write_object_diff is recommended over this function.
+    """
+    (old_path, old_mode, old_blob) = old_file
+    (new_path, new_mode, new_blob) = new_file
+    old_path = patch_filename(old_path, b"a")
+    new_path = patch_filename(new_path, b"b")
+
+    def lines(blob):
+        if blob is not None:
+            return blob.splitlines()
+        else:
+            return []
+    f.writelines(gen_diff_header(
+        (old_path, new_path), (old_mode, new_mode),
+        (getattr(old_blob, "id", None), getattr(new_blob, "id", None))))
+    old_contents = lines(old_blob)
+    new_contents = lines(new_blob)
+    f.writelines(unified_diff(old_contents, new_contents,
+                 old_path, new_path))
+
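+# Usage sketch: diffing two in-memory blobs at the same (hypothetical) path:
+#
+#     from io import BytesIO
+#     f = BytesIO()
+#     old = Blob.from_string(b'old\n')
+#     new = Blob.from_string(b'new\n')
+#     write_blob_diff(f, (b'file', 0o100644, old), (b'file', 0o100644, new))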
+
+def write_tree_diff(f, store, old_tree, new_tree, diff_binary=False):
+    """Write tree diff.
+
+    :param f: File-like object to write to.
+    :param old_tree: Old tree id
+    :param new_tree: New tree id
+    :param diff_binary: Whether to diff files even if they
+        are considered binary files by is_binary().
+    """
+    changes = store.tree_changes(old_tree, new_tree)
+    for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in changes:
+        write_object_diff(f, store, (oldpath, oldmode, oldsha),
+                          (newpath, newmode, newsha), diff_binary=diff_binary)
+
+
+def git_am_patch_split(f, encoding=None):
+    """Parse a git-am-style patch and split it up into bits.
+
+    :param f: File-like object to parse
+    :param encoding: Encoding to use when creating Git objects
+    :return: Tuple with commit object, diff contents and git version
+    """
+    encoding = encoding or getattr(f, "encoding", "ascii")
+    contents = f.read()
+    if (isinstance(contents, bytes) and
+            getattr(email.parser, "BytesParser", None)):
+        parser = email.parser.BytesParser()
+        msg = parser.parsebytes(contents)
+    else:
+        parser = email.parser.Parser()
+        msg = parser.parsestr(contents)
+    return parse_patch_message(msg, encoding)
+
+
+def parse_patch_message(msg, encoding=None):
+    """Extract a Commit object and patch from an e-mail message.
+
+    :param msg: An email message (email.message.Message)
+    :param encoding: Encoding to use to encode Git commits
+    :return: Tuple with commit object, diff contents and git version
+    """
+    c = Commit()
+    c.author = msg["from"].encode(encoding)
+    c.committer = msg["from"].encode(encoding)
+    try:
+        patch_tag_start = msg["subject"].index("[PATCH")
+    except ValueError:
+        subject = msg["subject"]
+    else:
+        close = msg["subject"].index("] ", patch_tag_start)
+        subject = msg["subject"][close+2:]
+    c.message = (subject.replace("\n", "") + "\n").encode(encoding)
+    first = True
+
+    body = msg.get_payload(decode=True)
+    lines = body.splitlines(True)
+    line_iter = iter(lines)
+
+    for l in line_iter:
+        if l == b"---\n":
+            break
+        if first:
+            if l.startswith(b"From: "):
+                c.author = l[len(b"From: "):].rstrip()
+            else:
+                c.message += b"\n" + l
+            first = False
+        else:
+            c.message += l
+    diff = b""
+    for l in line_iter:
+        if l == b"-- \n":
+            break
+        diff += l
+    try:
+        version = next(line_iter).rstrip(b"\n")
+    except StopIteration:
+        version = None
+    return c, diff, version
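+
+
+# Usage sketch (hypothetical filename): splitting a git-am patch back into
+# its commit metadata, unified diff, and trailing version line:
+#
+#     with open('0001-summary.patch', 'rb') as f:
+#         commit, diff, version = git_am_patch_split(f)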

+ 1141 - 0
dulwich/porcelain.py

@@ -0,0 +1,1141 @@
+# porcelain.py -- Porcelain-like layer on top of Dulwich
+# Copyright (C) 2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Simple wrapper that provides porcelain-like functions on top of Dulwich.
+
+Currently implemented:
+ * archive
+ * add
+ * branch{_create,_delete,_list}
+ * check-ignore
+ * clone
+ * commit
+ * commit-tree
+ * daemon
+ * diff-tree
+ * fetch
+ * init
+ * ls-remote
+ * ls-tree
+ * pull
+ * push
+ * rm
+ * remote{_add}
+ * receive-pack
+ * reset
+ * rev-list
+ * tag{_create,_delete,_list}
+ * upload-pack
+ * update-server-info
+ * status
+ * symbolic-ref
+
+These functions are meant to behave similarly to the git subcommands.
+Differences in behaviour are considered bugs.
+"""
+
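+# A minimal end-to-end sketch (hypothetical paths), mirroring
+# `git init; git add; git commit`:
+#
+#     from dulwich import porcelain
+#
+#     repo = porcelain.init('/tmp/demo')
+#     porcelain.add('/tmp/demo', ['/tmp/demo/README'])
+#     porcelain.commit('/tmp/demo', message=b'initial import',
+#                      author=b'Ada <ada@example.com>',
+#                      committer=b'Ada <ada@example.com>')
+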
+from collections import namedtuple
+from contextlib import (
+    closing,
+    contextmanager,
+)
+from io import BytesIO
+import os
+import posixpath
+import stat
+import sys
+import time
+
+from dulwich.archive import (
+    tar_stream,
+    )
+from dulwich.client import (
+    get_transport_and_path,
+    )
+from dulwich.diff_tree import (
+    CHANGE_ADD,
+    CHANGE_DELETE,
+    CHANGE_MODIFY,
+    CHANGE_RENAME,
+    CHANGE_COPY,
+    RENAME_CHANGE_TYPES,
+    )
+from dulwich.errors import (
+    SendPackError,
+    UpdateRefsError,
+    )
+from dulwich.ignore import IgnoreFilterManager
+from dulwich.index import (
+    blob_from_path_and_stat,
+    get_unstaged_changes,
+    )
+from dulwich.object_store import (
+    tree_lookup_path,
+    )
+from dulwich.objects import (
+    Commit,
+    Tag,
+    format_timezone,
+    parse_timezone,
+    pretty_format_tree_entry,
+    )
+from dulwich.objectspec import (
+    parse_object,
+    parse_reftuples,
+    parse_tree,
+    )
+from dulwich.pack import (
+    write_pack_index,
+    write_pack_objects,
+    )
+from dulwich.patch import write_tree_diff
+from dulwich.protocol import (
+    Protocol,
+    ZERO_SHA,
+    )
+from dulwich.refs import ANNOTATED_TAG_SUFFIX
+from dulwich.repo import (BaseRepo, Repo)
+from dulwich.server import (
+    FileSystemBackend,
+    TCPGitServer,
+    ReceivePackHandler,
+    UploadPackHandler,
+    update_server_info as server_update_server_info,
+    )
+
+
+# Module level tuple definition for status output
+GitStatus = namedtuple('GitStatus', 'staged unstaged untracked')
+
+
+default_bytes_out_stream = getattr(sys.stdout, 'buffer', sys.stdout)
+default_bytes_err_stream = getattr(sys.stderr, 'buffer', sys.stderr)
+
+
+DEFAULT_ENCODING = 'utf-8'
+
+
+class RemoteExists(Exception):
+    """Raised when the remote already exists."""
+
+
+def open_repo(path_or_repo):
+    """Open an argument that can be a repository or a path for a repository."""
+    if isinstance(path_or_repo, BaseRepo):
+        return path_or_repo
+    return Repo(path_or_repo)
+
+
+@contextmanager
+def _noop_context_manager(obj):
+    """Context manager that has the same api as closing but does nothing."""
+    yield obj
+
+
+def open_repo_closing(path_or_repo):
+    """Open an argument that can be a repository or a path for a repository.
+    returns a context manager that will close the repo on exit if the argument
+    is a path, else does nothing if the argument is a repo.
+    """
+    if isinstance(path_or_repo, BaseRepo):
+        return _noop_context_manager(path_or_repo)
+    return closing(Repo(path_or_repo))
+
+
+def path_to_tree_path(repopath, path):
+    """Convert a path to a path usable in e.g. an index.
+
+    :param repopath: Repository path
+    :param path: A path
+    :return: A path formatted for use in e.g. an index
+    """
+    path = os.path.relpath(path, repopath)
+    if os.path.sep != '/':
+        path = path.replace(os.path.sep, '/')
+    return path.encode(sys.getfilesystemencoding())
+
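+# Example (POSIX): the result is repository-relative, slash-separated and
+# filesystem-encoded:
+#
+#     path_to_tree_path('/repo', '/repo/src/main.py')  # -> b'src/main.py'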
+
+def archive(repo, committish=None, outstream=default_bytes_out_stream,
+            errstream=default_bytes_err_stream):
+    """Create an archive.
+
+    :param repo: Path of repository for which to generate an archive.
+    :param committish: Commit SHA1 or ref to use
+    :param outstream: Output stream (defaults to stdout)
+    :param errstream: Error stream (defaults to stderr)
+    """
+
+    if committish is None:
+        committish = "HEAD"
+    with open_repo_closing(repo) as repo_obj:
+        c = repo_obj[committish]
+        for chunk in tar_stream(
+                repo_obj.object_store, repo_obj.object_store[c.tree],
+                c.commit_time):
+            outstream.write(chunk)
+
+
+def update_server_info(repo="."):
+    """Update server info files for a repository.
+
+    :param repo: path to the repository
+    """
+    with open_repo_closing(repo) as r:
+        server_update_server_info(r)
+
+
+def symbolic_ref(repo, ref_name, force=False):
+    """Set git symbolic ref into HEAD.
+
+    :param repo: path to the repository
+    :param ref_name: short name of the new ref
+    :param force: force settings without checking if it exists in refs/heads
+    """
+    with open_repo_closing(repo) as repo_obj:
+        ref_path = b'refs/heads/' + ref_name
+        if not force and ref_path not in repo_obj.refs.keys():
+            raise ValueError('fatal: ref `%s` is not a ref' % ref_name)
+        repo_obj.refs.set_symbolic_ref(b'HEAD', ref_path)
+
+
+def commit(repo=".", message=None, author=None, committer=None):
+    """Create a new commit.
+
+    :param repo: Path to repository
+    :param message: Optional commit message
+    :param author: Optional author name and email
+    :param committer: Optional committer name and email
+    :return: SHA1 of the new commit
+    """
+    # FIXME: Support --all argument
+    # FIXME: Support --signoff argument
+    with open_repo_closing(repo) as r:
+        return r.do_commit(message=message, author=author, committer=committer)
+
+
+def commit_tree(repo, tree, message=None, author=None, committer=None):
+    """Create a new commit object.
+
+    :param repo: Path to repository
+    :param tree: An existing tree object
+    :param author: Optional author name and email
+    :param committer: Optional committer name and email
+    """
+    with open_repo_closing(repo) as r:
+        return r.do_commit(
+            message=message, tree=tree, committer=committer, author=author)
+
+
+def init(path=".", bare=False):
+    """Create a new git repository.
+
+    :param path: Path to repository.
+    :param bare: Whether to create a bare repository.
+    :return: A Repo instance
+    """
+    if not os.path.exists(path):
+        os.mkdir(path)
+
+    if bare:
+        return Repo.init_bare(path)
+    else:
+        return Repo.init(path)
+
+
+def clone(source, target=None, bare=False, checkout=None,
+          errstream=default_bytes_err_stream, outstream=None,
+          origin=b"origin"):
+    """Clone a local or remote git repository.
+
+    :param source: Path or URL for source repository
+    :param target: Path to target repository (optional)
+    :param bare: Whether or not to create a bare repository
+    :param checkout: Whether or not to check-out HEAD after cloning
+    :param errstream: Optional stream to write progress to
+    :param outstream: Optional stream to write progress to (deprecated)
+    :return: The new repository
+    """
+    if outstream is not None:
+        import warnings
+        warnings.warn(
+            "outstream= has been deprecated in favour of errstream=.",
+            DeprecationWarning, stacklevel=3)
+        errstream = outstream
+
+    if checkout is None:
+        checkout = (not bare)
+    if checkout and bare:
+        raise ValueError("checkout and bare are incompatible")
+    client, host_path = get_transport_and_path(source)
+
+    if target is None:
+        target = host_path.split("/")[-1]
+
+    if not os.path.exists(target):
+        os.mkdir(target)
+
+    if bare:
+        r = Repo.init_bare(target)
+    else:
+        r = Repo.init(target)
+    try:
+        remote_refs = client.fetch(
+            host_path, r, determine_wants=r.object_store.determine_wants_all,
+            progress=errstream.write)
+        r.refs.import_refs(
+            b'refs/remotes/' + origin,
+            {n[len(b'refs/heads/'):]: v for (n, v) in remote_refs.items()
+                if n.startswith(b'refs/heads/')})
+        r.refs.import_refs(
+            b'refs/tags',
+            {n[len(b'refs/tags/'):]: v for (n, v) in remote_refs.items()
+                if n.startswith(b'refs/tags/') and
+                not n.endswith(ANNOTATED_TAG_SUFFIX)})
+        if b"HEAD" in remote_refs and not bare:
+            # TODO(jelmer): Support symref capability,
+            # https://github.com/jelmer/dulwich/issues/485
+            r[b"HEAD"] = remote_refs[b"HEAD"]
+        target_config = r.get_config()
+        if not isinstance(source, bytes):
+            source = source.encode(DEFAULT_ENCODING)
+        target_config.set((b'remote', b'origin'), b'url', source)
+        target_config.set(
+            (b'remote', b'origin'), b'fetch',
+            b'+refs/heads/*:refs/remotes/origin/*')
+        target_config.write_to_path()
+        if checkout and b"HEAD" in r.refs:
+            errstream.write(b'Checking out HEAD\n')
+            r.reset_index()
+    except BaseException:
+        r.close()
+        raise
+
+    return r
+
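+# Usage sketch (hypothetical URL): clone over HTTP and check out HEAD into
+# a local directory:
+#
+#     r = clone('https://example.com/repo.git', 'repo')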
+
+def add(repo=".", paths=None):
+    """Add files to the staging area.
+
+    :param repo: Repository for the files
+    :param paths: Paths to add.  No value passed stages all modified files.
+    :return: Tuple with set of added files and ignored files
+    """
+    ignored = set()
+    with open_repo_closing(repo) as r:
+        ignore_manager = IgnoreFilterManager.from_repo(r)
+        if not paths:
+            paths = list(
+                get_untracked_paths(os.getcwd(), r.path, r.open_index()))
+        relpaths = []
+        if not isinstance(paths, list):
+            paths = [paths]
+        for p in paths:
+            relpath = os.path.relpath(p, r.path)
+            # FIXME: Support patterns, directories.
+            if ignore_manager.is_ignored(relpath):
+                ignored.add(relpath)
+                continue
+            relpaths.append(relpath)
+        r.stage(relpaths)
+    return (relpaths, ignored)
+
+
+def remove(repo=".", paths=None, cached=False):
+    """Remove files from the staging area.
+
+    :param repo: Repository for the files
+    :param paths: Paths to remove
+    """
+    with open_repo_closing(repo) as r:
+        index = r.open_index()
+        for p in paths:
+            full_path = os.path.abspath(p).encode(sys.getfilesystemencoding())
+            tree_path = path_to_tree_path(r.path, p)
+            try:
+                index_sha = index[tree_path].sha
+            except KeyError:
+                raise Exception('%s did not match any files' % p)
+
+            if not cached:
+                try:
+                    st = os.lstat(full_path)
+                except OSError:
+                    pass
+                else:
+                    try:
+                        blob = blob_from_path_and_stat(full_path, st)
+                    except IOError:
+                        pass
+                    else:
+                        try:
+                            committed_sha = tree_lookup_path(
+                                r.__getitem__, r[r.head()].tree, tree_path)[1]
+                        except KeyError:
+                            committed_sha = None
+
+                        if blob.id != index_sha and index_sha != committed_sha:
+                            raise Exception(
+                                'file has staged content differing '
+                                'from both the file and head: %s' % p)
+
+                        if index_sha != committed_sha:
+                            raise Exception(
+                                'file has staged changes: %s' % p)
+                        os.remove(full_path)
+            del index[tree_path]
+        index.write()
+
+
+rm = remove
+
+
+def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING):
+    if commit.encoding is not None:
+        return contents.decode(commit.encoding, "replace")
+    return contents.decode(default_encoding, "replace")
+
+
+def print_commit(commit, decode, outstream=sys.stdout):
+    """Write a human-readable commit log entry.
+
+    :param commit: A `Commit` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: A stream file to write to
+    """
+    outstream.write("-" * 50 + "\n")
+    outstream.write("commit: " + commit.id.decode('ascii') + "\n")
+    if len(commit.parents) > 1:
+        outstream.write(
+            "merge: " +
+            "...".join([c.decode('ascii') for c in commit.parents[1:]]) + "\n")
+    outstream.write("Author: " + decode(commit.author) + "\n")
+    if commit.author != commit.committer:
+        outstream.write("Committer: " + decode(commit.committer) + "\n")
+
+    time_tuple = time.gmtime(commit.author_time + commit.author_timezone)
+    time_str = time.strftime("%a %b %d %Y %H:%M:%S", time_tuple)
+    timezone_str = format_timezone(commit.author_timezone).decode('ascii')
+    outstream.write("Date:   " + time_str + " " + timezone_str + "\n")
+    outstream.write("\n")
+    outstream.write(decode(commit.message) + "\n")
+    outstream.write("\n")
+
+
+def print_tag(tag, decode, outstream=sys.stdout):
+    """Write a human-readable tag.
+
+    :param tag: A `Tag` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: A stream to write to
+    """
+    outstream.write("Tagger: " + decode(tag.tagger) + "\n")
+    outstream.write("Date:   " + decode(tag.tag_time) + "\n")
+    outstream.write("\n")
+    outstream.write(decode(tag.message) + "\n")
+    outstream.write("\n")
+
+
+def show_blob(repo, blob, decode, outstream=sys.stdout):
+    """Write a blob to a stream.
+
+    :param repo: A `Repo` object
+    :param blob: A `Blob` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: A stream file to write to
+    """
+    outstream.write(decode(blob.data))
+
+
+def show_commit(repo, commit, decode, outstream=sys.stdout):
+    """Show a commit to a stream.
+
+    :param repo: A `Repo` object
+    :param commit: A `Commit` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: Stream to write to
+    """
+    print_commit(commit, decode=decode, outstream=outstream)
+    if commit.parents:
+        parent_commit = repo[commit.parents[0]]
+        base_tree = parent_commit.tree
+    else:
+        base_tree = None
+    diffstream = BytesIO()
+    write_tree_diff(
+        diffstream,
+        repo.object_store, base_tree, commit.tree)
+    diffstream.seek(0)
+    outstream.write(
+        diffstream.getvalue().decode(
+                commit.encoding or DEFAULT_ENCODING, 'replace'))
+
+
+def show_tree(repo, tree, decode, outstream=sys.stdout):
+    """Print a tree to a stream.
+
+    :param repo: A `Repo` object
+    :param tree: A `Tree` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: Stream to write to
+    """
+    for n in tree:
+        outstream.write(decode(n) + "\n")
+
+
+def show_tag(repo, tag, decode, outstream=sys.stdout):
+    """Print a tag to a stream.
+
+    :param repo: A `Repo` object
+    :param tag: A `Tag` object
+    :param decode: Function for decoding bytes to unicode string
+    :param outstream: Stream to write to
+    """
+    print_tag(tag, decode, outstream)
+    show_object(repo, repo[tag.object[1]], decode, outstream)
+
+
+def show_object(repo, obj, decode, outstream):
+    return {
+        b"tree": show_tree,
+        b"blob": show_blob,
+        b"commit": show_commit,
+        b"tag": show_tag,
+            }[obj.type_name](repo, obj, decode, outstream)
+
+
+def print_name_status(changes):
+    """Yield a simple status summary line for each changed file."""
+    for change in changes:
+        if not change:
+            continue
+        if isinstance(change, list):
+            change = change[0]
+        if change.type == CHANGE_ADD:
+            path1 = change.new.path
+            path2 = ''
+            kind = 'A'
+        elif change.type == CHANGE_DELETE:
+            path1 = change.old.path
+            path2 = ''
+            kind = 'D'
+        elif change.type == CHANGE_MODIFY:
+            path1 = change.new.path
+            path2 = ''
+            kind = 'M'
+        elif change.type in RENAME_CHANGE_TYPES:
+            path1 = change.old.path
+            path2 = change.new.path
+            if change.type == CHANGE_RENAME:
+                kind = 'R'
+            elif change.type == CHANGE_COPY:
+                kind = 'C'
+        yield '%-8s%-20s%-20s' % (kind, path1, path2)
+
+
+def log(repo=".", paths=None, outstream=sys.stdout, max_entries=None,
+        reverse=False, name_status=False):
+    """Write commit logs.
+
+    :param repo: Path to repository
+    :param paths: Optional set of specific paths to print entries for
+    :param outstream: Stream to write log output to
+    :param reverse: Reverse order in which entries are printed
+    :param name_status: Print name status
+    :param max_entries: Optional maximum number of entries to display
+    """
+    with open_repo_closing(repo) as r:
+        walker = r.get_walker(
+            max_entries=max_entries, paths=paths, reverse=reverse)
+        for entry in walker:
+            def decode(x):
+                return commit_decode(entry.commit, x)
+            print_commit(entry.commit, decode, outstream)
+            if name_status:
+                outstream.writelines(
+                    [l+'\n' for l in print_name_status(entry.changes())])
+
+
+# TODO(jelmer): better default for encoding?
+def show(repo=".", objects=None, outstream=sys.stdout,
+         default_encoding=DEFAULT_ENCODING):
+    """Print the changes in a commit.
+
+    :param repo: Path to repository
+    :param objects: Objects to show (defaults to [HEAD])
+    :param outstream: Stream to write to
+    :param default_encoding: Default encoding to use if none is set in the
+        commit
+    """
+    if objects is None:
+        objects = ["HEAD"]
+    if not isinstance(objects, list):
+        objects = [objects]
+    with open_repo_closing(repo) as r:
+        for objectish in objects:
+            o = parse_object(r, objectish)
+            if isinstance(o, Commit):
+                def decode(x):
+                    return commit_decode(o, x, default_encoding)
+            else:
+                def decode(x):
+                    return x.decode(default_encoding)
+            show_object(r, o, decode, outstream)
+
+
+def diff_tree(repo, old_tree, new_tree, outstream=sys.stdout):
+    """Compares the content and mode of blobs found via two tree objects.
+
+    :param repo: Path to repository
+    :param old_tree: Id of old tree
+    :param new_tree: Id of new tree
+    :param outstream: Stream to write to
+    """
+    with open_repo_closing(repo) as r:
+        write_tree_diff(outstream, r.object_store, old_tree, new_tree)
+
+
+def rev_list(repo, commits, outstream=sys.stdout):
+    """Lists commit objects in reverse chronological order.
+
+    :param repo: Path to repository
+    :param commits: Commits over which to iterate
+    :param outstream: Stream to write to
+    """
+    with open_repo_closing(repo) as r:
+        for entry in r.get_walker(include=[r[c].id for c in commits]):
+            outstream.write(entry.commit.id + b"\n")
+
+
+def tag(*args, **kwargs):
+    import warnings
+    warnings.warn("tag has been deprecated in favour of tag_create.",
+                  DeprecationWarning)
+    return tag_create(*args, **kwargs)
+
+
+def tag_create(
+        repo, tag, author=None, message=None, annotated=False,
+        objectish="HEAD", tag_time=None, tag_timezone=None):
+    """Creates a tag in git via dulwich calls:
+
+    :param repo: Path to repository
+    :param tag: tag string
+    :param author: tag author (optional, if annotated is set)
+    :param message: tag message (optional)
+    :param annotated: whether to create an annotated tag
+    :param objectish: object the tag should point at, defaults to HEAD
+    :param tag_time: Optional time for annotated tag
+    :param tag_timezone: Optional timezone for annotated tag
+    """
+
+    with open_repo_closing(repo) as r:
+        object = parse_object(r, objectish)
+
+        if annotated:
+            # Create the tag object
+            tag_obj = Tag()
+            if author is None:
+                # TODO(jelmer): Don't use repo private method.
+                author = r._get_user_identity()
+            tag_obj.tagger = author
+            tag_obj.message = message
+            tag_obj.name = tag
+            tag_obj.object = (type(object), object.id)
+            if tag_time is None:
+                tag_time = int(time.time())
+            tag_obj.tag_time = tag_time
+            if tag_timezone is None:
+                # TODO(jelmer) Use current user timezone rather than UTC
+                tag_timezone = 0
+            elif isinstance(tag_timezone, str):
+                tag_timezone = parse_timezone(tag_timezone)
+            tag_obj.tag_timezone = tag_timezone
+            r.object_store.add_object(tag_obj)
+            tag_id = tag_obj.id
+        else:
+            tag_id = object.id
+
+        r.refs[b'refs/tags/' + tag] = tag_id
+
+
+def list_tags(*args, **kwargs):
+    import warnings
+    warnings.warn("list_tags has been deprecated in favour of tag_list.",
+                  DeprecationWarning)
+    return tag_list(*args, **kwargs)
+
+
+def tag_list(repo, outstream=sys.stdout):
+    """List all tags.
+
+    :param repo: Path to repository
+    :param outstream: Stream to write tags to
+    """
+    with open_repo_closing(repo) as r:
+        tags = sorted(r.refs.as_dict(b"refs/tags"))
+        return tags
+
+
+def tag_delete(repo, name):
+    """Remove a tag.
+
+    :param repo: Path to repository
+    :param name: Name of tag to remove
+    """
+    with open_repo_closing(repo) as r:
+        if isinstance(name, bytes):
+            names = [name]
+        elif isinstance(name, list):
+            names = name
+        else:
+            raise TypeError("Unexpected tag name type %r" % name)
+        for name in names:
+            del r.refs[b"refs/tags/" + name]
+
+
+def reset(repo, mode, treeish="HEAD"):
+    """Reset current HEAD to the specified state.
+
+    :param repo: Path to repository
+    :param mode: Mode ("hard", "soft", "mixed")
+    :param treeish: Treeish to reset to
+    """
+
+    if mode != "hard":
+        raise ValueError("hard is the only mode currently supported")
+
+    with open_repo_closing(repo) as r:
+        tree = parse_tree(r, treeish)
+        r.reset_index(tree.id)
+
+
+def push(repo, remote_location, refspecs,
+         outstream=default_bytes_out_stream,
+         errstream=default_bytes_err_stream):
+    """Remote push with dulwich via dulwich.client
+
+    :param repo: Path to repository
+    :param remote_location: Location of the remote
+    :param refspecs: Refs to push to remote
+    :param outstream: A stream file to write output
+    :param errstream: A stream file to write errors
+    """
+
+    # Open the repo
+    with open_repo_closing(repo) as r:
+
+        # Get the client and path
+        client, path = get_transport_and_path(remote_location)
+
+        selected_refs = []
+
+        def update_refs(refs):
+            selected_refs.extend(parse_reftuples(r.refs, refs, refspecs))
+            new_refs = {}
+            # TODO: Handle selected_refs == {None: None}
+            for (lh, rh, force) in selected_refs:
+                if lh is None:
+                    new_refs[rh] = ZERO_SHA
+                else:
+                    new_refs[rh] = r.refs[lh]
+            return new_refs
+
+        err_encoding = getattr(errstream, 'encoding', None) or DEFAULT_ENCODING
+        remote_location_bytes = client.get_url(path).encode(err_encoding)
+        try:
+            client.send_pack(
+                path, update_refs, r.object_store.generate_pack_contents,
+                progress=errstream.write)
+            errstream.write(
+                b"Push to " + remote_location_bytes + b" successful.\n")
+        except (UpdateRefsError, SendPackError) as e:
+            errstream.write(b"Push to " + remote_location_bytes +
+                            b" failed -> " + e.message.encode(err_encoding) +
+                            b"\n")
+
+
+def pull(repo, remote_location=None, refspecs=None,
+         outstream=default_bytes_out_stream,
+         errstream=default_bytes_err_stream):
+    """Pull from remote via dulwich.client
+
+    :param repo: Path to repository
+    :param remote_location: Location of the remote
+    :param refspec: refspecs to fetch
+    :param outstream: A stream file to write to output
+    :param errstream: A stream file to write to errors
+    """
+    # Open the repo
+    with open_repo_closing(repo) as r:
+        if remote_location is None:
+            # TODO(jelmer): Lookup 'remote' for current branch in config
+            raise NotImplementedError(
+                "looking up remote from branch config not supported yet")
+        if refspecs is None:
+            refspecs = [b"HEAD"]
+        selected_refs = []
+
+        def determine_wants(remote_refs):
+            selected_refs.extend(
+                parse_reftuples(remote_refs, r.refs, refspecs))
+            return [remote_refs[lh] for (lh, rh, force) in selected_refs]
+        client, path = get_transport_and_path(remote_location)
+        remote_refs = client.fetch(
+            path, r, progress=errstream.write, determine_wants=determine_wants)
+        for (lh, rh, force) in selected_refs:
+            r.refs[rh] = remote_refs[lh]
+        if selected_refs:
+            r[b'HEAD'] = remote_refs[selected_refs[0][1]]
+
+        # Perform 'git checkout .' - syncs staged changes
+        tree = r[b"HEAD"].tree
+        r.reset_index(tree=tree)
+
+
+def status(repo=".", ignored=False):
+    """Returns staged, unstaged, and untracked changes relative to the HEAD.
+
+    :param repo: Path to repository or repository object
+    :param ignored: Whether to include ignored files in `untracked`
+    :return: GitStatus tuple,
+        staged -    list of staged paths (diff index/HEAD)
+        unstaged -  list of unstaged paths (diff index/working-tree)
+        untracked - list of untracked, un-ignored & non-.git paths
+    """
+    with open_repo_closing(repo) as r:
+        # 1. Get status of staged
+        tracked_changes = get_tree_changes(r)
+        # 2. Get status of unstaged
+        index = r.open_index()
+        unstaged_changes = list(get_unstaged_changes(index, r.path))
+        ignore_manager = IgnoreFilterManager.from_repo(r)
+        untracked_paths = get_untracked_paths(r.path, r.path, index)
+        if ignored:
+            untracked_changes = list(untracked_paths)
+        else:
+            untracked_changes = [
+                    p for p in untracked_paths
+                    if not ignore_manager.is_ignored(p)]
+        return GitStatus(tracked_changes, unstaged_changes, untracked_changes)
+
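+# Usage sketch: GitStatus is the namedtuple defined above; `staged` is a
+# dict of lists keyed by 'add'/'delete'/'modify':
+#
+#     st = status('.')
+#     print(st.staged['modify'], st.unstaged, st.untracked)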
+
+def get_untracked_paths(frompath, basepath, index):
+    """Get untracked paths.
+
+    :param frompath: Path to walk
+    :param basepath: Path to compare to
+    :param index: Index to check against
+    """
+    # If nothing is specified, add all non-ignored files.
+    for dirpath, dirnames, filenames in os.walk(frompath):
+        # Skip .git and below.
+        if '.git' in dirnames:
+            dirnames.remove('.git')
+            if dirpath != basepath:
+                continue
+        if '.git' in filenames:
+            filenames.remove('.git')
+            if dirpath != basepath:
+                continue
+        for filename in filenames:
+            ap = os.path.join(dirpath, filename)
+            ip = path_to_tree_path(basepath, ap)
+            if ip not in index:
+                yield os.path.relpath(ap, frompath)
+
+
+def get_tree_changes(repo):
+    """Return add/delete/modify changes to tree by comparing index to HEAD.
+
+    :param repo: repo path or object
+    :return: dict with lists for each type of change
+    """
+    with open_repo_closing(repo) as r:
+        index = r.open_index()
+
+        # Compares the Index to the HEAD & determines changes
+        # Iterate through the changes and report add/delete/modify
+        # TODO: call out to dulwich.diff_tree somehow.
+        tracked_changes = {
+            'add': [],
+            'delete': [],
+            'modify': [],
+        }
+        try:
+            tree_id = r[b'HEAD'].tree
+        except KeyError:
+            tree_id = None
+
+        for change in index.changes_from_tree(r.object_store, tree_id):
+            if not change[0][0]:
+                tracked_changes['add'].append(change[0][1])
+            elif not change[0][1]:
+                tracked_changes['delete'].append(change[0][0])
+            elif change[0][0] == change[0][1]:
+                tracked_changes['modify'].append(change[0][0])
+            else:
+                raise AssertionError('git mv ops not yet supported')
+        return tracked_changes
+
+
+def daemon(path=".", address=None, port=None):
+    """Run a daemon serving Git requests over TCP/IP.
+
+    :param path: Path to the directory to serve.
+    :param address: Optional address to listen on (defaults to ::)
+    :param port: Optional port to listen on (defaults to TCP_GIT_PORT)
+    """
+    # TODO(jelmer): Support git-daemon-export-ok and --export-all.
+    backend = FileSystemBackend(path)
+    server = TCPGitServer(backend, address, port)
+    server.serve_forever()
+
+
+def web_daemon(path=".", address=None, port=None):
+    """Run a daemon serving Git requests over HTTP.
+
+    :param path: Path to the directory to serve
+    :param address: Optional address to listen on (defaults to ::)
+    :param port: Optional port to listen on (defaults to 80)
+    """
+    from dulwich.web import (
+        make_wsgi_chain,
+        make_server,
+        WSGIRequestHandlerLogger,
+        WSGIServerLogger)
+
+    backend = FileSystemBackend(path)
+    app = make_wsgi_chain(backend)
+    server = make_server(address, port, app,
+                         handler_class=WSGIRequestHandlerLogger,
+                         server_class=WSGIServerLogger)
+    server.serve_forever()
+
+
+def upload_pack(path=".", inf=None, outf=None):
+    """Upload a pack file after negotiating its contents using smart protocol.
+
+    :param path: Path to the repository
+    :param inf: Input stream to communicate with client
+    :param outf: Output stream to communicate with client
+    """
+    if outf is None:
+        outf = getattr(sys.stdout, 'buffer', sys.stdout)
+    if inf is None:
+        inf = getattr(sys.stdin, 'buffer', sys.stdin)
+    path = os.path.expanduser(path)
+    backend = FileSystemBackend(path)
+
+    def send_fn(data):
+        outf.write(data)
+        outf.flush()
+    proto = Protocol(inf.read, send_fn)
+    handler = UploadPackHandler(backend, [path], proto)
+    # FIXME: Catch exceptions and write a single-line summary to outf.
+    handler.handle()
+    return 0
+
+
+def receive_pack(path=".", inf=None, outf=None):
+    """Receive a pack file after negotiating its contents using smart protocol.
+
+    :param path: Path to the repository
+    :param inf: Input stream to communicate with client
+    :param outf: Output stream to communicate with client
+    """
+    if outf is None:
+        outf = getattr(sys.stdout, 'buffer', sys.stdout)
+    if inf is None:
+        inf = getattr(sys.stdin, 'buffer', sys.stdin)
+    path = os.path.expanduser(path)
+    backend = FileSystemBackend(path)
+
+    def send_fn(data):
+        outf.write(data)
+        outf.flush()
+    proto = Protocol(inf.read, send_fn)
+    handler = ReceivePackHandler(backend, [path], proto)
+    # FIXME: Catch exceptions and write a single-line summary to outf.
+    handler.handle()
+    return 0
+
+
+def branch_delete(repo, name):
+    """Delete a branch.
+
+    :param repo: Path to the repository
+    :param name: Name of the branch
+    """
+    with open_repo_closing(repo) as r:
+        if isinstance(name, bytes):
+            names = [name]
+        elif isinstance(name, list):
+            names = name
+        else:
+            raise TypeError("Unexpected branch name type %r" % name)
+        for name in names:
+            del r.refs[b"refs/heads/" + name]
+
+
+def branch_create(repo, name, objectish=None, force=False):
+    """Create a branch.
+
+    :param repo: Path to the repository
+    :param name: Name of the new branch
+    :param objectish: Target object to point new branch at (defaults to HEAD)
+    :param force: Force creation of branch, even if it already exists
+    """
+    with open_repo_closing(repo) as r:
+        if objectish is None:
+            objectish = "HEAD"
+        object = parse_object(r, objectish)
+        refname = b"refs/heads/" + name
+        if refname in r.refs and not force:
+            raise KeyError("Branch with name %s already exists." % name)
+        r.refs[refname] = object.id
+
+
+def branch_list(repo):
+    """List all branches.
+
+    :param repo: Path to the repository
+    """
+    with open_repo_closing(repo) as r:
+        return r.refs.keys(base=b"refs/heads/")
+
+
+def fetch(repo, remote_location, outstream=sys.stdout,
+          errstream=default_bytes_err_stream):
+    """Fetch objects from a remote server.
+
+    :param repo: Path to the repository
+    :param remote_location: String identifying a remote server
+    :param outstream: Output stream (defaults to stdout)
+    :param errstream: Error stream (defaults to stderr)
+    :return: Dictionary with refs on the remote
+    """
+    with open_repo_closing(repo) as r:
+        client, path = get_transport_and_path(remote_location)
+        remote_refs = client.fetch(path, r, progress=errstream.write)
+    return remote_refs
+
+
+def ls_remote(remote):
+    """List the refs in a remote.
+
+    :param remote: Remote repository location
+    :return: Dictionary with remote refs
+    """
+    client, host_path = get_transport_and_path(remote)
+    return client.get_refs(host_path)
+
+
+def repack(repo):
+    """Repack loose files in a repository.
+
+    Currently this only packs loose objects.
+
+    :param repo: Path to the repository
+    """
+    with open_repo_closing(repo) as r:
+        r.object_store.pack_loose_objects()
+
+
+def pack_objects(repo, object_ids, packf, idxf, delta_window_size=None):
+    """Pack objects into a file.
+
+    :param repo: Path to the repository
+    :param object_ids: List of object ids to write
+    :param packf: File-like object to write to
+    :param idxf: File-like object to write to (can be None)
+    :param delta_window_size: Sliding window size for delta compression
+        (None for default)
+    """
+    with open_repo_closing(repo) as r:
+        entries, data_sum = write_pack_objects(
+            packf,
+            r.object_store.iter_shas((oid, None) for oid in object_ids),
+            delta_window_size=delta_window_size)
+    if idxf is not None:
+        entries = sorted([(k, v[0], v[1]) for (k, v) in entries.items()])
+        write_pack_index(idxf, entries, data_sum)
+
+
+def ls_tree(repo, treeish=b"HEAD", outstream=sys.stdout, recursive=False,
+            name_only=False):
+    """List contents of a tree.
+
+    :param repo: Path to the repository
+    :param treeish: Tree id to list
+    :param outstream: Output stream (defaults to stdout)
+    :param recursive: Whether to recursively list files
+    :param name_only: Only print item name
+    """
+    def list_tree(store, treeid, base):
+        for (name, mode, sha) in store[treeid].iteritems():
+            if base:
+                name = posixpath.join(base, name)
+            if name_only:
+                outstream.write(name + b"\n")
+            else:
+                outstream.write(pretty_format_tree_entry(name, mode, sha))
+            if stat.S_ISDIR(mode):
+                list_tree(store, sha, name)
+    with open_repo_closing(repo) as r:
+        tree = parse_tree(r, treeish)
+        list_tree(r.object_store, tree.id, "")
+
+
+def remote_add(repo, name, url):
+    """Add a remote.
+
+    :param repo: Path to the repository
+    :param name: Remote name
+    :param url: Remote URL
+    """
+    if not isinstance(name, bytes):
+        name = name.encode(DEFAULT_ENCODING)
+    if not isinstance(url, bytes):
+        url = url.encode(DEFAULT_ENCODING)
+    with open_repo_closing(repo) as r:
+        c = r.get_config()
+        section = (b'remote', name)
+        if c.has_section(section):
+            raise RemoteExists(section)
+        c.set(section, b"url", url)
+        c.write_to_path()
+
+
+def check_ignore(repo, paths, no_index=False):
+    """Debug gitignore files.
+
+    :param repo: Path to the repository
+    :param paths: List of paths to check for
+    :param no_index: Don't check index
+    :return: List of ignored files
+    """
+    with open_repo_closing(repo) as r:
+        index = r.open_index()
+        ignore_manager = IgnoreFilterManager.from_repo(r)
+        for path in paths:
+            if os.path.isabs(path):
+                path = os.path.relpath(path, r.path)
+            if not no_index and path_to_tree_path(r.path, path) in index:
+                continue
+            if ignore_manager.is_ignored(path):
+                yield path

+ 505 - 0
dulwich/protocol.py

@@ -0,0 +1,505 @@
+# protocol.py -- Shared parts of the git protocols
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Generic functions for talking the git smart server protocol."""
+
+from io import BytesIO
+from os import (
+    SEEK_END,
+    )
+import socket
+
+import dulwich
+from dulwich.errors import (
+    HangupException,
+    GitProtocolError,
+    )
+
+TCP_GIT_PORT = 9418
+
+ZERO_SHA = b"0" * 40
+
+SINGLE_ACK = 0
+MULTI_ACK = 1
+MULTI_ACK_DETAILED = 2
+
+# pack data
+SIDE_BAND_CHANNEL_DATA = 1
+# progress messages
+SIDE_BAND_CHANNEL_PROGRESS = 2
+# fatal error message just before stream aborts
+SIDE_BAND_CHANNEL_FATAL = 3
+
+CAPABILITY_DELETE_REFS = b'delete-refs'
+CAPABILITY_INCLUDE_TAG = b'include-tag'
+CAPABILITY_MULTI_ACK = b'multi_ack'
+CAPABILITY_MULTI_ACK_DETAILED = b'multi_ack_detailed'
+CAPABILITY_NO_DONE = b'no-done'
+CAPABILITY_NO_PROGRESS = b'no-progress'
+CAPABILITY_OFS_DELTA = b'ofs-delta'
+CAPABILITY_QUIET = b'quiet'
+CAPABILITY_REPORT_STATUS = b'report-status'
+CAPABILITY_SHALLOW = b'shallow'
+CAPABILITY_SIDE_BAND_64K = b'side-band-64k'
+CAPABILITY_THIN_PACK = b'thin-pack'
+CAPABILITY_AGENT = b'agent'
+
+# Magic ref that is used to attach capabilities to when
+# there are no refs. Should always be set to ZERO_SHA.
+CAPABILITIES_REF = b'capabilities^{}'
+
+
+def agent_string():
+    return ('dulwich/%d.%d.%d' % dulwich.__version__).encode('ascii')
+
+
+def capability_agent():
+    return CAPABILITY_AGENT + b'=' + agent_string()
+
+
+COMMAND_DEEPEN = b'deepen'
+COMMAND_SHALLOW = b'shallow'
+COMMAND_UNSHALLOW = b'unshallow'
+COMMAND_DONE = b'done'
+COMMAND_WANT = b'want'
+COMMAND_HAVE = b'have'
+
+
+class ProtocolFile(object):
+    """A dummy file for network ops that expect file-like objects."""
+
+    def __init__(self, read, write):
+        self.read = read
+        self.write = write
+
+    def tell(self):
+        pass
+
+    def close(self):
+        pass
+
+
+def pkt_line(data):
+    """Wrap data in a pkt-line.
+
+    :param data: The data to wrap, as bytes, or None.
+    :return: The data prefixed with its length in pkt-line format; if data was
+        None, returns the flush-pkt ('0000').
+    """
+    if data is None:
+        return b'0000'
+    return ('%04x' % (len(data) + 4)).encode('ascii') + data
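+
+
+# A minimal illustration of the framing (editorial sketch): the four-byte
+# ASCII hex length prefix counts itself, and None maps to the flush-pkt.
+#
+#     >>> pkt_line(b'hello\n')
+#     b'000ahello\n'
+#     >>> pkt_line(None)
+#     b'0000'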
+
+
+class Protocol(object):
+    """Class for interacting with a remote git process over the wire.
+
+    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
+    consists of the length of the line as a 4-byte hex string, followed by the
+    payload data. The length includes the 4-byte header. The special line
+    '0000' indicates the end of a section of input and is called a 'flush-pkt'.
+
+    For details on the pkt-line format, see the cgit distribution:
+        Documentation/technical/protocol-common.txt
+    """
+
+    def __init__(self, read, write, close=None, report_activity=None):
+        self.read = read
+        self.write = write
+        self._close = close
+        self.report_activity = report_activity
+        self._readahead = None
+
+    def close(self):
+        if self._close:
+            self._close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+    def read_pkt_line(self):
+        """Reads a pkt-line from the remote git process.
+
+        This method may read from the readahead buffer; see unread_pkt_line.
+
+        :return: The next string from the stream, without the length prefix, or
+            None for a flush-pkt ('0000').
+        """
+        if self._readahead is None:
+            read = self.read
+        else:
+            read = self._readahead.read
+            self._readahead = None
+
+        try:
+            sizestr = read(4)
+            if not sizestr:
+                raise HangupException()
+            size = int(sizestr, 16)
+            if size == 0:
+                if self.report_activity:
+                    self.report_activity(4, 'read')
+                return None
+            if self.report_activity:
+                self.report_activity(size, 'read')
+            pkt_contents = read(size-4)
+        except socket.error as e:
+            raise GitProtocolError(e)
+        else:
+            if len(pkt_contents) + 4 != size:
+                raise GitProtocolError(
+                    'Length of pkt read %04x does not match length prefix %04x'
+                    % (len(pkt_contents) + 4, size))
+            return pkt_contents
+
+    def eof(self):
+        """Test whether the protocol stream has reached EOF.
+
+        Note that this refers to the actual stream EOF and not just a
+        flush-pkt.
+
+        :return: True if the stream is at EOF, False otherwise.
+        """
+        try:
+            next_line = self.read_pkt_line()
+        except HangupException:
+            return True
+        self.unread_pkt_line(next_line)
+        return False
+
+    def unread_pkt_line(self, data):
+        """Unread a single line of data into the readahead buffer.
+
+        This method can be used to unread a single pkt-line into a fixed
+        readahead buffer.
+
+        :param data: The data to unread, without the length prefix.
+        :raise ValueError: If more than one pkt-line is unread.
+        """
+        if self._readahead is not None:
+            raise ValueError('Attempted to unread multiple pkt-lines.')
+        self._readahead = BytesIO(pkt_line(data))
+
+    def read_pkt_seq(self):
+        """Read a sequence of pkt-lines from the remote git process.
+
+        :return: Yields each line of data up to but not including the next
+            flush-pkt.
+        """
+        pkt = self.read_pkt_line()
+        while pkt:
+            yield pkt
+            pkt = self.read_pkt_line()
+
+    def write_pkt_line(self, line):
+        """Sends a pkt-line to the remote git process.
+
+        :param line: A string containing the data to send, without the length
+            prefix.
+        """
+        try:
+            line = pkt_line(line)
+            self.write(line)
+            if self.report_activity:
+                self.report_activity(len(line), 'write')
+        except socket.error as e:
+            raise GitProtocolError(e)
+
+    def write_file(self):
+        """Return a writable file-like object for this protocol."""
+
+        class ProtocolFile(object):
+
+            def __init__(self, proto):
+                self._proto = proto
+                self._offset = 0
+
+            def write(self, data):
+                self._proto.write(data)
+                self._offset += len(data)
+
+            def tell(self):
+                return self._offset
+
+            def close(self):
+                pass
+
+        return ProtocolFile(self)
+
+    def write_sideband(self, channel, blob):
+        """Write multiplexed data to the sideband.
+
+        :param channel: An int specifying the channel to write to.
+        :param blob: A blob of data (as a string) to send on this channel.
+        """
+        # A pkt-line can be at most 65520 bytes, so a sideband payload can be
+        # at most 65520 - 5 = 65515 bytes (4 length bytes plus 1 channel byte
+        # of overhead). Note that the length is ASCII hex while the channel
+        # number is a raw binary byte.
+        while blob:
+            self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
+            blob = blob[65515:]
+
+    def send_cmd(self, cmd, *args):
+        """Send a command and some arguments to a git server.
+
+        Only used for the TCP git protocol (git://).
+
+        :param cmd: The remote service to access.
+        :param args: List of arguments to send to the remote service.
+        """
+        self.write_pkt_line(cmd + b" " + b"".join([(a + b"\0") for a in args]))
+
+    def read_cmd(self):
+        """Read a command and some arguments from the git client
+
+        Only used for the TCP git protocol (git://).
+
+        :return: A tuple of (command, [list of arguments]).
+        """
+        line = self.read_pkt_line()
+        splice_at = line.find(b" ")
+        cmd, args = line[:splice_at], line[splice_at+1:]
+        assert args[-1:] == b"\x00"
+        return cmd, args[:-1].split(b"\0")
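+
+
+# Typical client-side use over the TCP git protocol (editorial sketch;
+# `sock` is assumed to be a socket connected to a git daemon, and the
+# path and host are placeholders):
+#
+#     proto = Protocol(sock.recv, sock.send, close=sock.close)
+#     proto.send_cmd(b'git-upload-pack', b'/project.git', b'host=example.com')
+#     for pkt in proto.read_pkt_seq():
+#         pass  # each advertised ref line, length prefix already stripped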
+
+
+_RBUFSIZE = 8192  # Default read buffer size.
+
+
+class ReceivableProtocol(Protocol):
+    """Variant of Protocol that allows reading up to a size without blocking.
+
+    This class has a recv() method that behaves like socket.recv() in addition
+    to a read() method.
+
+    If you want to read n bytes from the wire and block until exactly n bytes
+    (or EOF) are read, use read(n). If you want to read at most n bytes from
+    the wire but don't care if you get less, use recv(n). Note that recv(n)
+    will still block until at least one byte is read.
+    """
+
+    def __init__(self, recv, write, report_activity=None, rbufsize=_RBUFSIZE):
+        super(ReceivableProtocol, self).__init__(
+            self.read, write, report_activity=report_activity)
+        self._recv = recv
+        self._rbuf = BytesIO()
+        self._rbufsize = rbufsize
+
+    def read(self, size):
+        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
+        # with the following modifications:
+        #  - omit the size <= 0 branch
+        #  - seek back to start rather than 0 in case some buffer has been
+        #    consumed.
+        #  - use SEEK_END instead of the magic number.
+        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
+        # Reserved
+        # Licensed under the Python Software Foundation License.
+        # TODO: see if buffer is more efficient than cBytesIO.
+        assert size > 0
+
+        # Our use of BytesIO rather than lists of string objects returned by
+        # recv() minimizes memory usage and fragmentation that occurs when
+        # rbufsize is large compared to the typical return value of recv().
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, SEEK_END)
+        # buffer may have been partially consumed by recv()
+        buf_len = buf.tell() - start
+        if buf_len >= size:
+            # Already have size bytes in our buffer?  Extract and return.
+            buf.seek(start)
+            rv = buf.read(size)
+            self._rbuf = BytesIO()
+            self._rbuf.write(buf.read())
+            self._rbuf.seek(0)
+            return rv
+
+        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
+        while True:
+            left = size - buf_len
+            # recv() will malloc the amount of memory given as its
+            # parameter even though it often returns much less data
+            # than that.  The returned data string is short lived
+            # as we copy it into a BytesIO and free it.  This avoids
+            # fragmentation issues on many platforms.
+            data = self._recv(left)
+            if not data:
+                break
+            n = len(data)
+            if n == size and not buf_len:
+                # Shortcut.  Avoid buffer data copies when:
+                # - We have no data in our buffer.
+                # AND
+                # - Our call to recv returned exactly the
+                #   number of bytes we were asked to read.
+                return data
+            if n == left:
+                buf.write(data)
+                del data  # explicit free
+                break
+            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
+            buf.write(data)
+            buf_len += n
+            del data  # explicit free
+            # assert buf_len == buf.tell()
+        buf.seek(start)
+        return buf.read()
+
+    def recv(self, size):
+        assert size > 0
+
+        buf = self._rbuf
+        start = buf.tell()
+        buf.seek(0, SEEK_END)
+        buf_len = buf.tell()
+        buf.seek(start)
+
+        left = buf_len - start
+        if not left:
+            # only read from the wire if our read buffer is exhausted
+            data = self._recv(self._rbufsize)
+            if len(data) == size:
+                # shortcut: skip the buffer if we read exactly size bytes
+                return data
+            buf = BytesIO()
+            buf.write(data)
+            buf.seek(0)
+            del data  # explicit free
+            self._rbuf = buf
+        return buf.read(size)
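+
+
+# The distinction in practice (editorial sketch; `sock` is assumed to be a
+# connected socket): read(n) blocks until exactly n bytes arrive (or EOF),
+# while recv(n) returns as soon as any data is available, like socket.recv().
+#
+#     p = ReceivableProtocol(sock.recv, sock.send)
+#     exact = p.read(4)      # always 4 bytes unless the stream ends early
+#     some = p.recv(4096)    # at most 4096 bytes, possibly fewer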
+
+
+def extract_capabilities(text):
+    """Extract a capabilities list from a string, if present.
+
+    :param text: String to extract from
+    :return: Tuple with text with capabilities removed and list of capabilities
+    """
+    if b"\0" not in text:
+        return text, []
+    text, capabilities = text.rstrip().split(b"\0")
+    return (text, capabilities.strip().split(b" "))
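+
+
+# For example (editorial sketch), a ref advertisement line splits as:
+#
+#     >>> extract_capabilities(b'cafebabe refs/heads/master\x00ofs-delta')
+#     (b'cafebabe refs/heads/master', [b'ofs-delta'])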
+
+
+def extract_want_line_capabilities(text):
+    """Extract a capabilities list from a want line, if present.
+
+    Note that want lines have capabilities separated from the rest of the line
+    by a space instead of a null byte. Thus want lines have the form:
+
+        want obj-id cap1 cap2 ...
+
+    :param text: Want line to extract from
+    :return: Tuple with text with capabilities removed and list of capabilities
+    """
+    split_text = text.rstrip().split(b" ")
+    if len(split_text) < 3:
+        return text, []
+    return (b" ".join(split_text[:2]), split_text[2:])
+
+
+def ack_type(capabilities):
+    """Extract the ack type from a capabilities list."""
+    if b'multi_ack_detailed' in capabilities:
+        return MULTI_ACK_DETAILED
+    elif b'multi_ack' in capabilities:
+        return MULTI_ACK
+    return SINGLE_ACK
+
+
+class BufferedPktLineWriter(object):
+    """Writer that wraps its data in pkt-lines and has an independent buffer.
+
+    Consecutive calls to write() wrap the data in a pkt-line and then buffer
+    it until enough lines have been written that their total length
+    (including length prefixes) reaches the buffer size.
+    """
+
+    def __init__(self, write, bufsize=65515):
+        """Initialize the BufferedPktLineWriter.
+
+        :param write: A write callback for the underlying writer.
+        :param bufsize: The internal buffer size, including length prefixes.
+        """
+        self._write = write
+        self._bufsize = bufsize
+        self._wbuf = BytesIO()
+        self._buflen = 0
+
+    def write(self, data):
+        """Write data, wrapping it in a pkt-line."""
+        line = pkt_line(data)
+        line_len = len(line)
+        over = self._buflen + line_len - self._bufsize
+        if over >= 0:
+            start = line_len - over
+            self._wbuf.write(line[:start])
+            self.flush()
+        else:
+            start = 0
+        saved = line[start:]
+        self._wbuf.write(saved)
+        self._buflen += len(saved)
+
+    def flush(self):
+        """Flush all data from the buffer."""
+        data = self._wbuf.getvalue()
+        if data:
+            self._write(data)
+        self._buflen = 0
+        self._wbuf = BytesIO()
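+
+
+# Usage sketch (editorial example): writes accumulate internally and reach
+# the underlying callback only when the buffer fills or flush() is called.
+#
+#     chunks = []
+#     w = BufferedPktLineWriter(chunks.append)
+#     w.write(b'first')
+#     w.write(b'second')
+#     w.flush()   # chunks == [b'0009first000asecond']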
+
+
+class PktLineParser(object):
+    """Packet line parser that hands completed packets off to a callback.
+    """
+
+    def __init__(self, handle_pkt):
+        self.handle_pkt = handle_pkt
+        self._readahead = BytesIO()
+
+    def parse(self, data):
+        """Parse a fragment of data and call back for any completed packets.
+        """
+        self._readahead.write(data)
+        buf = self._readahead.getvalue()
+        if len(buf) < 4:
+            return
+        while len(buf) >= 4:
+            size = int(buf[:4], 16)
+            if size == 0:
+                self.handle_pkt(None)
+                buf = buf[4:]
+            elif size <= len(buf):
+                self.handle_pkt(buf[4:size])
+                buf = buf[size:]
+            else:
+                break
+        self._readahead = BytesIO()
+        self._readahead.write(buf)
+
+    def get_tail(self):
+        """Read back any unused data."""
+        return self._readahead.getvalue()
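+
+
+# Feeding arbitrary fragments re-assembles whole packets (editorial sketch):
+#
+#     pkts = []
+#     p = PktLineParser(pkts.append)
+#     p.parse(b'000ahel')
+#     p.parse(b'lo\n0000')
+#     # pkts == [b'hello\n', None]; p.get_tail() == b''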

+ 76 - 0
dulwich/reflog.py

@@ -0,0 +1,76 @@
+# reflog.py -- Parsing and writing reflog files
+# Copyright (C) 2015 Jelmer Vernooij and others.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Utilities for reading and generating reflogs.
+"""
+
+import collections
+
+from dulwich.objects import (
+    format_timezone,
+    parse_timezone,
+    ZERO_SHA,
+    )
+
+Entry = collections.namedtuple(
+    'Entry', ['old_sha', 'new_sha', 'committer', 'timestamp', 'timezone',
+              'message'])
+
+
+def format_reflog_line(old_sha, new_sha, committer, timestamp, timezone,
+                       message):
+    """Generate a single reflog line.
+
+    :param old_sha: Old Commit SHA
+    :param new_sha: New Commit SHA
+    :param committer: Committer name and e-mail
+    :param timestamp: Timestamp
+    :param timezone: Timezone
+    :param message: Message
+    """
+    if old_sha is None:
+        old_sha = ZERO_SHA
+    return (old_sha + b' ' + new_sha + b' ' + committer + b' ' +
+            str(timestamp).encode('ascii') + b' ' +
+            format_timezone(timezone) + b'\t' + message)
+
+
+def parse_reflog_line(line):
+    """Parse a reflog line.
+
+    :param line: Line to parse
+    :return: Tuple of (old_sha, new_sha, committer, timestamp, timezone,
+        message)
+    """
+    (begin, message) = line.split(b'\t', 1)
+    (old_sha, new_sha, rest) = begin.split(b' ', 2)
+    (committer, timestamp_str, timezone_str) = rest.rsplit(b' ', 2)
+    return Entry(old_sha, new_sha, committer, int(timestamp_str),
+                 parse_timezone(timezone_str)[0], message)
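+
+
+# Round-trip sketch (editorial example; the committer and message are
+# placeholders):
+#
+#     line = format_reflog_line(
+#         None, b'2' * 40, b'Jelmer Vernooij <jelmer@jelmer.uk>',
+#         1446552482, 0, b'clone: from git://example.com/x')
+#     parse_reflog_line(line).old_sha == ZERO_SHA   # True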
+
+
+def read_reflog(f):
+    """Read reflog.
+
+    :param f: File-like object
+    :return: Iterator over Entry objects
+    """
+    for l in f:
+        yield parse_reflog_line(l)

+ 795 - 0
dulwich/refs.py

@@ -0,0 +1,795 @@
+# refs.py -- For dealing with git refs
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Ref handling.
+
+"""
+import errno
+import os
+import sys
+
+from dulwich.errors import (
+    PackedRefsException,
+    RefFormatError,
+    )
+from dulwich.objects import (
+    git_line,
+    valid_hexsha,
+    ZERO_SHA,
+    )
+from dulwich.file import (
+    GitFile,
+    ensure_dir_exists,
+    )
+
+
+SYMREF = b'ref: '
+LOCAL_BRANCH_PREFIX = b'refs/heads/'
+BAD_REF_CHARS = set(b'\177 ~^:?*[')
+ANNOTATED_TAG_SUFFIX = b'^{}'
+
+
+def check_ref_format(refname):
+    """Check if a refname is correctly formatted.
+
+    Implements all the same rules as git-check-ref-format[1].
+
+    [1]
+    http://www.kernel.org/pub/software/scm/git/docs/git-check-ref-format.html
+
+    :param refname: The refname to check
+    :return: True if refname is valid, False otherwise
+    """
+    # These could be combined into one big expression, but are listed
+    # separately to parallel [1].
+    if b'/.' in refname or refname.startswith(b'.'):
+        return False
+    if b'/' not in refname:
+        return False
+    if b'..' in refname:
+        return False
+    for i, c in enumerate(refname):
+        if ord(refname[i:i+1]) < 0o40 or c in BAD_REF_CHARS:
+            return False
+    if refname[-1] in b'/.':
+        return False
+    if refname.endswith(b'.lock'):
+        return False
+    if b'@{' in refname:
+        return False
+    if b'\\' in refname:
+        return False
+    return True
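+
+
+# For instance (editorial sketch), remembering that the leading 'refs/' is
+# assumed to have been stripped already:
+#
+#     >>> check_ref_format(b'heads/master')
+#     True
+#     >>> check_ref_format(b'master')          # no '/' component
+#     False
+#     >>> check_ref_format(b'heads/v1.lock')   # '.lock' suffix is forbidden
+#     False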
+
+
+class RefsContainer(object):
+    """A container for refs."""
+
+    def set_symbolic_ref(self, name, other):
+        """Make a ref point at another ref.
+
+        :param name: Name of the ref to set
+        :param other: Name of the ref to point at
+        """
+        raise NotImplementedError(self.set_symbolic_ref)
+
+    def get_packed_refs(self):
+        """Get contents of the packed-refs file.
+
+        :return: Dictionary mapping ref names to SHA1s
+
+        :note: Will return an empty dictionary when no packed-refs file is
+            present.
+        """
+        raise NotImplementedError(self.get_packed_refs)
+
+    def get_peeled(self, name):
+        """Return the cached peeled value of a ref, if available.
+
+        :param name: Name of the ref to peel
+        :return: The peeled value of the ref. If the ref is known not to
+            point to a tag, this will be the SHA the ref refers to. If the
+            ref may point to a tag, but no cached information is available,
+            None is returned.
+        """
+        return None
+
+    def import_refs(self, base, other):
+        for name, value in other.items():
+            self[b'/'.join((base, name))] = value
+
+    def allkeys(self):
+        """All refs present in this container."""
+        raise NotImplementedError(self.allkeys)
+
+    def keys(self, base=None):
+        """Refs present in this container.
+
+        :param base: An optional base to return refs under.
+        :return: An unsorted set of valid refs in this container, including
+            packed refs.
+        """
+        if base is not None:
+            return self.subkeys(base)
+        else:
+            return self.allkeys()
+
+    def subkeys(self, base):
+        """Refs present in this container under a base.
+
+        :param base: The base to return refs under.
+        :return: A set of valid refs in this container under the base; the base
+            prefix is stripped from the ref names returned.
+        """
+        keys = set()
+        base_len = len(base) + 1
+        for refname in self.allkeys():
+            if refname.startswith(base):
+                keys.add(refname[base_len:])
+        return keys
+
+    def as_dict(self, base=None):
+        """Return the contents of this container as a dictionary.
+
+        """
+        ret = {}
+        keys = self.keys(base)
+        if base is None:
+            base = b''
+        else:
+            base = base.rstrip(b'/')
+        for key in keys:
+            try:
+                ret[key] = self[(base + b'/' + key).strip(b'/')]
+            except KeyError:
+                continue  # Unable to resolve
+
+        return ret
+
+    def _check_refname(self, name):
+        """Ensure a refname is valid and lives in refs or is HEAD.
+
+        HEAD is not a valid refname according to git-check-ref-format, but
+        this class needs to be able to touch HEAD. Also, check_ref_format
+        expects refnames without the leading 'refs/', but this class requires
+        the 'refs/' prefix so that it cannot touch anything outside the refs
+        dir (or HEAD).
+
+        :param name: The name of the reference.
+        :raises KeyError: if a refname is not HEAD or is otherwise not valid.
+        """
+        if name in (b'HEAD', b'refs/stash'):
+            return
+        if not name.startswith(b'refs/') or not check_ref_format(name[5:]):
+            raise RefFormatError(name)
+
+    def read_ref(self, refname):
+        """Read a reference without following any references.
+
+        :param refname: The name of the reference
+        :return: The contents of the ref file, or None if it does
+            not exist.
+        """
+        contents = self.read_loose_ref(refname)
+        if not contents:
+            contents = self.get_packed_refs().get(refname, None)
+        return contents
+
+    def read_loose_ref(self, name):
+        """Read a loose reference and return its contents.
+
+        :param name: the refname to read
+        :return: The contents of the ref file, or None if it does
+            not exist.
+        """
+        raise NotImplementedError(self.read_loose_ref)
+
+    def follow(self, name):
+        """Follow a reference name.
+
+        :return: a tuple of (refnames, sha), where refnames are the names of
+            references in the chain
+        """
+        contents = SYMREF + name
+        depth = 0
+        refnames = []
+        while contents.startswith(SYMREF):
+            refname = contents[len(SYMREF):]
+            refnames.append(refname)
+            contents = self.read_ref(refname)
+            if not contents:
+                break
+            depth += 1
+            if depth > 5:
+                raise KeyError(name)
+        return refnames, contents
+
+    def _follow(self, name):
+        import warnings
+        warnings.warn(
+            "RefsContainer._follow is deprecated. Use RefsContainer.follow "
+            "instead.", DeprecationWarning)
+        refnames, contents = self.follow(name)
+        if not refnames:
+            return (None, contents)
+        return (refnames[-1], contents)
+
+    def __contains__(self, refname):
+        if self.read_ref(refname):
+            return True
+        return False
+
+    def __getitem__(self, name):
+        """Get the SHA1 for a reference name.
+
+        This method follows all symbolic references.
+        """
+        _, sha = self.follow(name)
+        if sha is None:
+            raise KeyError(name)
+        return sha
+
+    def set_if_equals(self, name, old_ref, new_ref):
+        """Set a refname to new_ref only if it currently equals old_ref.
+
+        This method follows all symbolic references if applicable for the
+        subclass, and can be used to perform an atomic compare-and-swap
+        operation.
+
+        :param name: The refname to set.
+        :param old_ref: The old sha the refname must refer to, or None to set
+            unconditionally.
+        :param new_ref: The new sha the refname will refer to.
+        :return: True if the set was successful, False otherwise.
+        """
+        raise NotImplementedError(self.set_if_equals)
+
+    def add_if_new(self, name, ref):
+        """Add a new reference only if it does not already exist."""
+        raise NotImplementedError(self.add_if_new)
+
+    def __setitem__(self, name, ref):
+        """Set a reference name to point to the given SHA1.
+
+        This method follows all symbolic references if applicable for the
+        subclass.
+
+        :note: This method unconditionally overwrites the contents of a
+            reference. To update atomically only if the reference has not
+            changed, use set_if_equals().
+        :param name: The refname to set.
+        :param ref: The new sha the refname will refer to.
+        """
+        self.set_if_equals(name, None, ref)
+
+    def remove_if_equals(self, name, old_ref):
+        """Remove a refname only if it currently equals old_ref.
+
+        This method does not follow symbolic references, even if applicable for
+        the subclass. It can be used to perform an atomic compare-and-delete
+        operation.
+
+        :param name: The refname to delete.
+        :param old_ref: The old sha the refname must refer to, or None to
+            delete unconditionally.
+        :return: True if the delete was successful, False otherwise.
+        """
+        raise NotImplementedError(self.remove_if_equals)
+
+    def __delitem__(self, name):
+        """Remove a refname.
+
+        This method does not follow symbolic references, even if applicable for
+        the subclass.
+
+        :note: This method unconditionally deletes the contents of a reference.
+            To delete atomically only if the reference has not changed, use
+            remove_if_equals().
+
+        :param name: The refname to delete.
+        """
+        self.remove_if_equals(name, None)
+
+
+class DictRefsContainer(RefsContainer):
+    """RefsContainer backed by a simple dict.
+
+    This container does not support symbolic or packed references and is not
+    threadsafe.
+    """
+
+    def __init__(self, refs):
+        self._refs = refs
+        self._peeled = {}
+
+    def allkeys(self):
+        return self._refs.keys()
+
+    def read_loose_ref(self, name):
+        return self._refs.get(name, None)
+
+    def get_packed_refs(self):
+        return {}
+
+    def set_symbolic_ref(self, name, other):
+        self._refs[name] = SYMREF + other
+
+    def set_if_equals(self, name, old_ref, new_ref):
+        if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
+            return False
+        realnames, _ = self.follow(name)
+        for realname in realnames:
+            self._check_refname(realname)
+            self._refs[realname] = new_ref
+        return True
+
+    def add_if_new(self, name, ref):
+        if name in self._refs:
+            return False
+        self._refs[name] = ref
+        return True
+
+    def remove_if_equals(self, name, old_ref):
+        if old_ref is not None and self._refs.get(name, ZERO_SHA) != old_ref:
+            return False
+        try:
+            del self._refs[name]
+        except KeyError:
+            pass
+        return True
+
+    def get_peeled(self, name):
+        return self._peeled.get(name)
+
+    def _update(self, refs):
+        """Update multiple refs; intended only for testing."""
+        # TODO(dborowitz): replace this with a public function that uses
+        # set_if_equal.
+        self._refs.update(refs)
+
+    def _update_peeled(self, peeled):
+        """Update cached peeled refs; intended only for testing."""
+        self._peeled.update(peeled)
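+
+
+# Symbolic-ref following in a nutshell (editorial sketch using the in-memory
+# container):
+#
+#     refs = DictRefsContainer({b'refs/heads/master': b'1' * 40})
+#     refs.set_symbolic_ref(b'HEAD', b'refs/heads/master')
+#     refs[b'HEAD']          # == b'1' * 40, resolved through the symref
+#     refs.follow(b'HEAD')   # == ([b'HEAD', b'refs/heads/master'], b'1' * 40)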
+
+
+class InfoRefsContainer(RefsContainer):
+    """Refs container that reads refs from a info/refs file."""
+
+    def __init__(self, f):
+        self._refs = {}
+        self._peeled = {}
+        for l in f.readlines():
+            sha, name = l.rstrip(b'\n').split(b'\t')
+            if name.endswith(ANNOTATED_TAG_SUFFIX):
+                name = name[:-3]
+                if not check_ref_format(name):
+                    raise ValueError("invalid ref name %r" % name)
+                self._peeled[name] = sha
+            else:
+                if not check_ref_format(name):
+                    raise ValueError("invalid ref name %r" % name)
+                self._refs[name] = sha
+
+    def allkeys(self):
+        return self._refs.keys()
+
+    def read_loose_ref(self, name):
+        return self._refs.get(name, None)
+
+    def get_packed_refs(self):
+        return {}
+
+    def get_peeled(self, name):
+        try:
+            return self._peeled[name]
+        except KeyError:
+            return self._refs[name]
+
+
+class DiskRefsContainer(RefsContainer):
+    """Refs container that reads refs from disk."""
+
+    def __init__(self, path, worktree_path=None):
+        self.path = path
+        self.worktree_path = worktree_path or path
+        self._packed_refs = None
+        self._peeled_refs = None
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.path)
+
+    def subkeys(self, base):
+        subkeys = set()
+        path = self.refpath(base)
+        for root, dirs, files in os.walk(path):
+            dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
+            for filename in files:
+                refname = (("%s/%s" % (dir, filename))
+                           .strip("/").encode(sys.getfilesystemencoding()))
+                # check_ref_format requires at least one /, so we prepend the
+                # base before calling it.
+                if check_ref_format(base + b'/' + refname):
+                    subkeys.add(refname)
+        for key in self.get_packed_refs():
+            if key.startswith(base):
+                subkeys.add(key[len(base):].strip(b'/'))
+        return subkeys
+
+    def allkeys(self):
+        allkeys = set()
+        if os.path.exists(self.refpath(b'HEAD')):
+            allkeys.add(b'HEAD')
+        path = self.refpath(b'')
+        for root, dirs, files in os.walk(self.refpath(b'refs')):
+            dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
+            for filename in files:
+                refname = (
+                    "%s/%s" % (dir, filename)).encode(
+                            sys.getfilesystemencoding())
+                if check_ref_format(refname):
+                    allkeys.add(refname)
+        allkeys.update(self.get_packed_refs())
+        return allkeys
+
+    def refpath(self, name):
+        """Return the disk path of a ref.
+
+        """
+        if (getattr(self.path, "encode", None) and
+                getattr(name, "decode", None)):
+            name = name.decode(sys.getfilesystemencoding())
+        if os.path.sep != "/":
+            name = name.replace("/", os.path.sep)
+        # TODO: as the 'HEAD' reference is working tree specific, it
+        # should actually not be a part of RefsContainer
+        if name == 'HEAD':
+            return os.path.join(self.worktree_path, name)
+        else:
+            return os.path.join(self.path, name)
+
+    def get_packed_refs(self):
+        """Get contents of the packed-refs file.
+
+        :return: Dictionary mapping ref names to SHA1s
+
+        :note: Will return an empty dictionary when no packed-refs file is
+            present.
+        """
+        # TODO: invalidate the cache on repacking
+        if self._packed_refs is None:
+            # set both to empty because we want _peeled_refs to be
+            # None if and only if _packed_refs is also None.
+            self._packed_refs = {}
+            self._peeled_refs = {}
+            path = os.path.join(self.path, 'packed-refs')
+            try:
+                f = GitFile(path, 'rb')
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    return {}
+                raise
+            with f:
+                first_line = next(iter(f)).rstrip()
+                if (first_line.startswith(b'# pack-refs') and b' peeled' in
+                        first_line):
+                    for sha, name, peeled in read_packed_refs_with_peeled(f):
+                        self._packed_refs[name] = sha
+                        if peeled:
+                            self._peeled_refs[name] = peeled
+                else:
+                    f.seek(0)
+                    for sha, name in read_packed_refs(f):
+                        self._packed_refs[name] = sha
+        return self._packed_refs
+
+    def get_peeled(self, name):
+        """Return the cached peeled value of a ref, if available.
+
+        :param name: Name of the ref to peel
+        :return: The peeled value of the ref. If the ref is known not to
+            point to a tag, this will be the SHA the ref refers to. If the
+            ref may point to a tag, but no cached information is available,
+            None is returned.
+        """
+        self.get_packed_refs()
+        if self._peeled_refs is None or name not in self._packed_refs:
+            # No cache: no peeled refs were read, or this ref is loose
+            return None
+        if name in self._peeled_refs:
+            return self._peeled_refs[name]
+        else:
+            # Known not peelable
+            return self[name]
+
+    def read_loose_ref(self, name):
+        """Read a reference file and return its contents.
+
+        If the reference file is a symbolic reference, only read the first
+        line of the file. Otherwise, only read the first 40 bytes.
+
+        :param name: the refname to read, relative to refpath
+        :return: The contents of the ref file, or None if the file does not
+            exist.
+        :raises IOError: if any other error occurs
+        """
+        filename = self.refpath(name)
+        try:
+            with GitFile(filename, 'rb') as f:
+                header = f.read(len(SYMREF))
+                if header == SYMREF:
+                    # Read only the first line
+                    return header + next(iter(f)).rstrip(b'\r\n')
+                else:
+                    # Read only the first 40 bytes
+                    return header + f.read(40 - len(SYMREF))
+        except IOError as e:
+            if e.errno == errno.ENOENT:
+                return None
+            raise
+
+    def _remove_packed_ref(self, name):
+        if self._packed_refs is None:
+            return
+        filename = os.path.join(self.path, 'packed-refs')
+        # reread cached refs from disk, while holding the lock
+        f = GitFile(filename, 'wb')
+        try:
+            self._packed_refs = None
+            self.get_packed_refs()
+
+            if name not in self._packed_refs:
+                return
+
+            del self._packed_refs[name]
+            if name in self._peeled_refs:
+                del self._peeled_refs[name]
+            write_packed_refs(f, self._packed_refs, self._peeled_refs)
+            f.close()
+        finally:
+            f.abort()
+
+    def set_symbolic_ref(self, name, other):
+        """Make a ref point at another ref.
+
+        :param name: Name of the ref to set
+        :param other: Name of the ref to point at
+        """
+        self._check_refname(name)
+        self._check_refname(other)
+        filename = self.refpath(name)
+        try:
+            f = GitFile(filename, 'wb')
+            try:
+                f.write(SYMREF + other + b'\n')
+            except (IOError, OSError):
+                f.abort()
+                raise
+        finally:
+            f.close()
+
+    def set_if_equals(self, name, old_ref, new_ref):
+        """Set a refname to new_ref only if it currently equals old_ref.
+
+        This method follows all symbolic references, and can be used to perform
+        an atomic compare-and-swap operation.
+
+        :param name: The refname to set.
+        :param old_ref: The old sha the refname must refer to, or None to set
+            unconditionally.
+        :param new_ref: The new sha the refname will refer to.
+        :return: True if the set was successful, False otherwise.
+        """
+        self._check_refname(name)
+        try:
+            realnames, _ = self.follow(name)
+            realname = realnames[-1]
+        except (KeyError, IndexError):
+            realname = name
+        filename = self.refpath(realname)
+        ensure_dir_exists(os.path.dirname(filename))
+        with GitFile(filename, 'wb') as f:
+            if old_ref is not None:
+                try:
+                    # read again while holding the lock
+                    orig_ref = self.read_loose_ref(realname)
+                    if orig_ref is None:
+                        orig_ref = self.get_packed_refs().get(
+                                realname, ZERO_SHA)
+                    if orig_ref != old_ref:
+                        f.abort()
+                        return False
+                except (OSError, IOError):
+                    f.abort()
+                    raise
+            try:
+                f.write(new_ref + b'\n')
+            except (OSError, IOError):
+                f.abort()
+                raise
+        return True
+
+    def add_if_new(self, name, ref):
+        """Add a new reference only if it does not already exist.
+
+        This method follows symrefs, and only ensures that the last ref in the
+        chain does not exist.
+
+        :param name: The refname to set.
+        :param ref: The new sha the refname will refer to.
+        :return: True if the add was successful, False otherwise.
+        """
+        try:
+            realnames, contents = self.follow(name)
+            if contents is not None:
+                return False
+            realname = realnames[-1]
+        except (KeyError, IndexError):
+            realname = name
+        self._check_refname(realname)
+        filename = self.refpath(realname)
+        ensure_dir_exists(os.path.dirname(filename))
+        with GitFile(filename, 'wb') as f:
+            if os.path.exists(filename) or name in self.get_packed_refs():
+                f.abort()
+                return False
+            try:
+                f.write(ref + b'\n')
+            except (OSError, IOError):
+                f.abort()
+                raise
+        return True
+
+    def remove_if_equals(self, name, old_ref):
+        """Remove a refname only if it currently equals old_ref.
+
+        This method does not follow symbolic references. It can be used to
+        perform an atomic compare-and-delete operation.
+
+        :param name: The refname to delete.
+        :param old_ref: The old sha the refname must refer to, or None to
+            delete unconditionally.
+        :return: True if the delete was successful, False otherwise.
+        """
+        self._check_refname(name)
+        filename = self.refpath(name)
+        ensure_dir_exists(os.path.dirname(filename))
+        f = GitFile(filename, 'wb')
+        try:
+            if old_ref is not None:
+                orig_ref = self.read_loose_ref(name)
+                if orig_ref is None:
+                    orig_ref = self.get_packed_refs().get(name, ZERO_SHA)
+                if orig_ref != old_ref:
+                    return False
+            # may only be packed
+            try:
+                os.remove(filename)
+            except OSError as e:
+                if e.errno != errno.ENOENT:
+                    raise
+            self._remove_packed_ref(name)
+        finally:
+            # never write, we just wanted the lock
+            f.abort()
+        return True
+
+
+def _split_ref_line(line):
+    """Split a single ref line into a tuple of SHA1 and name."""
+    fields = line.rstrip(b'\n\r').split(b' ')
+    if len(fields) != 2:
+        raise PackedRefsException("invalid ref line %r" % line)
+    sha, name = fields
+    if not valid_hexsha(sha):
+        raise PackedRefsException("Invalid hex sha %r" % sha)
+    if not check_ref_format(name):
+        raise PackedRefsException("invalid ref name %r" % name)
+    return (sha, name)
+
+
+def read_packed_refs(f):
+    """Read a packed refs file.
+
+    :param f: file-like object to read from
+    :return: Iterator over tuples with SHA1s and ref names.
+    """
+    for l in f:
+        if l.startswith(b'#'):
+            # Comment
+            continue
+        if l.startswith(b'^'):
+            raise PackedRefsException(
+              "found peeled ref in packed-refs without peeled")
+        yield _split_ref_line(l)
+
+
+def read_packed_refs_with_peeled(f):
+    """Read a packed refs file including peeled refs.
+
+    Assumes the "# pack-refs with: peeled" line was already read. Yields tuples
+    with ref names, SHA1s, and peeled SHA1s (or None).
+
+    :param f: file-like object to read from, seek'ed to the second line
+    """
+    last = None
+    for l in f:
+        if l[0] == b'#':
+            continue
+        l = l.rstrip(b'\r\n')
+        if l.startswith(b'^'):
+            if not last:
+                raise PackedRefsException("unexpected peeled ref line")
+            if not valid_hexsha(l[1:]):
+                raise PackedRefsException("Invalid hex sha %r" % l[1:])
+            sha, name = _split_ref_line(last)
+            last = None
+            yield (sha, name, l[1:])
+        else:
+            if last:
+                sha, name = _split_ref_line(last)
+                yield (sha, name, None)
+            last = l
+    if last:
+        sha, name = _split_ref_line(last)
+        yield (sha, name, None)
+
+
+def write_packed_refs(f, packed_refs, peeled_refs=None):
+    """Write a packed refs file.
+
+    :param f: empty file-like object to write to
+    :param packed_refs: dict of refname to sha of packed refs to write
+    :param peeled_refs: dict of refname to peeled value of sha
+    """
+    if peeled_refs is None:
+        peeled_refs = {}
+    else:
+        f.write(b'# pack-refs with: peeled\n')
+    for refname in sorted(packed_refs.keys()):
+        f.write(git_line(packed_refs[refname], refname))
+        if refname in peeled_refs:
+            f.write(b'^' + peeled_refs[refname] + b'\n')
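+
+
+# The resulting file layout (editorial sketch; the shas are placeholders),
+# with a '^' line carrying the peeled object id directly below its
+# annotated tag:
+#
+#     # pack-refs with: peeled
+#     <sha> refs/heads/master
+#     <tag sha> refs/tags/v1.0
+#     ^<peeled commit sha>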
+
+
+def read_info_refs(f):
+    ret = {}
+    for l in f.readlines():
+        (sha, name) = l.rstrip(b"\r\n").split(b"\t", 1)
+        ret[name] = sha
+    return ret
+
+
+def write_info_refs(refs, store):
+    """Generate info refs."""
+    for name, sha in sorted(refs.items()):
+        # get_refs() includes HEAD as a special case, but we don't want to
+        # advertise it
+        if name == b'HEAD':
+            continue
+        try:
+            o = store[sha]
+        except KeyError:
+            continue
+        peeled = store.peel_sha(sha)
+        yield o.id + b'\t' + name + b'\n'
+        if o.id != peeled.id:
+            yield peeled.id + b'\t' + name + ANNOTATED_TAG_SUFFIX + b'\n'
+
+
+def is_local_branch(x):
+    return x.startswith(LOCAL_BRANCH_PREFIX)

+ 1160 - 0
dulwich/repo.py

@@ -0,0 +1,1160 @@
+# repo.py -- For dealing with git repositories.
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Repository access.
+
+This module contains the base class for git repositories
+(BaseRepo) and an implementation which uses a repository on
+local disk (Repo).
+
+"""
+
+from io import BytesIO
+import errno
+import os
+import sys
+import stat
+
+from dulwich.errors import (
+    NoIndexPresent,
+    NotBlobError,
+    NotCommitError,
+    NotGitRepository,
+    NotTreeError,
+    NotTagError,
+    CommitError,
+    RefFormatError,
+    HookError,
+    )
+from dulwich.file import (
+    GitFile,
+    )
+from dulwich.object_store import (
+    DiskObjectStore,
+    MemoryObjectStore,
+    ObjectStoreGraphWalker,
+    )
+from dulwich.objects import (
+    check_hexsha,
+    Blob,
+    Commit,
+    ShaFile,
+    Tag,
+    Tree,
+    )
+
+from dulwich.hooks import (
+    PreCommitShellHook,
+    PostCommitShellHook,
+    CommitMsgShellHook,
+    )
+
+from dulwich.refs import (  # noqa: F401
+    check_ref_format,
+    RefsContainer,
+    DictRefsContainer,
+    InfoRefsContainer,
+    DiskRefsContainer,
+    read_packed_refs,
+    read_packed_refs_with_peeled,
+    write_packed_refs,
+    SYMREF,
+    )
+
+
+import warnings
+
+
+CONTROLDIR = '.git'
+OBJECTDIR = 'objects'
+REFSDIR = 'refs'
+REFSDIR_TAGS = 'tags'
+REFSDIR_HEADS = 'heads'
+INDEX_FILENAME = "index"
+COMMONDIR = 'commondir'
+GITDIR = 'gitdir'
+WORKTREES = 'worktrees'
+
+BASE_DIRECTORIES = [
+    ["branches"],
+    [REFSDIR],
+    [REFSDIR, REFSDIR_TAGS],
+    [REFSDIR, REFSDIR_HEADS],
+    ["hooks"],
+    ["info"]
+    ]
+
+DEFAULT_REF = b'refs/heads/master'
+
+
+def parse_graftpoints(graftpoints):
+    """Convert a list of graftpoints into a dict
+
+    :param graftpoints: Iterator of graftpoint lines
+
+    Each line is formatted as:
+        <commit sha1> <parent sha1> [<parent sha1>]*
+
+    Resulting dictionary is:
+        <commit sha1>: [<parent sha1>*]
+
+    https://git.wiki.kernel.org/index.php/GraftPoint
+    """
+    grafts = {}
+    for l in graftpoints:
+        raw_graft = l.split(None, 1)
+
+        commit = raw_graft[0]
+        if len(raw_graft) == 2:
+            parents = raw_graft[1].split()
+        else:
+            parents = []
+
+        for sha in [commit] + parents:
+            check_hexsha(sha, 'Invalid graftpoint')
+
+        grafts[commit] = parents
+    return grafts
+
+
+def serialize_graftpoints(graftpoints):
+    """Convert a dictionary of grafts into string
+
+    The graft dictionary is:
+        <commit sha1>: [<parent sha1>*]
+
+    Each line is formatted as:
+        <commit sha1> <parent sha1> [<parent sha1>]*
+
+    https://git.wiki.kernel.org/index.php/GraftPoint
+
+    """
+    graft_lines = []
+    for commit, parents in graftpoints.items():
+        if parents:
+            graft_lines.append(commit + b' ' + b' '.join(parents))
+        else:
+            graft_lines.append(commit)
+    return b'\n'.join(graft_lines)
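+
+
+# Round-trip sketch (editorial example with placeholder hex shas):
+#
+#     grafts = parse_graftpoints([b'1' * 40 + b' ' + b'2' * 40])
+#     # grafts == {b'1' * 40: [b'2' * 40]}
+#     serialize_graftpoints(grafts) == b'1' * 40 + b' ' + b'2' * 40   # True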
+
+
+class BaseRepo(object):
+    """Base class for a git repository.
+
+    :ivar object_store: Dictionary-like object for accessing
+        the objects
+    :ivar refs: Dictionary-like object with the refs in this
+        repository
+    """
+
+    def __init__(self, object_store, refs):
+        """Open a repository.
+
+        This shouldn't be called directly, but rather through one of the
+        base classes, such as MemoryRepo or Repo.
+
+        :param object_store: Object store to use
+        :param refs: Refs container to use
+        """
+        self.object_store = object_store
+        self.refs = refs
+
+        self._graftpoints = {}
+        self.hooks = {}
+
+    def _determine_file_mode(self):
+        """Probe the file-system to determine whether permissions can be trusted.
+
+        :return: True if permissions can be trusted, False otherwise.
+        """
+        raise NotImplementedError(self._determine_file_mode)
+
+    def _init_files(self, bare):
+        """Initialize a default set of named files."""
+        from dulwich.config import ConfigFile
+        self._put_named_file('description', b"Unnamed repository")
+        f = BytesIO()
+        cf = ConfigFile()
+        cf.set(b"core", b"repositoryformatversion", b"0")
+        if self._determine_file_mode():
+            cf.set(b"core", b"filemode", True)
+        else:
+            cf.set(b"core", b"filemode", False)
+
+        cf.set(b"core", b"bare", bare)
+        cf.set(b"core", b"logallrefupdates", True)
+        cf.write_to_file(f)
+        self._put_named_file('config', f.getvalue())
+        self._put_named_file(os.path.join('info', 'exclude'), b'')
+
+    def get_named_file(self, path):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-based Repo, the object returned need not be
+        pointing to a file in that location.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: An open file object, or None if the file does not exist.
+        """
+        raise NotImplementedError(self.get_named_file)
+
+    def _put_named_file(self, path, contents):
+        """Write a file to the control dir with the given name and contents.
+
+        :param path: The path to the file, relative to the control dir.
+        :param contents: A string to write to the file.
+        """
+        raise NotImplementedError(self._put_named_file)
+
+    def open_index(self):
+        """Open the index for this repository.
+
+        :raise NoIndexPresent: If no index is present
+        :return: The matching `Index`
+        """
+        raise NotImplementedError(self.open_index)
+
+    def fetch(self, target, determine_wants=None, progress=None):
+        """Fetch objects into another repository.
+
+        :param target: The target repository
+        :param determine_wants: Optional function to determine what refs to
+            fetch.
+        :param progress: Optional progress function
+        :return: The local refs
+        """
+        if determine_wants is None:
+            determine_wants = target.object_store.determine_wants_all
+        target.object_store.add_objects(
+            self.fetch_objects(determine_wants, target.get_graph_walker(),
+                               progress))
+        return self.get_refs()
+
+    def fetch_objects(self, determine_wants, graph_walker, progress,
+                      get_tagged=None):
+        """Fetch the missing objects required for a set of revisions.
+
+        :param determine_wants: Function that takes a dictionary with heads
+            and returns the list of heads to fetch.
+        :param graph_walker: Object that can iterate over the list of revisions
+            to fetch and has an "ack" method that will be called to acknowledge
+            that a revision is present.
+        :param progress: Simple progress function that will be called with
+            updated progress strings.
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
+        :return: iterator over objects, with __len__ implemented
+        """
+        wants = determine_wants(self.get_refs())
+        if not isinstance(wants, list):
+            raise TypeError("determine_wants() did not return a list")
+
+        shallows = getattr(graph_walker, 'shallow', frozenset())
+        unshallows = getattr(graph_walker, 'unshallow', frozenset())
+
+        if wants == []:
+            # TODO(dborowitz): find a way to short-circuit that doesn't change
+            # this interface.
+
+            if shallows or unshallows:
+                # Do not send a pack in shallow short-circuit path
+                return None
+
+            return []
+
+        # If the graph walker is set up with an implementation that can
+        # ACK/NAK to the wire, it will write data to the client through
+        # this call as a side-effect.
+        haves = self.object_store.find_common_revisions(graph_walker)
+
+        # Deal with shallow requests separately because the haves do
+        # not reflect what objects are missing
+        if shallows or unshallows:
+            # TODO: filter the haves commits from iter_shas. the specific
+            # commits aren't missing.
+            haves = []
+
+        def get_parents(commit):
+            if commit.id in shallows:
+                return []
+            return self.get_parents(commit.id, commit)
+
+        return self.object_store.iter_shas(
+          self.object_store.find_missing_objects(
+              haves, wants, progress,
+              get_tagged,
+              get_parents=get_parents))
+
+    def get_graph_walker(self, heads=None):
+        """Retrieve a graph walker.
+
+        A graph walker is used by a remote repository (or proxy)
+        to find out which objects are present in this repository.
+
+        :param heads: Repository heads to use (optional)
+        :return: A graph walker object
+        """
+        if heads is None:
+            heads = self.refs.as_dict(b'refs/heads').values()
+        return ObjectStoreGraphWalker(heads, self.get_parents)
+
+    def get_refs(self):
+        """Get dictionary with all refs.
+
+        :return: A ``dict`` mapping ref names to SHA1s
+        """
+        return self.refs.as_dict()
+
+    def head(self):
+        """Return the SHA1 pointed at by HEAD."""
+        return self.refs[b'HEAD']
+
+    def _get_object(self, sha, cls):
+        assert len(sha) in (20, 40)
+        ret = self.get_object(sha)
+        if not isinstance(ret, cls):
+            if cls is Commit:
+                raise NotCommitError(ret)
+            elif cls is Blob:
+                raise NotBlobError(ret)
+            elif cls is Tree:
+                raise NotTreeError(ret)
+            elif cls is Tag:
+                raise NotTagError(ret)
+            else:
+                raise Exception("Type invalid: %r != %r" % (
+                  ret.type_name, cls.type_name))
+        return ret
+
+    def get_object(self, sha):
+        """Retrieve the object with the specified SHA.
+
+        :param sha: SHA to retrieve
+        :return: A ShaFile object
+        :raise KeyError: when the object can not be found
+        """
+        return self.object_store[sha]
+
+    def get_parents(self, sha, commit=None):
+        """Retrieve the parents of a specific commit.
+
+        If the specific commit is a graftpoint, the graft parents
+        will be returned instead.
+
+        :param sha: SHA of the commit for which to retrieve the parents
+        :param commit: Optional commit matching the sha
+        :return: List of parents
+        """
+
+        try:
+            return self._graftpoints[sha]
+        except KeyError:
+            if commit is None:
+                commit = self[sha]
+            return commit.parents
+
+    def get_config(self):
+        """Retrieve the config object.
+
+        :return: `ConfigFile` object for the ``.git/config`` file.
+        """
+        raise NotImplementedError(self.get_config)
+
+    def get_description(self):
+        """Retrieve the description for this repository.
+
+        :return: String with the description of the repository
+            as set by the user.
+        """
+        raise NotImplementedError(self.get_description)
+
+    def set_description(self, description):
+        """Set the description for this repository.
+
+        :param description: Text to set as description for this repository.
+        """
+        raise NotImplementedError(self.set_description)
+
+    def get_config_stack(self):
+        """Return a config stack for this repository.
+
+        This stack accesses the configuration for both this repository
+        itself (.git/config) and the global configuration, which usually
+        lives in ~/.gitconfig.
+
+        :return: `Config` instance for this repository
+        """
+        from dulwich.config import StackedConfig
+        backends = [self.get_config()] + StackedConfig.default_backends()
+        return StackedConfig(backends, writable=backends[0])
+
+    def get_peeled(self, ref):
+        """Get the peeled value of a ref.
+
+        :param ref: The refname to peel.
+        :return: The fully-peeled SHA1 of a tag object, after peeling all
+            intermediate tags; if the original ref does not point to a tag,
+            this will equal the original SHA1.
+        """
+        cached = self.refs.get_peeled(ref)
+        if cached is not None:
+            return cached
+        return self.object_store.peel_sha(self.refs[ref]).id
+
+    def get_walker(self, include=None, *args, **kwargs):
+        """Obtain a walker for this repository.
+
+        :param include: Iterable of SHAs of commits to include along with their
+            ancestors. Defaults to [HEAD]
+        :param exclude: Iterable of SHAs of commits to exclude along with their
+            ancestors, overriding includes.
+        :param order: ORDER_* constant specifying the order of results.
+            Anything other than ORDER_DATE may result in O(n) memory usage.
+        :param reverse: If True, reverse the order of output, requiring O(n)
+            memory.
+        :param max_entries: The maximum number of entries to yield, or None for
+            no limit.
+        :param paths: Iterable of file or subtree paths to show entries for.
+        :param rename_detector: diff.RenameDetector object for detecting
+            renames.
+        :param follow: If True, follow path across renames/copies. Forces a
+            default rename_detector.
+        :param since: Timestamp to list commits after.
+        :param until: Timestamp to list commits before.
+        :param queue_cls: A class to use for a queue of commits, supporting the
+            iterator protocol. The constructor takes a single argument, the
+            Walker.
+        :return: A `Walker` object
+        """
+        from dulwich.walk import Walker
+        if include is None:
+            include = [self.head()]
+        if isinstance(include, str):
+            include = [include]
+
+        kwargs['get_parents'] = lambda commit: self.get_parents(
+            commit.id, commit)
+
+        return Walker(self.object_store, include, *args, **kwargs)
+
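+    # Editor's note: a brief usage sketch (not part of the original
+    # source; the repository path is hypothetical):
+    #
+    #   from dulwich.repo import Repo
+    #   r = Repo('/path/to/repo')
+    #   for entry in r.get_walker(max_entries=10):
+    #       print(entry.commit.message)
+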
+    def __getitem__(self, name):
+        """Retrieve a Git object by SHA1 or ref.
+
+        :param name: A Git object SHA1 or a ref name
+        :return: A `ShaFile` object, such as a Commit or Blob
+        :raise KeyError: when the specified ref or object does not exist
+        """
+        if not isinstance(name, bytes):
+            raise TypeError("'name' must be bytestring, not %.80s" %
+                            type(name).__name__)
+        if len(name) in (20, 40):
+            try:
+                return self.object_store[name]
+            except (KeyError, ValueError):
+                pass
+        try:
+            return self.object_store[self.refs[name]]
+        except RefFormatError:
+            raise KeyError(name)
+
+    def __contains__(self, name):
+        """Check if a specific Git object or ref is present.
+
+        :param name: Git object SHA1 or ref name
+        """
+        if len(name) in (20, 40):
+            return name in self.object_store or name in self.refs
+        else:
+            return name in self.refs
+
+    def __setitem__(self, name, value):
+        """Set a ref.
+
+        :param name: ref name
+        :param value: Ref value - either a ShaFile object, or a hex sha
+        """
+        if name.startswith(b"refs/") or name == b'HEAD':
+            if isinstance(value, ShaFile):
+                self.refs[name] = value.id
+            elif isinstance(value, bytes):
+                self.refs[name] = value
+            else:
+                raise TypeError(value)
+        else:
+            raise ValueError(name)
+
+    def __delitem__(self, name):
+        """Remove a ref.
+
+        :param name: Name of the ref to remove
+        """
+        if name.startswith(b"refs/") or name == b"HEAD":
+            del self.refs[name]
+        else:
+            raise ValueError(name)
+
+    def _get_user_identity(self):
+        """Determine the identity to use for new commits.
+        """
+        config = self.get_config_stack()
+        return (config.get((b"user", ), b"name") + b" <" +
+                config.get((b"user", ), b"email") + b">")
+
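+    # Editor's note: a sketch of the equivalent config-stack lookup (not
+    # part of the original source); KeyError is raised if user.name or
+    # user.email is unset:
+    #
+    #   config = repo.get_config_stack()
+    #   identity = (config.get((b'user',), b'name') + b' <' +
+    #               config.get((b'user',), b'email') + b'>')
+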
+    def _add_graftpoints(self, updated_graftpoints):
+        """Add or modify graftpoints
+
+        :param updated_graftpoints: Dict of commit shas to list of parent shas
+        """
+
+        # Simple validation
+        for commit, parents in updated_graftpoints.items():
+            for sha in [commit] + parents:
+                check_hexsha(sha, 'Invalid graftpoint')
+
+        self._graftpoints.update(updated_graftpoints)
+
+    def _remove_graftpoints(self, to_remove=[]):
+        """Remove graftpoints
+
+        :param to_remove: List of commit shas
+        """
+        for sha in to_remove:
+            del self._graftpoints[sha]
+
+    def do_commit(self, message=None, committer=None,
+                  author=None, commit_timestamp=None,
+                  commit_timezone=None, author_timestamp=None,
+                  author_timezone=None, tree=None, encoding=None,
+                  ref=b'HEAD', merge_heads=None):
+        """Create a new commit.
+
+        :param message: Commit message
+        :param committer: Committer fullname
+        :param author: Author fullname (defaults to committer)
+        :param commit_timestamp: Commit timestamp (defaults to now)
+        :param commit_timezone: Commit timestamp timezone (defaults to GMT)
+        :param author_timestamp: Author timestamp (defaults to commit
+            timestamp)
+        :param author_timezone: Author timestamp timezone
+            (defaults to commit timestamp timezone)
+        :param tree: SHA1 of the tree root to use (if not specified the
+            current index will be committed).
+        :param encoding: Encoding
+        :param ref: Optional ref to commit to (defaults to current branch)
+        :param merge_heads: Merge heads (defaults to .git/MERGE_HEADS)
+        :return: New commit SHA1
+        """
+        import time
+        c = Commit()
+        if tree is None:
+            index = self.open_index()
+            c.tree = index.commit(self.object_store)
+        else:
+            if len(tree) != 40:
+                raise ValueError("tree must be a 40-byte hex sha string")
+            c.tree = tree
+
+        try:
+            self.hooks['pre-commit'].execute()
+        except HookError as e:
+            raise CommitError(e)
+        except KeyError:  # no hook defined, silent fallthrough
+            pass
+
+        if merge_heads is None:
+            # FIXME: Read merge heads from .git/MERGE_HEADS
+            merge_heads = []
+        if committer is None:
+            # FIXME: Support GIT_COMMITTER_NAME/GIT_COMMITTER_EMAIL environment
+            # variables
+            committer = self._get_user_identity()
+        c.committer = committer
+        if commit_timestamp is None:
+            # FIXME: Support GIT_COMMITTER_DATE environment variable
+            commit_timestamp = time.time()
+        c.commit_time = int(commit_timestamp)
+        if commit_timezone is None:
+            # FIXME: Use current user timezone rather than UTC
+            commit_timezone = 0
+        c.commit_timezone = commit_timezone
+        if author is None:
+            # FIXME: Support GIT_AUTHOR_NAME/GIT_AUTHOR_EMAIL environment
+            # variables
+            author = committer
+        c.author = author
+        if author_timestamp is None:
+            # FIXME: Support GIT_AUTHOR_DATE environment variable
+            author_timestamp = commit_timestamp
+        c.author_time = int(author_timestamp)
+        if author_timezone is None:
+            author_timezone = commit_timezone
+        c.author_timezone = author_timezone
+        if encoding is not None:
+            c.encoding = encoding
+        if message is None:
+            # FIXME: Try to read commit message from .git/MERGE_MSG
+            raise ValueError("No commit message specified")
+
+        try:
+            c.message = self.hooks['commit-msg'].execute(message)
+            if c.message is None:
+                c.message = message
+        except HookError as e:
+            raise CommitError(e)
+        except KeyError:  # no hook defined, message not modified
+            c.message = message
+
+        if ref is None:
+            # Create a dangling commit
+            c.parents = merge_heads
+            self.object_store.add_object(c)
+        else:
+            try:
+                old_head = self.refs[ref]
+                c.parents = [old_head] + merge_heads
+                self.object_store.add_object(c)
+                ok = self.refs.set_if_equals(ref, old_head, c.id)
+            except KeyError:
+                c.parents = merge_heads
+                self.object_store.add_object(c)
+                ok = self.refs.add_if_new(ref, c.id)
+            if not ok:
+                # Fail if the atomic compare-and-swap failed, leaving the
+                # commit and all its objects as garbage.
+                raise CommitError("%s changed during commit" % (ref,))
+
+        try:
+            self.hooks['post-commit'].execute()
+        except HookError as e:  # silent failure
+            warnings.warn("post-commit hook failed: %s" % e, UserWarning)
+        except KeyError:  # no hook defined, silent fallthrough
+            pass
+
+        return c.id
+
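+    # Editor's note: a typical do_commit() call on a non-bare repository
+    # (a sketch, not part of the original source; the path and identity
+    # are hypothetical):
+    #
+    #   from dulwich.repo import Repo
+    #   r = Repo('/path/to/repo')
+    #   r.stage([b'README.md'])
+    #   commit_id = r.do_commit(
+    #       b'Add a README', committer=b'Jane Doe <jane@example.com>')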
+
+def read_gitfile(f):
+    """Read a ``.git`` file.
+
+    The first line of the file should start with "gitdir: "
+
+    :param f: File-like object to read from
+    :return: A path
+    """
+    cs = f.read()
+    if not cs.startswith("gitdir: "):
+        raise ValueError("Expected file to start with 'gitdir: '")
+    return cs[len("gitdir: "):].rstrip("\n")
+
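+# Editor's note: read_gitfile() parses the one-line ``.git`` file used by
+# linked working trees; a sketch with a hypothetical path:
+#
+#   with open('/path/to/worktree/.git') as f:
+#       gitdir = read_gitfile(f)
+#   # e.g. gitdir == '/path/to/main/.git/worktrees/wt1'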
+
+class Repo(BaseRepo):
+    """A git repository backed by local disk.
+
+    To open an existing repository, call the constructor with
+    the path of the repository.
+
+    To create a new repository, use the Repo.init class method.
+    """
+
+    def __init__(self, root):
+        hidden_path = os.path.join(root, CONTROLDIR)
+        if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)):
+            self.bare = False
+            self._controldir = hidden_path
+        elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
+              os.path.isdir(os.path.join(root, REFSDIR))):
+            self.bare = True
+            self._controldir = root
+        elif os.path.isfile(hidden_path):
+            self.bare = False
+            with open(hidden_path, 'r') as f:
+                path = read_gitfile(f)
+            self._controldir = os.path.join(root, path)
+        else:
+            raise NotGitRepository(
+                "No git repository was found at %(path)s" % dict(path=root)
+            )
+        commondir = self.get_named_file(COMMONDIR)
+        if commondir is not None:
+            with commondir:
+                self._commondir = os.path.join(
+                    self.controldir(),
+                    commondir.read().rstrip(b"\r\n").decode(
+                        sys.getfilesystemencoding()))
+        else:
+            self._commondir = self._controldir
+        self.path = root
+        object_store = DiskObjectStore(
+            os.path.join(self.commondir(), OBJECTDIR))
+        refs = DiskRefsContainer(self.commondir(), self._controldir)
+        BaseRepo.__init__(self, object_store, refs)
+
+        self._graftpoints = {}
+        graft_file = self.get_named_file(os.path.join("info", "grafts"),
+                                         basedir=self.commondir())
+        if graft_file:
+            with graft_file:
+                self._graftpoints.update(parse_graftpoints(graft_file))
+        graft_file = self.get_named_file("shallow",
+                                         basedir=self.commondir())
+        if graft_file:
+            with graft_file:
+                self._graftpoints.update(parse_graftpoints(graft_file))
+
+        self.hooks['pre-commit'] = PreCommitShellHook(self.controldir())
+        self.hooks['commit-msg'] = CommitMsgShellHook(self.controldir())
+        self.hooks['post-commit'] = PostCommitShellHook(self.controldir())
+
+    @classmethod
+    def discover(cls, start='.'):
+        """Iterate parent directories to discover a repository
+
+        Return a Repo object for the first parent directory that looks like a
+        Git repository.
+
+        :param start: The directory to start discovery from (defaults to '.')
+        """
+        remaining = True
+        path = os.path.abspath(start)
+        while remaining:
+            try:
+                return cls(path)
+            except NotGitRepository:
+                path, remaining = os.path.split(path)
+        raise NotGitRepository(
+            "No git repository was found at %(path)s" % dict(path=start)
+        )
+
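+    # Editor's note: a usage sketch with a hypothetical path; discovery
+    # walks upwards until a repository is found:
+    #
+    #   r = Repo.discover('/path/to/worktree/some/subdir')
+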
+    def controldir(self):
+        """Return the path of the control directory."""
+        return self._controldir
+
+    def commondir(self):
+        """Return the path of the common directory.
+
+        For a main working tree, it is identical to controldir().
+
+        For a linked working tree, it is the control directory of the
+        main working tree."""
+
+        return self._commondir
+
+    def _determine_file_mode(self):
+        """Probe the file-system to determine whether permissions can be trusted.
+
+        :return: True if permissions can be trusted, False otherwise.
+        """
+        fname = os.path.join(self.path, '.probe-permissions')
+        with open(fname, 'w') as f:
+            f.write('')
+
+        st1 = os.lstat(fname)
+        os.chmod(fname, st1.st_mode ^ stat.S_IXUSR)
+        st2 = os.lstat(fname)
+
+        os.unlink(fname)
+
+        mode_differs = st1.st_mode != st2.st_mode
+        st2_has_exec = (st2.st_mode & stat.S_IXUSR) != 0
+
+        return mode_differs and st2_has_exec
+
+    def _put_named_file(self, path, contents):
+        """Write a file to the control dir with the given name and contents.
+
+        :param path: The path to the file, relative to the control dir.
+        :param contents: A string to write to the file.
+        """
+        path = path.lstrip(os.path.sep)
+        with GitFile(os.path.join(self.controldir(), path), 'wb') as f:
+            f.write(contents)
+
+    def get_named_file(self, path, basedir=None):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-based Repo, the object returned need not
+        point to a file in that location.
+
+        :param path: The path to the file, relative to the control dir.
+        :param basedir: Optional argument that specifies an alternative to the
+            control dir.
+        :return: An open file object, or None if the file does not exist.
+        """
+        # TODO(dborowitz): sanitize filenames, since this is used directly by
+        # the dumb web serving code.
+        if basedir is None:
+            basedir = self.controldir()
+        path = path.lstrip(os.path.sep)
+        try:
+            return open(os.path.join(basedir, path), 'rb')
+        except (IOError, OSError) as e:
+            if e.errno == errno.ENOENT:
+                return None
+            raise
+
+    def index_path(self):
+        """Return path to the index file."""
+        return os.path.join(self.controldir(), INDEX_FILENAME)
+
+    def open_index(self):
+        """Open the index for this repository.
+
+        :raise NoIndexPresent: If no index is present
+        :return: The matching `Index`
+        """
+        from dulwich.index import Index
+        if not self.has_index():
+            raise NoIndexPresent()
+        return Index(self.index_path())
+
+    def has_index(self):
+        """Check if an index is present."""
+        # Bare repos must never have index files; non-bare repos may have a
+        # missing index file, which is treated as empty.
+        return not self.bare
+
+    def stage(self, fs_paths):
+        """Stage a set of paths.
+
+        :param fs_paths: List of paths, relative to the repository path
+        """
+
+        root_path_bytes = self.path.encode(sys.getfilesystemencoding())
+
+        if not isinstance(fs_paths, list):
+            fs_paths = [fs_paths]
+        from dulwich.index import (
+            blob_from_path_and_stat,
+            index_entry_from_stat,
+            _fs_to_tree_path,
+            )
+        index = self.open_index()
+        for fs_path in fs_paths:
+            if not isinstance(fs_path, bytes):
+                fs_path = fs_path.encode(sys.getfilesystemencoding())
+            if os.path.isabs(fs_path):
+                raise ValueError(
+                    "path %r should be relative to "
+                    "repository root, not absolute" % fs_path)
+            tree_path = _fs_to_tree_path(fs_path)
+            full_path = os.path.join(root_path_bytes, fs_path)
+            try:
+                st = os.lstat(full_path)
+            except OSError:
+                # File no longer exists
+                try:
+                    del index[tree_path]
+                except KeyError:
+                    pass  # already removed
+            else:
+                blob = blob_from_path_and_stat(full_path, st)
+                self.object_store.add_object(blob)
+                index[tree_path] = index_entry_from_stat(st, blob.id, 0)
+        index.write()
+
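+    # Editor's note: a sketch (hypothetical paths); stage() adds or
+    # updates index entries for existing files and drops entries for
+    # paths that no longer exist on disk:
+    #
+    #   r.stage(['added.txt', 'modified.txt', 'deleted.txt'])
+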
+    def clone(self, target_path, mkdir=True, bare=False,
+              origin=b"origin"):
+        """Clone this repository.
+
+        :param target_path: Target path
+        :param mkdir: Create the target directory
+        :param bare: Whether to create a bare repository
+        :param origin: Base name for refs in target repository
+            cloned from this repository
+        :return: Created repository as `Repo`
+        """
+        if not bare:
+            target = self.init(target_path, mkdir=mkdir)
+        else:
+            target = self.init_bare(target_path, mkdir=mkdir)
+        self.fetch(target)
+        target.refs.import_refs(
+            b'refs/remotes/' + origin, self.refs.as_dict(b'refs/heads'))
+        target.refs.import_refs(
+            b'refs/tags', self.refs.as_dict(b'refs/tags'))
+        try:
+            target.refs.add_if_new(DEFAULT_REF, self.refs[DEFAULT_REF])
+        except KeyError:
+            pass
+        target_config = target.get_config()
+        encoded_path = self.path
+        if not isinstance(encoded_path, bytes):
+            encoded_path = encoded_path.encode(sys.getfilesystemencoding())
+        target_config.set((b'remote', b'origin'), b'url', encoded_path)
+        target_config.set((b'remote', b'origin'), b'fetch',
+                          b'+refs/heads/*:refs/remotes/origin/*')
+        target_config.write_to_path()
+
+        # Update target head
+        head_chain, head_sha = self.refs.follow(b'HEAD')
+        if head_chain and head_sha is not None:
+            target.refs.set_symbolic_ref(b'HEAD', head_chain[-1])
+            target[b'HEAD'] = head_sha
+
+            if not bare:
+                # Checkout HEAD to target dir
+                target.reset_index()
+
+        return target
+
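+    # Editor's note: a usage sketch with hypothetical paths; clone()
+    # fetches objects, copies refs under refs/remotes/origin, and checks
+    # out HEAD for non-bare targets:
+    #
+    #   src = Repo('/path/to/src')
+    #   dst = src.clone('/path/to/dst', mkdir=True)
+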
+    def reset_index(self, tree=None):
+        """Reset the index back to a specific tree.
+
+        :param tree: Tree SHA to reset to, None for current HEAD tree.
+        """
+        from dulwich.index import (
+            build_index_from_tree,
+            validate_path_element_default,
+            validate_path_element_ntfs,
+            )
+        if tree is None:
+            tree = self[b'HEAD'].tree
+        config = self.get_config()
+        honor_filemode = config.get_boolean(
+            'core', 'filemode', os.name != "nt")
+        if config.get_boolean('core', 'protectNTFS', os.name == "nt"):
+            validate_path_element = validate_path_element_ntfs
+        else:
+            validate_path_element = validate_path_element_default
+        return build_index_from_tree(
+            self.path, self.index_path(), self.object_store, tree,
+            honor_filemode=honor_filemode,
+            validate_path_element=validate_path_element)
+
+    def get_config(self):
+        """Retrieve the config object.
+
+        :return: `ConfigFile` object for the ``.git/config`` file.
+        """
+        from dulwich.config import ConfigFile
+        path = os.path.join(self._controldir, 'config')
+        try:
+            return ConfigFile.from_path(path)
+        except (IOError, OSError) as e:
+            if e.errno != errno.ENOENT:
+                raise
+            ret = ConfigFile()
+            ret.path = path
+            return ret
+
+    def get_description(self):
+        """Retrieve the description of this repository.
+
+        :return: A string describing the repository or None.
+        """
+        path = os.path.join(self._controldir, 'description')
+        try:
+            with GitFile(path, 'rb') as f:
+                return f.read()
+        except (IOError, OSError) as e:
+            if e.errno != errno.ENOENT:
+                raise
+            return None
+
+    def __repr__(self):
+        return "<Repo at %r>" % self.path
+
+    def set_description(self, description):
+        """Set the description for this repository.
+
+        :param description: Text to set as description for this repository.
+        """
+
+        self._put_named_file('description', description)
+
+    @classmethod
+    def _init_maybe_bare(cls, path, bare):
+        for d in BASE_DIRECTORIES:
+            os.mkdir(os.path.join(path, *d))
+        DiskObjectStore.init(os.path.join(path, OBJECTDIR))
+        ret = cls(path)
+        ret.refs.set_symbolic_ref(b'HEAD', DEFAULT_REF)
+        ret._init_files(bare)
+        return ret
+
+    @classmethod
+    def init(cls, path, mkdir=False):
+        """Create a new repository.
+
+        :param path: Path in which to create the repository
+        :param mkdir: Whether to create the directory
+        :return: `Repo` instance
+        """
+        if mkdir:
+            os.mkdir(path)
+        controldir = os.path.join(path, CONTROLDIR)
+        os.mkdir(controldir)
+        cls._init_maybe_bare(controldir, False)
+        return cls(path)
+
+    @classmethod
+    def _init_new_working_directory(cls, path, main_repo, identifier=None,
+                                    mkdir=False):
+        """Create a new working directory linked to a repository.
+
+        :param path: Path in which to create the working tree.
+        :param main_repo: Main repository to reference
+        :param identifier: Worktree identifier
+        :param mkdir: Whether to create the directory
+        :return: `Repo` instance
+        """
+        if mkdir:
+            os.mkdir(path)
+        if identifier is None:
+            identifier = os.path.basename(path)
+        main_worktreesdir = os.path.join(main_repo.controldir(), WORKTREES)
+        worktree_controldir = os.path.join(main_worktreesdir, identifier)
+        gitdirfile = os.path.join(path, CONTROLDIR)
+        with open(gitdirfile, 'wb') as f:
+            f.write(b'gitdir: ' +
+                    worktree_controldir.encode(sys.getfilesystemencoding()) +
+                    b'\n')
+        try:
+            os.mkdir(main_worktreesdir)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        try:
+            os.mkdir(worktree_controldir)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
+        with open(os.path.join(worktree_controldir, GITDIR), 'wb') as f:
+            f.write(gitdirfile.encode(sys.getfilesystemencoding()) + b'\n')
+        with open(os.path.join(worktree_controldir, COMMONDIR), 'wb') as f:
+            f.write(b'../..\n')
+        with open(os.path.join(worktree_controldir, 'HEAD'), 'wb') as f:
+            f.write(main_repo.head() + b'\n')
+        r = cls(path)
+        r.reset_index()
+        return r
+
+    @classmethod
+    def init_bare(cls, path, mkdir=False):
+        """Create a new bare repository.
+
+        ``path`` should already exist and be an empty directory.
+
+        :param path: Path to create bare repository in
+        :param mkdir: Whether to create the directory
+        :return: a `Repo` instance
+        """
+        if mkdir:
+            os.mkdir(path)
+        return cls._init_maybe_bare(path, True)
+
+    create = init_bare
+
+    def close(self):
+        """Close any files opened by this repository."""
+        self.object_store.close()
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+
+
+class MemoryRepo(BaseRepo):
+    """Repo that stores refs, objects, and named files in memory.
+
+    MemoryRepos are always bare: they have no working tree and no index, since
+    those have a stronger dependency on the filesystem.
+    """
+
+    def __init__(self):
+        from dulwich.config import ConfigFile
+        BaseRepo.__init__(self, MemoryObjectStore(), DictRefsContainer({}))
+        self._named_files = {}
+        self.bare = True
+        self._config = ConfigFile()
+        self._description = None
+
+    def set_description(self, description):
+        self._description = description
+
+    def get_description(self):
+        return self._description
+
+    def _determine_file_mode(self):
+        """Probe the file-system to determine whether permissions can be trusted.
+
+        :return: True if permissions can be trusted, False otherwise.
+        """
+        return sys.platform != 'win32'
+
+    def _put_named_file(self, path, contents):
+        """Write a file to the control dir with the given name and contents.
+
+        :param path: The path to the file, relative to the control dir.
+        :param contents: A string to write to the file.
+        """
+        self._named_files[path] = contents
+
+    def get_named_file(self, path):
+        """Get a file from the control dir with a specific name.
+
+        Although the filename should be interpreted as a filename relative to
+        the control dir in a disk-based Repo, the object returned need not
+        point to a file in that location.
+
+        :param path: The path to the file, relative to the control dir.
+        :return: An open file object, or None if the file does not exist.
+        """
+        contents = self._named_files.get(path, None)
+        if contents is None:
+            return None
+        return BytesIO(contents)
+
+    def open_index(self):
+        """Fail to open index for this repo, since it is bare.
+
+        :raise NoIndexPresent: Raised when no index is present
+        """
+        raise NoIndexPresent()
+
+    def get_config(self):
+        """Retrieve the config object.
+
+        :return: `ConfigFile` object.
+        """
+        return self._config
+
+    @classmethod
+    def init_bare(cls, objects, refs):
+        """Create a new bare repository in memory.
+
+        :param objects: Objects for the new repository,
+            as iterable
+        :param refs: Refs as dictionary, mapping names
+            to object SHA1s
+        """
+        ret = cls()
+        for obj in objects:
+            ret.object_store.add_object(obj)
+        for refname, sha in refs.items():
+            ret.refs[refname] = sha
+        ret._init_files(bare=True)
+        return ret
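+
+    # Editor's note: a sketch of committing to a MemoryRepo (not part of
+    # the original source). Since there is no index, a tree has to be
+    # built and passed explicitly:
+    #
+    #   from dulwich.objects import Blob, Tree
+    #   r = MemoryRepo()
+    #   blob = Blob.from_string(b'hello\n')
+    #   tree = Tree()
+    #   tree.add(b'hello.txt', 0o100644, blob.id)
+    #   r.object_store.add_objects([(blob, None), (tree, None)])
+    #   r.do_commit(b'msg', committer=b'Jane <jane@example.com>',
+    #               tree=tree.id, ref=b'refs/heads/master')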

+ 1138 - 0
dulwich/server.py

@@ -0,0 +1,1138 @@
+# server.py -- Implementation of the server side git protocols
+# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
+# Copyright (C) 2011-2012 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Git smart network protocol server implementation.
+
+For more detailed documentation on the network protocol, see the
+Documentation/technical directory in the cgit distribution, and in particular:
+
+* Documentation/technical/protocol-capabilities.txt
+* Documentation/technical/pack-protocol.txt
+
+Currently supported capabilities:
+
+ * include-tag
+ * thin-pack
+ * multi_ack_detailed
+ * multi_ack
+ * side-band-64k
+ * ofs-delta
+ * no-progress
+ * report-status
+ * delete-refs
+ * shallow
+"""
+
+import collections
+import os
+import socket
+import sys
+import zlib
+
+try:
+    import SocketServer
+except ImportError:
+    import socketserver as SocketServer
+
+from dulwich.errors import (
+    ApplyDeltaError,
+    ChecksumMismatch,
+    GitProtocolError,
+    NotGitRepository,
+    UnexpectedCommandError,
+    ObjectFormatException,
+    )
+from dulwich import log_utils
+from dulwich.objects import (
+    Commit,
+    valid_hexsha,
+    )
+from dulwich.pack import (
+    write_pack_objects,
+    )
+from dulwich.protocol import (  # noqa: F401
+    BufferedPktLineWriter,
+    capability_agent,
+    CAPABILITIES_REF,
+    CAPABILITY_DELETE_REFS,
+    CAPABILITY_INCLUDE_TAG,
+    CAPABILITY_MULTI_ACK_DETAILED,
+    CAPABILITY_MULTI_ACK,
+    CAPABILITY_NO_DONE,
+    CAPABILITY_NO_PROGRESS,
+    CAPABILITY_OFS_DELTA,
+    CAPABILITY_QUIET,
+    CAPABILITY_REPORT_STATUS,
+    CAPABILITY_SHALLOW,
+    CAPABILITY_SIDE_BAND_64K,
+    CAPABILITY_THIN_PACK,
+    COMMAND_DEEPEN,
+    COMMAND_DONE,
+    COMMAND_HAVE,
+    COMMAND_SHALLOW,
+    COMMAND_UNSHALLOW,
+    COMMAND_WANT,
+    MULTI_ACK,
+    MULTI_ACK_DETAILED,
+    Protocol,
+    ProtocolFile,
+    ReceivableProtocol,
+    SIDE_BAND_CHANNEL_DATA,
+    SIDE_BAND_CHANNEL_PROGRESS,
+    SIDE_BAND_CHANNEL_FATAL,
+    SINGLE_ACK,
+    TCP_GIT_PORT,
+    ZERO_SHA,
+    ack_type,
+    extract_capabilities,
+    extract_want_line_capabilities,
+    )
+from dulwich.refs import (
+    ANNOTATED_TAG_SUFFIX,
+    write_info_refs,
+    )
+from dulwich.repo import (
+    Repo,
+    )
+
+
+logger = log_utils.getLogger(__name__)
+
+
+class Backend(object):
+    """A backend for the Git smart server implementation."""
+
+    def open_repository(self, path):
+        """Open the repository at a path.
+
+        :param path: Path to the repository
+        :raise NotGitRepository: no git repository was found at path
+        :return: Instance of BackendRepo
+        """
+        raise NotImplementedError(self.open_repository)
+
+
+class BackendRepo(object):
+    """Repository abstraction used by the Git server.
+
+    The methods required here are a subset of those provided by
+    dulwich.repo.Repo.
+    """
+
+    object_store = None
+    refs = None
+
+    def get_refs(self):
+        """
+        Get all the refs in the repository
+
+        :return: dict of name -> sha
+        """
+        raise NotImplementedError
+
+    def get_peeled(self, name):
+        """Return the cached peeled value of a ref, if available.
+
+        :param name: Name of the ref to peel
+        :return: The peeled value of the ref. If the ref is known not to
+            point to a tag, this will be the SHA the ref refers to. If no cached
+            information about a tag is available, this method may return None,
+            but it should attempt to peel the tag if possible.
+        """
+        return None
+
+    def fetch_objects(self, determine_wants, graph_walker, progress,
+                      get_tagged=None):
+        """
+        Yield the objects required for a list of commits.
+
+        :param progress: A callback to send progress messages to the client
+        :param get_tagged: Function that returns a dict of pointed-to sha ->
+            tag sha for including tags.
+        """
+        raise NotImplementedError
+
+
+class DictBackend(Backend):
+    """Trivial backend that looks up Git repositories in a dictionary."""
+
+    def __init__(self, repos):
+        self.repos = repos
+
+    def open_repository(self, path):
+        logger.debug('Opening repository at %s', path)
+        try:
+            return self.repos[path]
+        except KeyError:
+            raise NotGitRepository(
+                "No git repository was found at %(path)s" % dict(path=path)
+            )
+
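+# Editor's note: a sketch of serving a repository over git:// with this
+# backend (TCPGitServer is defined further down in this module; the path
+# and port are hypothetical):
+#
+#   from dulwich.repo import Repo
+#   from dulwich.server import DictBackend, TCPGitServer
+#   backend = DictBackend({b'/': Repo('/path/to/repo')})
+#   server = TCPGitServer(backend, 'localhost', 9418)
+#   server.serve_forever()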
+
+class FileSystemBackend(Backend):
+    """Simple backend looking up Git repositories in the local file system."""
+
+    def __init__(self, root=os.sep):
+        super(FileSystemBackend, self).__init__()
+        self.root = (os.path.abspath(root) + os.sep).replace(
+                os.sep * 2, os.sep)
+
+    def open_repository(self, path):
+        logger.debug('opening repository at %s', path)
+        abspath = os.path.abspath(os.path.join(self.root, path)) + os.sep
+        normcase_abspath = os.path.normcase(abspath)
+        normcase_root = os.path.normcase(self.root)
+        if not normcase_abspath.startswith(normcase_root):
+            raise NotGitRepository(
+                    "Path %r not inside root %r" %
+                    (path, self.root))
+        return Repo(abspath)
+
+
+class Handler(object):
+    """Smart protocol command handler base class."""
+
+    def __init__(self, backend, proto, http_req=None):
+        self.backend = backend
+        self.proto = proto
+        self.http_req = http_req
+
+    def handle(self):
+        raise NotImplementedError(self.handle)
+
+
+class PackHandler(Handler):
+    """Protocol handler for packs."""
+
+    def __init__(self, backend, proto, http_req=None):
+        super(PackHandler, self).__init__(backend, proto, http_req)
+        self._client_capabilities = None
+        # Flags needed for the no-done capability
+        self._done_received = False
+
+    @classmethod
+    def capability_line(cls):
+        return b"".join([b" " + c for c in cls.capabilities()])
+
+    @classmethod
+    def capabilities(cls):
+        raise NotImplementedError(cls.capabilities)
+
+    @classmethod
+    def innocuous_capabilities(cls):
+        return (CAPABILITY_INCLUDE_TAG, CAPABILITY_THIN_PACK,
+                CAPABILITY_NO_PROGRESS, CAPABILITY_OFS_DELTA,
+                capability_agent())
+
+    @classmethod
+    def required_capabilities(cls):
+        """Return a list of capabilities that we require the client to have."""
+        return []
+
+    def set_client_capabilities(self, caps):
+        allowable_caps = set(self.innocuous_capabilities())
+        allowable_caps.update(self.capabilities())
+        for cap in caps:
+            if cap not in allowable_caps:
+                raise GitProtocolError('Client asked for capability %s that '
+                                       'was not advertised.' % cap)
+        for cap in self.required_capabilities():
+            if cap not in caps:
+                raise GitProtocolError('Client does not support required '
+                                       'capability %s.' % cap)
+        self._client_capabilities = set(caps)
+        logger.info('Client capabilities: %s', caps)
+
+    def has_capability(self, cap):
+        if self._client_capabilities is None:
+            raise GitProtocolError('Server attempted to access capability %s '
+                                   'before asking client' % cap)
+        return cap in self._client_capabilities
+
+    def notify_done(self):
+        self._done_received = True
+
+
+class UploadPackHandler(PackHandler):
+    """Protocol handler for uploading a pack to the client."""
+
+    def __init__(self, backend, args, proto, http_req=None,
+                 advertise_refs=False):
+        super(UploadPackHandler, self).__init__(
+                backend, proto, http_req=http_req)
+        self.repo = backend.open_repository(args[0])
+        self._graph_walker = None
+        self.advertise_refs = advertise_refs
+        # A state variable for denoting that the have list is still
+        # being processed, and the client is not accepting any other
+        # data (such as side-band, see the progress method here).
+        self._processing_have_lines = False
+
+    @classmethod
+    def capabilities(cls):
+        return (CAPABILITY_MULTI_ACK_DETAILED, CAPABILITY_MULTI_ACK,
+                CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK,
+                CAPABILITY_OFS_DELTA, CAPABILITY_NO_PROGRESS,
+                CAPABILITY_INCLUDE_TAG, CAPABILITY_SHALLOW, CAPABILITY_NO_DONE)
+
+    @classmethod
+    def required_capabilities(cls):
+        return (CAPABILITY_SIDE_BAND_64K, CAPABILITY_THIN_PACK,
+                CAPABILITY_OFS_DELTA)
+
+    def progress(self, message):
+        if (self.has_capability(CAPABILITY_NO_PROGRESS) or
+                self._processing_have_lines):
+            return
+        self.proto.write_sideband(SIDE_BAND_CHANNEL_PROGRESS, message)
+
+    def get_tagged(self, refs=None, repo=None):
+        """Get a dict of peeled values of tags to their original tag shas.
+
+        :param refs: dict of refname -> sha of possible tags; defaults to all
+            of the backend's refs.
+        :param repo: optional Repo instance for getting peeled refs; defaults
+            to the backend's repo, if available
+        :return: dict of peeled_sha -> tag_sha, where tag_sha is the sha of a
+            tag whose peeled value is peeled_sha.
+        """
+        if not self.has_capability(CAPABILITY_INCLUDE_TAG):
+            return {}
+        if refs is None:
+            refs = self.repo.get_refs()
+        if repo is None:
+            repo = getattr(self.repo, "repo", None)
+            if repo is None:
+                # Bail if we don't have a Repo available; this is ok since
+                # clients must be able to handle the server not including
+                # all relevant tags.
+                # TODO: fix behavior when missing
+                return {}
+        tagged = {}
+        for name, sha in refs.items():
+            peeled_sha = repo.get_peeled(name)
+            if peeled_sha != sha:
+                tagged[peeled_sha] = sha
+        return tagged
+
+    def handle(self):
+        def write(x):
+            return self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, x)
+
+        graph_walker = ProtocolGraphWalker(
+                self, self.repo.object_store, self.repo.get_peeled)
+        objects_iter = self.repo.fetch_objects(
+            graph_walker.determine_wants, graph_walker, self.progress,
+            get_tagged=self.get_tagged)
+
+        # Note that the client is only processing responses related
+        # to the have lines it sent, and any other data (including side-
+        # band) will be considered a fatal error.
+        self._processing_have_lines = True
+
+        # Did the process short-circuit (e.g. in a stateless RPC call)? Note
+        # that the client still expects a 0-object pack in most cases.
+        # Also, if the objects_iter was instantiated with a graph walker
+        # whose implementation talks over the wire (which is the case for
+        # this instance of this class), taking its length will actually
+        # iterate through everything and write things out to the wire.
+        if len(objects_iter) == 0:
+            return
+
+        # The provided haves are processed, and it is safe to send side-
+        # band data now.
+        self._processing_have_lines = False
+
+        if not graph_walker.handle_done(
+                not self.has_capability(CAPABILITY_NO_DONE),
+                self._done_received):
+            return
+
+        self.progress(b"dul-daemon says what\n")
+        self.progress(
+                ("counting objects: %d, done.\n" % len(objects_iter)).encode(
+                    'ascii'))
+        write_pack_objects(ProtocolFile(None, write), objects_iter)
+        self.progress(b"how was that, then?\n")
+        # we are done
+        self.proto.write_pkt_line(None)
+
+
+def _split_proto_line(line, allowed):
+    """Split a line read from the wire.
+
+    :param line: The line read from the wire.
+    :param allowed: An iterable of command names that should be allowed.
+        Command names not listed below as possible return values will be
+        ignored.  If None, any commands from the possible return values are
+        allowed.
+    :return: a tuple having one of the following forms:
+        ('want', obj_id)
+        ('have', obj_id)
+        ('shallow', obj_id)
+        ('unshallow', obj_id)
+        ('deepen', depth)
+        ('done', None)
+        (None, None)  (for a flush-pkt)
+
+    :raise UnexpectedCommandError: if the line cannot be parsed into one of the
+        allowed return values.
+    """
+    if not line:
+        fields = [None]
+    else:
+        fields = line.rstrip(b'\n').split(b' ', 1)
+    command = fields[0]
+    if allowed is not None and command not in allowed:
+        raise UnexpectedCommandError(command)
+    if len(fields) == 1 and command in (COMMAND_DONE, None):
+        return (command, None)
+    elif len(fields) == 2:
+        if command in (COMMAND_WANT, COMMAND_HAVE, COMMAND_SHALLOW,
+                       COMMAND_UNSHALLOW):
+            if not valid_hexsha(fields[1]):
+                raise GitProtocolError("Invalid sha")
+            return tuple(fields)
+        elif command == COMMAND_DEEPEN:
+            return command, int(fields[1])
+    raise GitProtocolError('Received invalid line from client: %r' % line)
+
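+# Editor's note: illustrative inputs and outputs (a sketch; <sha> stands
+# for any valid 40-hex object id):
+#
+#   _split_proto_line(b'want <sha>\n', (b'want', None)) == (b'want', <sha>)
+#   _split_proto_line(b'deepen 1\n', None) == (b'deepen', 1)
+#   _split_proto_line(b'', None) == (None, None)   # flush-pkt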
+
+def _find_shallow(store, heads, depth):
+    """Find shallow commits according to a given depth.
+
+    :param store: An ObjectStore for looking up objects.
+    :param heads: Iterable of head SHAs to start walking from.
+    :param depth: The depth of ancestors to include. A depth of one includes
+        only the heads themselves.
+    :return: A tuple of (shallow, not_shallow), sets of SHAs that should be
+        considered shallow and unshallow according to the arguments. Note that
+        these sets may overlap if a commit is reachable along multiple paths.
+    """
+    parents = {}
+
+    def get_parents(sha):
+        result = parents.get(sha, None)
+        if not result:
+            result = store[sha].parents
+            parents[sha] = result
+        return result
+
+    todo = []  # stack of (sha, depth)
+    for head_sha in heads:
+        obj = store.peel_sha(head_sha)
+        if isinstance(obj, Commit):
+            todo.append((obj.id, 1))
+
+    not_shallow = set()
+    shallow = set()
+    while todo:
+        sha, cur_depth = todo.pop()
+        if cur_depth < depth:
+            not_shallow.add(sha)
+            new_depth = cur_depth + 1
+            todo.extend((p, new_depth) for p in get_parents(sha))
+        else:
+            shallow.add(sha)
+
+    return shallow, not_shallow
+
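+# Editor's note: a worked sketch. For a linear history a <- b <- c with
+# heads=[c.id] and depth=2, c is visited at depth 1 (not shallow) and b
+# at depth 2 (shallow), so the client receives c and b with b's parents
+# cut off:
+#
+#   shallow, not_shallow = _find_shallow(store, [c.id], 2)
+#   # shallow == {b.id}; not_shallow == {c.id}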
+
+def _want_satisfied(store, haves, want, earliest):
+    o = store[want]
+    pending = collections.deque([o])
+    known = set([want])
+    while pending:
+        commit = pending.popleft()
+        if commit.id in haves:
+            return True
+        if commit.type_name != b"commit":
+            # non-commit wants are assumed to be satisfied
+            continue
+        for parent in commit.parents:
+            if parent in known:
+                continue
+            known.add(parent)
+            parent_obj = store[parent]
+            # TODO: handle parents with later commit times than children
+            if parent_obj.commit_time >= earliest:
+                pending.append(parent_obj)
+    return False
+
+
+def _all_wants_satisfied(store, haves, wants):
+    """Check whether all the current wants are satisfied by a set of haves.
+
+    :param store: Object store to retrieve objects from
+    :param haves: A set of commits we know the client has.
+    :param wants: A set of commits the client wants
+    :return: True if all the wants are satisfied by the haves, False
+        otherwise.
+    """
+    haves = set(haves)
+    if haves:
+        earliest = min([store[h].commit_time for h in haves])
+    else:
+        earliest = 0
+    for want in wants:
+        if not _want_satisfied(store, haves, want, earliest):
+            return False
+
+    return True
+
+
+class ProtocolGraphWalker(object):
+    """A graph walker that knows the git protocol.
+
+    As a graph walker, this class implements ack(), next(), and reset(). It
+    also contains some base methods for interacting with the wire and walking
+    the commit tree.
+
+    The work of determining which acks to send is passed on to the
+    implementation instance stored in _impl. The reason for this is that we do
+    not know at object creation time what ack level the protocol requires. A
+    call to set_ack_type() is required to set up the implementation, before
+    any calls to next() or ack() are made.
+    """
+    def __init__(self, handler, object_store, get_peeled):
+        self.handler = handler
+        self.store = object_store
+        self.get_peeled = get_peeled
+        self.proto = handler.proto
+        self.http_req = handler.http_req
+        self.advertise_refs = handler.advertise_refs
+        self._wants = []
+        self.shallow = set()
+        self.client_shallow = set()
+        self.unshallow = set()
+        self._cached = False
+        self._cache = []
+        self._cache_index = 0
+        self._impl = None
+
+    def determine_wants(self, heads):
+        """Determine the wants for a set of heads.
+
+        The given heads are advertised to the client, who then specifies which
+        refs it wants using 'want' lines. This portion of the protocol is the
+        same regardless of ack type, and in fact is used to set the ack type of
+        the ProtocolGraphWalker.
+
+        If the client has the 'shallow' capability, this method also reads and
+        responds to the 'shallow' and 'deepen' lines from the client. These are
+        not part of the wants per se, but they set up necessary state for
+        walking the graph. Additionally, later code depends on this method
+        consuming everything up to the first 'have' line.
+
+        :param heads: a dict of refname->SHA1 to advertise
+        :return: a list of SHA1s requested by the client
+        """
+        values = set(heads.values())
+        if self.advertise_refs or not self.http_req:
+            for i, (ref, sha) in enumerate(sorted(heads.items())):
+                line = sha + b' ' + ref
+                if not i:
+                    line += b'\x00' + self.handler.capability_line()
+                self.proto.write_pkt_line(line + b'\n')
+                peeled_sha = self.get_peeled(ref)
+                if peeled_sha != sha:
+                    self.proto.write_pkt_line(
+                        peeled_sha + b' ' + ref + ANNOTATED_TAG_SUFFIX + b'\n')
+
+            # i'm done..
+            self.proto.write_pkt_line(None)
+
+            if self.advertise_refs:
+                return []
+
+        # Now the client will be sending 'want' lines
+        want = self.proto.read_pkt_line()
+        if not want:
+            return []
+        line, caps = extract_want_line_capabilities(want)
+        self.handler.set_client_capabilities(caps)
+        self.set_ack_type(ack_type(caps))
+        allowed = (COMMAND_WANT, COMMAND_SHALLOW, COMMAND_DEEPEN, None)
+        command, sha = _split_proto_line(line, allowed)
+
+        want_revs = []
+        while command == COMMAND_WANT:
+            if sha not in values:
+                raise GitProtocolError(
+                  'Client wants invalid object %s' % sha)
+            want_revs.append(sha)
+            command, sha = self.read_proto_line(allowed)
+
+        self.set_wants(want_revs)
+        if command in (COMMAND_SHALLOW, COMMAND_DEEPEN):
+            self.unread_proto_line(command, sha)
+            self._handle_shallow_request(want_revs)
+
+        if self.http_req and self.proto.eof():
+            # The client may close the socket at this point, expecting a
+            # flush-pkt from the server. We might be ready to send a packfile
+            # at this point, so we need to explicitly short-circuit in this
+            # case.
+            return []
+
+        return want_revs
+
+    def unread_proto_line(self, command, value):
+        if isinstance(value, int):
+            value = str(value).encode('ascii')
+        self.proto.unread_pkt_line(command + b' ' + value)
+
+    def ack(self, have_ref):
+        if len(have_ref) != 40:
+            raise ValueError("invalid sha %r" % have_ref)
+        return self._impl.ack(have_ref)
+
+    def reset(self):
+        self._cached = True
+        self._cache_index = 0
+
+    def next(self):
+        if not self._cached:
+            if not self._impl and self.http_req:
+                return None
+            return next(self._impl)
+        self._cache_index += 1
+        if self._cache_index > len(self._cache):
+            return None
+        return self._cache[self._cache_index]
+
+    __next__ = next
+
+    def read_proto_line(self, allowed):
+        """Read a line from the wire.
+
+        :param allowed: An iterable of command names that should be allowed.
+        :return: A tuple of (command, value); see _split_proto_line.
+        :raise UnexpectedCommandError: If an error occurred reading the line.
+        """
+        return _split_proto_line(self.proto.read_pkt_line(), allowed)
+
+    def _handle_shallow_request(self, wants):
+        while True:
+            command, val = self.read_proto_line(
+                    (COMMAND_DEEPEN, COMMAND_SHALLOW))
+            if command == COMMAND_DEEPEN:
+                depth = val
+                break
+            self.client_shallow.add(val)
+        self.read_proto_line((None,))  # consume client's flush-pkt
+
+        shallow, not_shallow = _find_shallow(self.store, wants, depth)
+
+        # Update self.shallow instead of reassigning it since we passed a
+        # reference to it before this method was called.
+        self.shallow.update(shallow - not_shallow)
+        new_shallow = self.shallow - self.client_shallow
+        unshallow = self.unshallow = not_shallow & self.client_shallow
+
+        for sha in sorted(new_shallow):
+            self.proto.write_pkt_line(COMMAND_SHALLOW + b' ' + sha)
+        for sha in sorted(unshallow):
+            self.proto.write_pkt_line(COMMAND_UNSHALLOW + b' ' + sha)
+
+        self.proto.write_pkt_line(None)
+
+    def notify_done(self):
+        # relay the message down to the handler.
+        self.handler.notify_done()
+
+    def send_ack(self, sha, ack_type=b''):
+        if ack_type:
+            ack_type = b' ' + ack_type
+        self.proto.write_pkt_line(b'ACK ' + sha + ack_type + b'\n')
+
+    def send_nak(self):
+        self.proto.write_pkt_line(b'NAK\n')
+
+    def handle_done(self, done_required, done_received):
+        # Delegate this to the implementation.
+        return self._impl.handle_done(done_required, done_received)
+
+    def set_wants(self, wants):
+        self._wants = wants
+
+    def all_wants_satisfied(self, haves):
+        """Check whether all the current wants are satisfied by a set of haves.
+
+        :param haves: A set of commits we know the client has.
+        :note: Wants are specified with set_wants rather than passed in since
+            in the current interface they are determined outside this class.
+        """
+        return _all_wants_satisfied(self.store, haves, self._wants)
+
+    def set_ack_type(self, ack_type):
+        impl_classes = {
+          MULTI_ACK: MultiAckGraphWalkerImpl,
+          MULTI_ACK_DETAILED: MultiAckDetailedGraphWalkerImpl,
+          SINGLE_ACK: SingleAckGraphWalkerImpl,
+          }
+        self._impl = impl_classes[ack_type](self)
+
+
+_GRAPH_WALKER_COMMANDS = (COMMAND_HAVE, COMMAND_DONE, None)
+
+
+class SingleAckGraphWalkerImpl(object):
+    """Graph walker implementation that speaks the single-ack protocol."""
+
+    def __init__(self, walker):
+        self.walker = walker
+        self._common = []
+
+    def ack(self, have_ref):
+        if not self._common:
+            self.walker.send_ack(have_ref)
+            self._common.append(have_ref)
+
+    def next(self):
+        command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
+        if command in (None, COMMAND_DONE):
+            # defer the handling of done
+            self.walker.notify_done()
+            return None
+        elif command == COMMAND_HAVE:
+            return sha
+
+    __next__ = next
+
+    def handle_done(self, done_required, done_received):
+        if not self._common:
+            self.walker.send_nak()
+
+        if done_required and not done_received:
+            # The client is required to send 'done' but has not sent it;
+            # skip the pack for this request and do not handle the done.
+            return False
+
+        if not done_received and not self._common:
+            # We are not actually done, since the walker picked up no
+            # haves.  This is usually triggered when a client attempts
+            # to pull from a source that has no common base_commit.
+            # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
+            #          test_multi_ack_stateless_nodone
+            return False
+
+        return True
+
+
+class MultiAckGraphWalkerImpl(object):
+    """Graph walker implementation that speaks the multi-ack protocol."""
+
+    def __init__(self, walker):
+        self.walker = walker
+        self._found_base = False
+        self._common = []
+
+    def ack(self, have_ref):
+        self._common.append(have_ref)
+        if not self._found_base:
+            self.walker.send_ack(have_ref, b'continue')
+            if self.walker.all_wants_satisfied(self._common):
+                self._found_base = True
+        # else we blind ack within next
+
+    def next(self):
+        while True:
+            command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
+            if command is None:
+                self.walker.send_nak()
+                # in multi-ack mode, a flush-pkt indicates the client wants to
+                # flush but more have lines are still coming
+                continue
+            elif command == COMMAND_DONE:
+                self.walker.notify_done()
+                return None
+            elif command == COMMAND_HAVE:
+                if self._found_base:
+                    # blind ack
+                    self.walker.send_ack(sha, b'continue')
+                return sha
+
+    __next__ = next
+
+    def handle_done(self, done_required, done_received):
+        if done_required and not done_received:
+            # The client has not sent a 'done' even though one is
+            # required; skip sending the pack for this request and do
+            # not treat the request as done.
+            return False
+
+        if not done_received and not self._common:
+            # We are not actually done, since the walker picked up no
+            # haves.  This is usually triggered when the client attempts
+            # to pull from a source that shares no common base commit.
+            # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
+            #          test_multi_ack_stateless_nodone
+            return False
+
+        # don't nak unless no common commits were found, even if not
+        # everything is satisfied
+        if self._common:
+            self.walker.send_ack(self._common[-1])
+        else:
+            self.walker.send_nak()
+        return True
+
+
+class MultiAckDetailedGraphWalkerImpl(object):
+    """Graph walker implementation speaking the multi-ack-detailed protocol."""
+
+    def __init__(self, walker):
+        self.walker = walker
+        self._common = []
+
+    def ack(self, have_ref):
+        # Should be called only if have_ref is common.
+        self._common.append(have_ref)
+        self.walker.send_ack(have_ref, b'common')
+
+    def next(self):
+        while True:
+            command, sha = self.walker.read_proto_line(_GRAPH_WALKER_COMMANDS)
+            if command is None:
+                if self.walker.all_wants_satisfied(self._common):
+                    self.walker.send_ack(self._common[-1], b'ready')
+                self.walker.send_nak()
+                if self.walker.http_req:
+                    # In the HTTP version of this request, a flush-pkt
+                    # always signifies the end of the request, so we
+                    # also return nothing here as if we were done (but
+                    # not really: that depends on whether the no-done
+                    # capability was specified, which is handled in
+                    # handle_done, which may or may not call
+                    # post_nodone_check accordingly).
+                    return None
+            elif command == COMMAND_DONE:
+                # Let the walker know that we got a done.
+                self.walker.notify_done()
+                break
+            elif command == COMMAND_HAVE:
+                # return the sha and let the caller ACK it with the
+                # above ack method.
+                return sha
+
+    __next__ = next
+
+    def handle_done(self, done_required, done_received):
+        if done_required and not done_received:
+            # The client has not sent a 'done' even though one is
+            # required; skip sending the pack for this request and do
+            # not treat the request as done.
+            return False
+
+        if not done_received and not self._common:
+            # We are not actually done, since the walker picked up no
+            # haves.  This is usually triggered when the client attempts
+            # to pull from a source that shares no common base commit.
+            # See: test_server.MultiAckDetailedGraphWalkerImplTestCase.\
+            #          test_multi_ack_stateless_nodone
+            return False
+
+        # don't nak unless no common commits were found, even if not
+        # everything is satisfied
+        if self._common:
+            self.walker.send_ack(self._common[-1])
+        else:
+            self.walker.send_nak()
+        return True
+
+
+class ReceivePackHandler(PackHandler):
+    """Protocol handler for downloading a pack from the client."""
+
+    def __init__(self, backend, args, proto, http_req=None,
+                 advertise_refs=False):
+        super(ReceivePackHandler, self).__init__(
+                backend, proto, http_req=http_req)
+        self.repo = backend.open_repository(args[0])
+        self.advertise_refs = advertise_refs
+
+    @classmethod
+    def capabilities(cls):
+        return (CAPABILITY_REPORT_STATUS, CAPABILITY_DELETE_REFS,
+                CAPABILITY_QUIET, CAPABILITY_OFS_DELTA,
+                CAPABILITY_SIDE_BAND_64K, CAPABILITY_NO_DONE)
+
+    def _apply_pack(self, refs):
+        all_exceptions = (IOError, OSError, ChecksumMismatch, ApplyDeltaError,
+                          AssertionError, socket.error, zlib.error,
+                          ObjectFormatException)
+        status = []
+        will_send_pack = False
+
+        for command in refs:
+            if command[1] != ZERO_SHA:
+                will_send_pack = True
+
+        if will_send_pack:
+            # TODO: more informative error messages than just the exception
+            # string
+            try:
+                recv = getattr(self.proto, "recv", None)
+                self.repo.object_store.add_thin_pack(self.proto.read, recv)
+                status.append((b'unpack', b'ok'))
+            except all_exceptions as e:
+                status.append(
+                    (b'unpack', str(e).replace('\n', '').encode('utf-8')))
+                # The pack may still have been moved in, but it may contain
+                # broken objects. We trust a later GC to clean it up.
+        else:
+            # The git protocol wants a status entry for the unpack step
+            # even if no pack data has been sent.
+            status.append((b'unpack', b'ok'))
+
+        for oldsha, sha, ref in refs:
+            ref_status = b'ok'
+            try:
+                if sha == ZERO_SHA:
+                    if CAPABILITY_DELETE_REFS not in self.capabilities():
+                        raise GitProtocolError(
+                          'Attempted to delete refs without delete-refs '
+                          'capability.')
+                    try:
+                        self.repo.refs.remove_if_equals(ref, oldsha)
+                    except all_exceptions:
+                        ref_status = b'failed to delete'
+                else:
+                    try:
+                        self.repo.refs.set_if_equals(ref, oldsha, sha)
+                    except all_exceptions:
+                        ref_status = b'failed to write'
+            except KeyError:
+                ref_status = b'bad ref'
+            status.append((ref, ref_status))
+
+        return status
+
+    def _report_status(self, status):
+        if self.has_capability(CAPABILITY_SIDE_BAND_64K):
+            writer = BufferedPktLineWriter(
+              lambda d: self.proto.write_sideband(SIDE_BAND_CHANNEL_DATA, d))
+            write = writer.write
+
+            def flush():
+                writer.flush()
+                self.proto.write_pkt_line(None)
+        else:
+            write = self.proto.write_pkt_line
+
+            def flush():
+                pass
+
+        for name, msg in status:
+            if name == b'unpack':
+                write(b'unpack ' + msg + b'\n')
+            elif msg == b'ok':
+                write(b'ok ' + name + b'\n')
+            else:
+                write(b'ng ' + name + b' ' + msg + b'\n')
+        write(None)
+        flush()
+
+    def handle(self):
+        if self.advertise_refs or not self.http_req:
+            refs = sorted(self.repo.get_refs().items())
+
+            if not refs:
+                refs = [(CAPABILITIES_REF, ZERO_SHA)]
+            self.proto.write_pkt_line(
+              refs[0][1] + b' ' + refs[0][0] + b'\0' +
+              self.capability_line() + b'\n')
+            for i in range(1, len(refs)):
+                ref = refs[i]
+                self.proto.write_pkt_line(ref[1] + b' ' + ref[0] + b'\n')
+
+            self.proto.write_pkt_line(None)
+            if self.advertise_refs:
+                return
+
+        client_refs = []
+        ref = self.proto.read_pkt_line()
+
+        # if ref is None then the client doesn't want to send us anything
+        if ref is None:
+            return
+
+        ref, caps = extract_capabilities(ref)
+        self.set_client_capabilities(caps)
+
+        # client will now send us a list of (oldsha, newsha, ref)
+        while ref:
+            client_refs.append(ref.split())
+            ref = self.proto.read_pkt_line()
+
+        # the backend can now deal with these refs and read a pack
+        # using self.read
+        status = self._apply_pack(client_refs)
+
+        # once we have read the whole pack from the client, send a status
+        # report if the client asked for it
+        if self.has_capability(CAPABILITY_REPORT_STATUS):
+            self._report_status(status)
+
+
+class UploadArchiveHandler(Handler):
+
+    def __init__(self, backend, proto, http_req=None):
+        super(UploadArchiveHandler, self).__init__(backend, proto, http_req)
+
+    def handle(self):
+        # TODO(jelmer)
+        raise NotImplementedError(self.handle)
+
+
+# Default handler classes for git services.
+DEFAULT_HANDLERS = {
+  b'git-upload-pack': UploadPackHandler,
+  b'git-receive-pack': ReceivePackHandler,
+  # b'git-upload-archive': UploadArchiveHandler,
+}
+
+
+class TCPGitRequestHandler(SocketServer.StreamRequestHandler):
+
+    def __init__(self, handlers, *args, **kwargs):
+        self.handlers = handlers
+        SocketServer.StreamRequestHandler.__init__(self, *args, **kwargs)
+
+    def handle(self):
+        proto = ReceivableProtocol(self.connection.recv, self.wfile.write)
+        command, args = proto.read_cmd()
+        logger.info('Handling %s request, args=%s', command, args)
+
+        cls = self.handlers.get(command, None)
+        if not callable(cls):
+            raise GitProtocolError('Invalid service %s' % command)
+        h = cls(self.server.backend, args, proto)
+        h.handle()
+
+
+class TCPGitServer(SocketServer.TCPServer):
+
+    allow_reuse_address = True
+    serve = SocketServer.TCPServer.serve_forever
+
+    def _make_handler(self, *args, **kwargs):
+        return TCPGitRequestHandler(self.handlers, *args, **kwargs)
+
+    def __init__(self, backend, listen_addr, port=TCP_GIT_PORT, handlers=None):
+        self.handlers = dict(DEFAULT_HANDLERS)
+        if handlers is not None:
+            self.handlers.update(handlers)
+        self.backend = backend
+        logger.info('Listening for TCP connections on %s:%d',
+                    listen_addr, port)
+        SocketServer.TCPServer.__init__(self, (listen_addr, port),
+                                        self._make_handler)
+
+    def verify_request(self, request, client_address):
+        logger.info('Handling request from %s', client_address)
+        return True
+
+    def handle_error(self, request, client_address):
+        logger.exception('Exception happened during processing of request '
+                         'from %s', client_address)
+
+
+def main(argv=sys.argv):
+    """Entry point for starting a TCP git server."""
+    import optparse
+    parser = optparse.OptionParser()
+    parser.add_option("-l", "--listen_address", dest="listen_address",
+                      default="localhost",
+                      help="Binding IP address.")
+    parser.add_option("-p", "--port", dest="port", type=int,
+                      default=TCP_GIT_PORT,
+                      help="Binding TCP port.")
+    options, args = parser.parse_args(argv)
+
+    log_utils.default_logging_config()
+    if len(args) > 1:
+        gitdir = args[1]
+    else:
+        gitdir = '.'
+    # TODO(jelmer): Support git-daemon-export-ok and --export-all.
+    backend = FileSystemBackend(gitdir)
+    server = TCPGitServer(backend, options.listen_address, options.port)
+    server.serve_forever()
+
+
+def serve_command(handler_cls, argv=sys.argv, backend=None, inf=sys.stdin,
+                  outf=sys.stdout):
+    """Serve a single command.
+
+    This is mostly useful for the implementation of commands used by e.g.
+    git+ssh.
+
+    :param handler_cls: `Handler` class to use for the request
+    :param argv: execv-style command-line arguments. Defaults to sys.argv.
+    :param backend: `Backend` to use
+    :param inf: File-like object to read from, defaults to standard input.
+    :param outf: File-like object to write to, defaults to standard output.
+    :return: Exit code for use with sys.exit. 0 on success, 1 on failure.
+    """
+    if backend is None:
+        backend = FileSystemBackend()
+
+    def send_fn(data):
+        outf.write(data)
+        outf.flush()
+    proto = Protocol(inf.read, send_fn)
+    handler = handler_cls(backend, argv[1:], proto)
+    # FIXME: Catch exceptions and write a single-line summary to outf.
+    handler.handle()
+    return 0
+
+
+def generate_info_refs(repo):
+    """Generate an info refs file."""
+    refs = repo.get_refs()
+    return write_info_refs(refs, repo.object_store)
+
+
+def generate_objects_info_packs(repo):
+    """Generate an objects/info/packs index file."""
+    for pack in repo.object_store.packs:
+        yield (
+            b'P ' + pack.data.filename.encode(sys.getfilesystemencoding()) +
+            b'\n')
+
+
+def update_server_info(repo):
+    """Generate server info for dumb file access.
+
+    This generates info/refs and objects/info/packs,
+    similar to "git update-server-info".
+    """
+    repo._put_named_file(
+        os.path.join('info', 'refs'),
+        b"".join(generate_info_refs(repo)))
+
+    repo._put_named_file(
+        os.path.join('objects', 'info', 'packs'),
+        b"".join(generate_objects_info_packs(repo)))
+
+
+if __name__ == '__main__':
+    main()
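
A minimal sketch of how the pieces above fit together, mirroring what main() does (the /srv/git path is a placeholder):

    from dulwich.server import FileSystemBackend, TCPGitServer

    # Export the repositories found under this root (hypothetical path).
    backend = FileSystemBackend('/srv/git')
    # Binds localhost on TCP_GIT_PORT (9418) by default, like main() above.
    server = TCPGitServer(backend, 'localhost')
    server.serve_forever()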

+ 19 - 0
dulwich/stdint.h

@@ -0,0 +1,19 @@
+/**
+ * Replacement for gcc's stdint.h for MSVC
+ */
+
+#ifndef STDINT_H
+#define STDINT_H
+
+typedef signed char       int8_t;
+typedef signed short      int16_t;
+typedef signed int        int32_t;
+typedef signed long long  int64_t;
+
+typedef unsigned char       uint8_t;
+typedef unsigned short      uint16_t;
+typedef unsigned int        uint32_t;
+typedef unsigned long long  uint64_t;
+
+
+#endif

+ 184 - 0
dulwich/tests/__init__.py

@@ -0,0 +1,184 @@
+# __init__.py -- The tests for dulwich
+# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for Dulwich."""
+
+import doctest
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+
+
+# Python's unittest provides SkipTest and the other helpers we need
+import unittest
+from unittest import (  # noqa: F401
+    SkipTest,
+    TestCase as _TestCase,
+    skipIf,
+    expectedFailure,
+    )
+
+
+class TestCase(_TestCase):
+
+    def setUp(self):
+        super(TestCase, self).setUp()
+        self._old_home = os.environ.get("HOME")
+        os.environ["HOME"] = "/nonexistant"
+
+    def tearDown(self):
+        super(TestCase, self).tearDown()
+        if self._old_home:
+            os.environ["HOME"] = self._old_home
+        else:
+            del os.environ["HOME"]
+
+
+class BlackboxTestCase(TestCase):
+    """Blackbox testing."""
+
+    # TODO(jelmer): Include more possible binary paths.
+    bin_directories = [
+        os.path.abspath(
+            os.path.join(os.path.dirname(__file__), "..", "..", "bin")),
+        '/usr/bin',
+        '/usr/local/bin',
+        ]
+
+    def bin_path(self, name):
+        """Determine the full path of a binary.
+
+        :param name: Name of the script
+        :return: Full path
+        """
+        for d in self.bin_directories:
+            p = os.path.join(d, name)
+            if os.path.isfile(p):
+                return p
+        raise SkipTest("Unable to find binary %s" % name)
+
+    def run_command(self, name, args):
+        """Run a Dulwich command.
+
+        :param name: Name of the command, as it exists in bin/
+        :param args: Arguments to the command
+        """
+        env = dict(os.environ)
+        env["PYTHONPATH"] = os.pathsep.join(sys.path)
+
+        # Since they don't have any extension, Windows can't recognize the
+        # executability of the Python files in bin/. Even then, we'd have
+        # to expect the user to set up file associations for .py files.
+        #
+        # Save us from all that headache and call python with the bin script.
+        argv = [sys.executable, self.bin_path(name)] + args
+        return subprocess.Popen(
+                argv,
+                stdout=subprocess.PIPE,
+                stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+                env=env)
+
+
+def self_test_suite():
+    names = [
+        'archive',
+        'blackbox',
+        'client',
+        'config',
+        'diff_tree',
+        'fastexport',
+        'file',
+        'grafts',
+        'greenthreads',
+        'hooks',
+        'ignore',
+        'index',
+        'lru_cache',
+        'objects',
+        'objectspec',
+        'object_store',
+        'missing_obj_finder',
+        'pack',
+        'patch',
+        'porcelain',
+        'protocol',
+        'reflog',
+        'refs',
+        'repository',
+        'server',
+        'utils',
+        'walk',
+        'web',
+        ]
+    module_names = ['dulwich.tests.test_' + name for name in names]
+    loader = unittest.TestLoader()
+    return loader.loadTestsFromNames(module_names)
+
+
+def tutorial_test_suite():
+    tutorial = [
+        'introduction',
+        'file-format',
+        'repo',
+        'object-store',
+        'remote',
+        'conclusion',
+        ]
+    tutorial_files = ["../../docs/tutorial/%s.txt" % name for name in tutorial]
+
+    def setup(test):
+        test.__old_cwd = os.getcwd()
+        test.__dulwich_tempdir = tempfile.mkdtemp()
+        os.chdir(test.__dulwich_tempdir)
+
+    def teardown(test):
+        os.chdir(test.__old_cwd)
+        shutil.rmtree(test.__dulwich_tempdir)
+    return doctest.DocFileSuite(
+            setUp=setup, tearDown=teardown, *tutorial_files)
+
+
+def nocompat_test_suite():
+    result = unittest.TestSuite()
+    result.addTests(self_test_suite())
+    from dulwich.contrib import test_suite as contrib_test_suite
+    if sys.version_info[0] == 2:
+        result.addTests(tutorial_test_suite())
+    result.addTests(contrib_test_suite())
+    return result
+
+
+def compat_test_suite():
+    result = unittest.TestSuite()
+    from dulwich.tests.compat import test_suite as compat_test_suite
+    result.addTests(compat_test_suite())
+    return result
+
+
+def test_suite():
+    result = unittest.TestSuite()
+    result.addTests(self_test_suite())
+    if sys.version_info[0] == 2 and sys.platform != 'win32':
+        result.addTests(tutorial_test_suite())
+    from dulwich.tests.compat import test_suite as compat_test_suite
+    result.addTests(compat_test_suite())
+    from dulwich.contrib import test_suite as contrib_test_suite
+    result.addTests(contrib_test_suite())
+    return result
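
The suite functions above are plain unittest factories, so they can be driven without a dedicated runner; a minimal sketch, assuming dulwich is importable:

    import unittest

    from dulwich.tests import self_test_suite

    # Run only the unit tests; test_suite() would also pull in the
    # tutorial, compat and contrib suites, as defined above.
    unittest.TextTestRunner(verbosity=2).run(self_test_suite())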

+ 40 - 0
dulwich/tests/compat/__init__.py

@@ -0,0 +1,40 @@
+# __init__.py -- Compatibility tests for dulwich
+# Copyright (C) 2010 Jelmer Vernooij <jelmer@samba.org>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for Dulwich."""
+
+import unittest
+
+
+def test_suite():
+    names = [
+        'client',
+        'pack',
+        'repository',
+        'server',
+        'utils',
+        'web',
+        ]
+    module_names = ['dulwich.tests.compat.test_' + name for name in names]
+    result = unittest.TestSuite()
+    loader = unittest.TestLoader()
+    suite = loader.loadTestsFromNames(module_names)
+    result.addTests(suite)
+    return result

+ 311 - 0
dulwich/tests/compat/server_utils.py

@@ -0,0 +1,311 @@
+# server_utils.py -- Git server compatibility utilities
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Utilities for testing git server compatibility."""
+
+import errno
+import os
+import shutil
+import socket
+import tempfile
+
+from dulwich.repo import Repo
+from dulwich.objects import hex_to_sha
+from dulwich.server import (
+    ReceivePackHandler,
+    )
+from dulwich.tests.utils import (
+    tear_down_repo,
+    )
+from dulwich.tests.compat.utils import (
+    require_git_version,
+    run_git_or_fail,
+    )
+
+
+class _StubRepo(object):
+    """A stub repo that just contains a path to tear down."""
+
+    def __init__(self, name):
+        temp_dir = tempfile.mkdtemp()
+        self.path = os.path.join(temp_dir, name)
+        os.mkdir(self.path)
+
+    def close(self):
+        pass
+
+
+def _get_shallow(repo):
+    shallow_file = repo.get_named_file('shallow')
+    if not shallow_file:
+        return []
+    shallows = []
+    with shallow_file:
+        for line in shallow_file:
+            sha = line.strip()
+            if not sha:
+                continue
+            hex_to_sha(sha)
+            shallows.append(sha)
+    return shallows
+
+
+class ServerTests(object):
+    """Base tests for testing servers.
+
+    Does not inherit from TestCase so tests are not automatically run.
+    """
+
+    min_single_branch_version = (1, 7, 10,)
+
+    def import_repos(self):
+        self._old_repo = self.import_repo('server_old.export')
+        self._new_repo = self.import_repo('server_new.export')
+
+    def url(self, port):
+        return '%s://localhost:%s/' % (self.protocol, port)
+
+    def branch_args(self, branches=None):
+        if branches is None:
+            branches = ['master', 'branch']
+        return ['%s:%s' % (b, b) for b in branches]
+
+    def test_push_to_dulwich(self):
+        self.import_repos()
+        self.assertReposNotEqual(self._old_repo, self._new_repo)
+        port = self._start_server(self._old_repo)
+
+        run_git_or_fail(['push', self.url(port)] + self.branch_args(),
+                        cwd=self._new_repo.path)
+        self.assertReposEqual(self._old_repo, self._new_repo)
+
+    def test_push_to_dulwich_no_op(self):
+        self._old_repo = self.import_repo('server_old.export')
+        self._new_repo = self.import_repo('server_old.export')
+        self.assertReposEqual(self._old_repo, self._new_repo)
+        port = self._start_server(self._old_repo)
+
+        run_git_or_fail(['push', self.url(port)] + self.branch_args(),
+                        cwd=self._new_repo.path)
+        self.assertReposEqual(self._old_repo, self._new_repo)
+
+    def test_push_to_dulwich_remove_branch(self):
+        self._old_repo = self.import_repo('server_old.export')
+        self._new_repo = self.import_repo('server_old.export')
+        self.assertReposEqual(self._old_repo, self._new_repo)
+        port = self._start_server(self._old_repo)
+
+        run_git_or_fail(['push', self.url(port), ":master"],
+                        cwd=self._new_repo.path)
+
+        self.assertEqual(
+            list(self._old_repo.get_refs().keys()), [b"refs/heads/branch"])
+
+    def test_fetch_from_dulwich(self):
+        self.import_repos()
+        self.assertReposNotEqual(self._old_repo, self._new_repo)
+        port = self._start_server(self._new_repo)
+
+        run_git_or_fail(['fetch', self.url(port)] + self.branch_args(),
+                        cwd=self._old_repo.path)
+        # flush the pack cache so any new packs are picked up
+        self._old_repo.object_store._pack_cache_time = 0
+        self.assertReposEqual(self._old_repo, self._new_repo)
+
+    def test_fetch_from_dulwich_no_op(self):
+        self._old_repo = self.import_repo('server_old.export')
+        self._new_repo = self.import_repo('server_old.export')
+        self.assertReposEqual(self._old_repo, self._new_repo)
+        port = self._start_server(self._new_repo)
+
+        run_git_or_fail(['fetch', self.url(port)] + self.branch_args(),
+                        cwd=self._old_repo.path)
+        # flush the pack cache so any new packs are picked up
+        self._old_repo.object_store._pack_cache_time = 0
+        self.assertReposEqual(self._old_repo, self._new_repo)
+
+    def test_clone_from_dulwich_empty(self):
+        old_repo_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, old_repo_dir)
+        self._old_repo = Repo.init_bare(old_repo_dir)
+        port = self._start_server(self._old_repo)
+
+        new_repo_base_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, new_repo_base_dir)
+        new_repo_dir = os.path.join(new_repo_base_dir, 'empty_new')
+        run_git_or_fail(['clone', self.url(port), new_repo_dir],
+                        cwd=new_repo_base_dir)
+        new_repo = Repo(new_repo_dir)
+        self.assertReposEqual(self._old_repo, new_repo)
+
+    def test_lsremote_from_dulwich(self):
+        self._repo = self.import_repo('server_old.export')
+        port = self._start_server(self._repo)
+        o = run_git_or_fail(['ls-remote', self.url(port)])
+        self.assertEqual(len(o.split(b'\n')), 4)
+
+    def test_new_shallow_clone_from_dulwich(self):
+        require_git_version(self.min_single_branch_version)
+        self._source_repo = self.import_repo('server_new.export')
+        self._stub_repo = _StubRepo('shallow')
+        self.addCleanup(tear_down_repo, self._stub_repo)
+        port = self._start_server(self._source_repo)
+
+        # Fetch at depth 1
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
+        clone = self._stub_repo = Repo(self._stub_repo.path)
+        expected_shallow = [b'35e0b59e187dd72a0af294aedffc213eaa4d03ff',
+                            b'514dc6d3fbfe77361bcaef320c4d21b72bc10be9']
+        self.assertEqual(expected_shallow, _get_shallow(clone))
+        self.assertReposNotEqual(clone, self._source_repo)
+
+    def test_shallow_clone_from_git_is_identical(self):
+        require_git_version(self.min_single_branch_version)
+        self._source_repo = self.import_repo('server_new.export')
+        self._stub_repo_git = _StubRepo('shallow-git')
+        self.addCleanup(tear_down_repo, self._stub_repo_git)
+        self._stub_repo_dw = _StubRepo('shallow-dw')
+        self.addCleanup(tear_down_repo, self._stub_repo_dw)
+
+        # shallow clone using stock git, then using dulwich
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             'file://' + self._source_repo.path, self._stub_repo_git.path])
+
+        port = self._start_server(self._source_repo)
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=1', '--no-single-branch',
+             self.url(port), self._stub_repo_dw.path])
+
+        # compare the two clones; they should be equal
+        self.assertReposEqual(Repo(self._stub_repo_git.path),
+                              Repo(self._stub_repo_dw.path))
+
+    def test_fetch_same_depth_into_shallow_clone_from_dulwich(self):
+        require_git_version(self.min_single_branch_version)
+        self._source_repo = self.import_repo('server_new.export')
+        self._stub_repo = _StubRepo('shallow')
+        self.addCleanup(tear_down_repo, self._stub_repo)
+        port = self._start_server(self._source_repo)
+
+        # Fetch at depth 2
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=2', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
+        clone = self._stub_repo = Repo(self._stub_repo.path)
+
+        # Fetching at the same depth is a no-op.
+        run_git_or_fail(
+          ['fetch', '--depth=2', self.url(port)] + self.branch_args(),
+          cwd=self._stub_repo.path)
+        expected_shallow = [b'94de09a530df27ac3bb613aaecdd539e0a0655e1',
+                            b'da5cd81e1883c62a25bb37c4d1f8ad965b29bf8d']
+        self.assertEqual(expected_shallow, _get_shallow(clone))
+        self.assertReposNotEqual(clone, self._source_repo)
+
+    def test_fetch_full_depth_into_shallow_clone_from_dulwich(self):
+        require_git_version(self.min_single_branch_version)
+        self._source_repo = self.import_repo('server_new.export')
+        self._stub_repo = _StubRepo('shallow')
+        self.addCleanup(tear_down_repo, self._stub_repo)
+        port = self._start_server(self._source_repo)
+
+        # Fetch at depth 2
+        run_git_or_fail(
+            ['clone', '--mirror', '--depth=2', '--no-single-branch',
+             self.url(port), self._stub_repo.path])
+        clone = self._stub_repo = Repo(self._stub_repo.path)
+
+        # Fetching at the same depth is a no-op.
+        run_git_or_fail(
+          ['fetch', '--depth=2', self.url(port)] + self.branch_args(),
+          cwd=self._stub_repo.path)
+
+        # The whole repo only has depth 4, so it should equal server_new.
+        run_git_or_fail(
+          ['fetch', '--depth=4', self.url(port)] + self.branch_args(),
+          cwd=self._stub_repo.path)
+        self.assertEqual([], _get_shallow(clone))
+        self.assertReposEqual(clone, self._source_repo)
+
+    def test_fetch_from_dulwich_issue_88_standard(self):
+        # Basically an integration test to see that the ACK/NAK
+        # generation works on repos with common head.
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_server.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
+        port = self._start_server(self._source_repo)
+
+        run_git_or_fail(['fetch', self.url(port), 'master'],
+                        cwd=self._client_repo.path)
+        self.assertObjectStoreEqual(
+            self._source_repo.object_store,
+            self._client_repo.object_store)
+
+    def test_fetch_from_dulwich_issue_88_alternative(self):
+        # likewise, but the case where the two repos have no common parent
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_other.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
+        port = self._start_server(self._source_repo)
+
+        self.assertRaises(
+            KeyError, self._client_repo.get_object,
+            b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323')
+        run_git_or_fail(['fetch', self.url(port), 'master'],
+                        cwd=self._client_repo.path)
+        self.assertEqual(b'commit', self._client_repo.get_object(
+            b'02a14da1fc1fc13389bbf32f0af7d8899f2b2323').type_name)
+
+    def test_push_to_dulwich_issue_88_standard(self):
+        # Same thing, but we reverse the role of the server/client
+        # and do a push instead.
+        self._source_repo = self.import_repo(
+            'issue88_expect_ack_nak_client.export')
+        self._client_repo = self.import_repo(
+            'issue88_expect_ack_nak_server.export')
+        port = self._start_server(self._source_repo)
+
+        run_git_or_fail(['push', self.url(port), 'master'],
+                        cwd=self._client_repo.path)
+        self.assertReposEqual(self._source_repo, self._client_repo)
+
+
+# TODO(dborowitz): Come up with a better way of testing various permutations of
+# capabilities. The only reason it is the way it is now is that side-band-64k
+# was only recently introduced into git-receive-pack.
+class NoSideBand64kReceivePackHandler(ReceivePackHandler):
+    """ReceivePackHandler that does not support side-band-64k."""
+
+    @classmethod
+    def capabilities(cls):
+        return tuple(c for c in ReceivePackHandler.capabilities()
+                     if c != b'side-band-64k')
+
+
+def ignore_error(error):
+    """Check whether this error is safe to ignore."""
+    (e_type, e_value, e_tb) = error
+    return (issubclass(e_type, socket.error) and
+            e_value[0] in (errno.ECONNRESET, errno.EPIPE))
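
ServerTests is written as a mixin: a concrete suite combines it with a TestCase-derived class and supplies the protocol attribute (consumed by url()) plus a _start_server method. A hypothetical minimal subclass, with illustrative names only:

    from dulwich.tests.compat.server_utils import ServerTests
    from dulwich.tests.compat.utils import CompatTestCase

    class ExampleTCPGitServerTests(ServerTests, CompatTestCase):
        protocol = 'git'  # url() then yields git://localhost:<port>/

        def _start_server(self, repo):
            # Start a server for `repo` and return the port it listens
            # on; the concrete versions live in the other compat tests.
            raise NotImplementedError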

+ 542 - 0
dulwich/tests/compat/test_client.py

@@ -0,0 +1,542 @@
+# test_client.py -- Compatibility tests for the git client.
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests between the Dulwich client and the cgit server."""
+
+import copy
+from io import BytesIO
+import os
+import select
+import signal
+import subprocess
+import sys
+import tarfile
+import tempfile
+import threading
+
+try:
+    from urlparse import unquote
+except ImportError:
+    from urllib.parse import unquote
+
+
+try:
+    import BaseHTTPServer
+    import SimpleHTTPServer
+except ImportError:
+    import http.server
+    BaseHTTPServer = http.server
+    SimpleHTTPServer = http.server
+
+from dulwich import (
+    client,
+    errors,
+    file,
+    index,
+    protocol,
+    objects,
+    repo,
+    )
+from dulwich.tests import (
+    SkipTest,
+    expectedFailure,
+    )
+from dulwich.tests.compat.utils import (
+    CompatTestCase,
+    check_for_daemon,
+    import_repo_to_dir,
+    rmtree_ro,
+    run_git_or_fail,
+    _DEFAULT_GIT,
+    )
+
+
+if sys.platform == 'win32':
+    import ctypes
+
+
+class DulwichClientTestBase(object):
+    """Tests for client/server compatibility."""
+
+    def setUp(self):
+        self.gitroot = os.path.dirname(
+                import_repo_to_dir('server_new.export').rstrip(os.sep))
+        self.dest = os.path.join(self.gitroot, 'dest')
+        file.ensure_dir_exists(self.dest)
+        run_git_or_fail(['init', '--quiet', '--bare'], cwd=self.dest)
+
+    def tearDown(self):
+        rmtree_ro(self.gitroot)
+
+    def assertDestEqualsSrc(self):
+        repo_dir = os.path.join(self.gitroot, 'server_new.export')
+        dest_repo_dir = os.path.join(self.gitroot, 'dest')
+        with repo.Repo(repo_dir) as src:
+            with repo.Repo(dest_repo_dir) as dest:
+                self.assertReposEqual(src, dest)
+
+    def _client(self):
+        raise NotImplementedError()
+
+    def _build_path(self):
+        raise NotImplementedError()
+
+    def _do_send_pack(self):
+        c = self._client()
+        srcpath = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(srcpath) as src:
+            sendrefs = dict(src.get_refs())
+            del sendrefs[b'HEAD']
+            c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
+                        src.object_store.generate_pack_contents)
+
+    def test_send_pack(self):
+        self._do_send_pack()
+        self.assertDestEqualsSrc()
+
+    def test_send_pack_nothing_to_send(self):
+        self._do_send_pack()
+        self.assertDestEqualsSrc()
+        # nothing to send, but shouldn't raise either.
+        self._do_send_pack()
+
+    def test_send_without_report_status(self):
+        c = self._client()
+        c._send_capabilities.remove(b'report-status')
+        srcpath = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(srcpath) as src:
+            sendrefs = dict(src.get_refs())
+            del sendrefs[b'HEAD']
+            c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
+                        src.object_store.generate_pack_contents)
+            self.assertDestEqualsSrc()
+
+    def make_dummy_commit(self, dest):
+        b = objects.Blob.from_string(b'hi')
+        dest.object_store.add_object(b)
+        t = index.commit_tree(dest.object_store, [(b'hi', b.id, 0o100644)])
+        c = objects.Commit()
+        c.author = c.committer = b'Foo Bar <foo@example.com>'
+        c.author_time = c.commit_time = 0
+        c.author_timezone = c.commit_timezone = 0
+        c.message = b'hi'
+        c.tree = t
+        dest.object_store.add_object(c)
+        return c.id
+
+    def disable_ff_and_make_dummy_commit(self):
+        # disable non-fast-forward pushes to the server
+        dest = repo.Repo(os.path.join(self.gitroot, 'dest'))
+        run_git_or_fail(['config', 'receive.denyNonFastForwards', 'true'],
+                        cwd=dest.path)
+        commit_id = self.make_dummy_commit(dest)
+        return dest, commit_id
+
+    def compute_send(self, src):
+        sendrefs = dict(src.get_refs())
+        del sendrefs[b'HEAD']
+        return sendrefs, src.object_store.generate_pack_contents
+
+    def test_send_pack_one_error(self):
+        dest, dummy_commit = self.disable_ff_and_make_dummy_commit()
+        dest.refs[b'refs/heads/master'] = dummy_commit
+        repo_dir = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(repo_dir) as src:
+            sendrefs, gen_pack = self.compute_send(src)
+            c = self._client()
+            try:
+                c.send_pack(self._build_path('/dest'),
+                            lambda _: sendrefs, gen_pack)
+            except errors.UpdateRefsError as e:
+                self.assertEqual('refs/heads/master failed to update',
+                                 e.args[0])
+                self.assertEqual({b'refs/heads/branch': b'ok',
+                                  b'refs/heads/master': b'non-fast-forward'},
+                                 e.ref_status)
+
+    def test_send_pack_multiple_errors(self):
+        dest, dummy = self.disable_ff_and_make_dummy_commit()
+        # set up for two non-ff errors
+        branch, master = b'refs/heads/branch', b'refs/heads/master'
+        dest.refs[branch] = dest.refs[master] = dummy
+        repo_dir = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(repo_dir) as src:
+            sendrefs, gen_pack = self.compute_send(src)
+            c = self._client()
+            try:
+                c.send_pack(self._build_path('/dest'), lambda _: sendrefs,
+                            gen_pack)
+            except errors.UpdateRefsError as e:
+                self.assertIn(
+                        str(e),
+                        ['{0}, {1} failed to update'.format(
+                            branch.decode('ascii'), master.decode('ascii')),
+                         '{1}, {0} failed to update'.format(
+                             branch.decode('ascii'), master.decode('ascii'))])
+                self.assertEqual({branch: b'non-fast-forward',
+                                  master: b'non-fast-forward'},
+                                 e.ref_status)
+
+    def test_archive(self):
+        c = self._client()
+        f = BytesIO()
+        c.archive(self._build_path('/server_new.export'), b'HEAD', f.write)
+        f.seek(0)
+        tf = tarfile.open(fileobj=f)
+        self.assertEqual(['baz', 'foo'], tf.getnames())
+
+    def test_fetch_pack(self):
+        c = self._client()
+        with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
+            refs = c.fetch(self._build_path('/server_new.export'), dest)
+            for r in refs.items():
+                dest.refs.set_if_equals(r[0], None, r[1])
+            self.assertDestEqualsSrc()
+
+    def test_incremental_fetch_pack(self):
+        self.test_fetch_pack()
+        dest, dummy = self.disable_ff_and_make_dummy_commit()
+        dest.refs[b'refs/heads/master'] = dummy
+        c = self._client()
+        repo_dir = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(repo_dir) as dest:
+            refs = c.fetch(self._build_path('/dest'), dest)
+            for r in refs.items():
+                dest.refs.set_if_equals(r[0], None, r[1])
+            self.assertDestEqualsSrc()
+
+    def test_fetch_pack_no_side_band_64k(self):
+        c = self._client()
+        c._fetch_capabilities.remove(b'side-band-64k')
+        with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
+            refs = c.fetch(self._build_path('/server_new.export'), dest)
+            for r in refs.items():
+                dest.refs.set_if_equals(r[0], None, r[1])
+            self.assertDestEqualsSrc()
+
+    def test_fetch_pack_zero_sha(self):
+        # zero sha1s are already present on the client, and should
+        # be ignored
+        c = self._client()
+        with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
+            refs = c.fetch(
+                self._build_path('/server_new.export'), dest,
+                lambda refs: [protocol.ZERO_SHA])
+            for r in refs.items():
+                dest.refs.set_if_equals(r[0], None, r[1])
+
+    def test_send_remove_branch(self):
+        with repo.Repo(os.path.join(self.gitroot, 'dest')) as dest:
+            dummy_commit = self.make_dummy_commit(dest)
+            dest.refs[b'refs/heads/master'] = dummy_commit
+            dest.refs[b'refs/heads/abranch'] = dummy_commit
+            sendrefs = dict(dest.refs)
+            sendrefs[b'refs/heads/abranch'] = b"00" * 20
+            del sendrefs[b'HEAD']
+
+            def gen_pack(have, want):
+                return []
+            c = self._client()
+            self.assertEqual(dest.refs[b"refs/heads/abranch"], dummy_commit)
+            c.send_pack(
+                self._build_path('/dest'), lambda _: sendrefs, gen_pack)
+            self.assertFalse(b"refs/heads/abranch" in dest.refs)
+
+    def test_get_refs(self):
+        c = self._client()
+        refs = c.get_refs(self._build_path('/server_new.export'))
+
+        repo_dir = os.path.join(self.gitroot, 'server_new.export')
+        with repo.Repo(repo_dir) as dest:
+            self.assertDictEqual(dest.refs.as_dict(), refs)
+
+
+class DulwichTCPClientTest(CompatTestCase, DulwichClientTestBase):
+
+    def setUp(self):
+        CompatTestCase.setUp(self)
+        DulwichClientTestBase.setUp(self)
+        if check_for_daemon(limit=1):
+            raise SkipTest('git-daemon was already running on port %s' %
+                           protocol.TCP_GIT_PORT)
+        fd, self.pidfile = tempfile.mkstemp(prefix='dulwich-test-git-client',
+                                            suffix=".pid")
+        os.fdopen(fd).close()
+        args = [_DEFAULT_GIT, 'daemon', '--verbose', '--export-all',
+                '--pid-file=%s' % self.pidfile,
+                '--base-path=%s' % self.gitroot,
+                '--enable=receive-pack', '--enable=upload-archive',
+                '--listen=localhost', '--reuseaddr',
+                self.gitroot]
+        self.process = subprocess.Popen(
+            args, cwd=self.gitroot,
+            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        if not check_for_daemon():
+            raise SkipTest('git-daemon failed to start')
+
+    def tearDown(self):
+        with open(self.pidfile) as f:
+            pid = int(f.read().strip())
+        if sys.platform == 'win32':
+            PROCESS_TERMINATE = 1
+            handle = ctypes.windll.kernel32.OpenProcess(
+                PROCESS_TERMINATE, False, pid)
+            ctypes.windll.kernel32.TerminateProcess(handle, -1)
+            ctypes.windll.kernel32.CloseHandle(handle)
+        else:
+            try:
+                os.kill(pid, signal.SIGKILL)
+                os.unlink(self.pidfile)
+            except (OSError, IOError):
+                pass
+        self.process.wait()
+        self.process.stdout.close()
+        self.process.stderr.close()
+        DulwichClientTestBase.tearDown(self)
+        CompatTestCase.tearDown(self)
+
+    def _client(self):
+        return client.TCPGitClient('localhost')
+
+    def _build_path(self, path):
+        return path
+
+    if sys.platform == 'win32':
+        @expectedFailure
+        def test_fetch_pack_no_side_band_64k(self):
+            DulwichClientTestBase.test_fetch_pack_no_side_band_64k(self)
+
+
+class TestSSHVendor(object):
+
+    @staticmethod
+    def run_command(host, command, username=None, port=None):
+        cmd, path = command.split(' ')
+        cmd = cmd.split('-', 1)
+        path = path.replace("'", "")
+        p = subprocess.Popen(cmd + [path], bufsize=0, stdin=subprocess.PIPE,
+                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        return client.SubprocessWrapper(p)
+
+
+class DulwichMockSSHClientTest(CompatTestCase, DulwichClientTestBase):
+
+    def setUp(self):
+        CompatTestCase.setUp(self)
+        DulwichClientTestBase.setUp(self)
+        self.real_vendor = client.get_ssh_vendor
+        client.get_ssh_vendor = TestSSHVendor
+
+    def tearDown(self):
+        DulwichClientTestBase.tearDown(self)
+        CompatTestCase.tearDown(self)
+        client.get_ssh_vendor = self.real_vendor
+
+    def _client(self):
+        return client.SSHGitClient('localhost')
+
+    def _build_path(self, path):
+        return self.gitroot + path
+
+
+class DulwichSubprocessClientTest(CompatTestCase, DulwichClientTestBase):
+
+    def setUp(self):
+        CompatTestCase.setUp(self)
+        DulwichClientTestBase.setUp(self)
+
+    def tearDown(self):
+        DulwichClientTestBase.tearDown(self)
+        CompatTestCase.tearDown(self)
+
+    def _client(self):
+        return client.SubprocessGitClient(stderr=subprocess.PIPE)
+
+    def _build_path(self, path):
+        return self.gitroot + path
+
+
+class GitHTTPRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
+    """HTTP Request handler that calls out to 'git http-backend'."""
+
+    # Make rfile unbuffered -- we need to read one line and then pass
+    # the rest to a subprocess, so we can't use buffered input.
+    rbufsize = 0
+
+    def do_POST(self):
+        self.run_backend()
+
+    def do_GET(self):
+        self.run_backend()
+
+    def send_head(self):
+        return self.run_backend()
+
+    def log_request(self, code='-', size='-'):
+        # Let's be quiet, the test suite is noisy enough already
+        pass
+
+    def run_backend(self):
+        """Call out to git http-backend."""
+        # Based on CGIHTTPServer.CGIHTTPRequestHandler.run_cgi:
+        # Copyright (c) 2001-2010 Python Software Foundation;
+        # All Rights Reserved
+        # Licensed under the Python Software Foundation License.
+        rest = self.path
+        # find an explicit query string, if present.
+        i = rest.rfind('?')
+        if i >= 0:
+            rest, query = rest[:i], rest[i+1:]
+        else:
+            query = ''
+
+        env = copy.deepcopy(os.environ)
+        env['SERVER_SOFTWARE'] = self.version_string()
+        env['SERVER_NAME'] = self.server.server_name
+        env['GATEWAY_INTERFACE'] = 'CGI/1.1'
+        env['SERVER_PROTOCOL'] = self.protocol_version
+        env['SERVER_PORT'] = str(self.server.server_port)
+        env['GIT_PROJECT_ROOT'] = self.server.root_path
+        env["GIT_HTTP_EXPORT_ALL"] = "1"
+        env['REQUEST_METHOD'] = self.command
+        uqrest = unquote(rest)
+        env['PATH_INFO'] = uqrest
+        env['SCRIPT_NAME'] = "/"
+        if query:
+            env['QUERY_STRING'] = query
+        host = self.address_string()
+        if host != self.client_address[0]:
+            env['REMOTE_HOST'] = host
+        env['REMOTE_ADDR'] = self.client_address[0]
+        authorization = self.headers.get("authorization")
+        if authorization:
+            authorization = authorization.split()
+            if len(authorization) == 2:
+                import base64
+                import binascii
+                env['AUTH_TYPE'] = authorization[0]
+                if authorization[0].lower() == "basic":
+                    try:
+                        authorization = base64.decodestring(authorization[1])
+                    except binascii.Error:
+                        pass
+                    else:
+                        authorization = authorization.split(':')
+                        if len(authorization) == 2:
+                            env['REMOTE_USER'] = authorization[0]
+        # XXX REMOTE_IDENT
+        env['CONTENT_TYPE'] = self.headers.get('content-type')
+        length = self.headers.get('content-length')
+        if length:
+            env['CONTENT_LENGTH'] = length
+        referer = self.headers.get('referer')
+        if referer:
+            env['HTTP_REFERER'] = referer
+        accept = []
+        for line in self.headers.getallmatchingheaders('accept'):
+            if line[:1] in "\t\n\r ":
+                accept.append(line.strip())
+            else:
+                accept = accept + line[7:].split(',')
+        env['HTTP_ACCEPT'] = ','.join(accept)
+        ua = self.headers.get('user-agent')
+        if ua:
+            env['HTTP_USER_AGENT'] = ua
+        co = self.headers.get('cookie')
+        if co:
+            env['HTTP_COOKIE'] = co
+        # XXX Other HTTP_* headers
+        # Since we're setting the env in the parent, provide empty
+        # values to override previously set values
+        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',
+                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):
+            env.setdefault(k, "")
+
+        self.wfile.write(b"HTTP/1.1 200 Script output follows\r\n")
+        self.wfile.write(
+            ("Server: %s\r\n" % self.server.server_name).encode('ascii'))
+        self.wfile.write(
+            ("Date: %s\r\n" % self.date_time_string()).encode('ascii'))
+
+        decoded_query = query.replace('+', ' ')
+
+        try:
+            nbytes = int(length)
+        except (TypeError, ValueError):
+            nbytes = 0
+        if self.command.lower() == "post" and nbytes > 0:
+            data = self.rfile.read(nbytes)
+        else:
+            data = None
+        # throw away additional data [see bug #427345]
+        while select.select([self.rfile._sock], [], [], 0)[0]:
+            if not self.rfile._sock.recv(1):
+                break
+        args = ['http-backend']
+        if '=' not in decoded_query:
+            args.append(decoded_query)
+        stdout = run_git_or_fail(
+            args, input=data, env=env, stderr=subprocess.PIPE)
+        self.wfile.write(stdout)
+
+
+class HTTPGitServer(BaseHTTPServer.HTTPServer):
+
+    allow_reuse_address = True
+
+    def __init__(self, server_address, root_path):
+        BaseHTTPServer.HTTPServer.__init__(
+            self, server_address, GitHTTPRequestHandler)
+        self.root_path = root_path
+        self.server_name = "localhost"
+
+    def get_url(self):
+        return 'http://%s:%s/' % (self.server_name, self.server_port)
+
+
+class DulwichHttpClientTest(CompatTestCase, DulwichClientTestBase):
+
+    min_git_version = (1, 7, 0, 2)
+
+    def setUp(self):
+        CompatTestCase.setUp(self)
+        DulwichClientTestBase.setUp(self)
+        self._httpd = HTTPGitServer(("localhost", 0), self.gitroot)
+        self.addCleanup(self._httpd.shutdown)
+        threading.Thread(target=self._httpd.serve_forever).start()
+        run_git_or_fail(['config', 'http.uploadpack', 'true'],
+                        cwd=self.dest)
+        run_git_or_fail(['config', 'http.receivepack', 'true'],
+                        cwd=self.dest)
+
+    def tearDown(self):
+        DulwichClientTestBase.tearDown(self)
+        CompatTestCase.tearDown(self)
+        self._httpd.shutdown()
+        self._httpd.socket.close()
+
+    def _client(self):
+        return client.HttpGitClient(self._httpd.get_url())
+
+    def _build_path(self, path):
+        return path
+
+    def test_archive(self):
+        raise SkipTest("exporting archives not supported over http")
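
The fetch tests above all follow the same client-side pattern: fetch the remote refs into the local object store, then fast-forward the local refs with set_if_equals. Condensed into a standalone sketch (host and paths are placeholders):

    from dulwich import client, repo

    c = client.TCPGitClient('localhost')   # or SSHGitClient / HttpGitClient
    with repo.Repo('/tmp/dest') as dest:   # hypothetical existing repository
        refs = c.fetch('/server_new.export', dest)
        for name, sha in refs.items():
            dest.refs.set_if_equals(name, None, sha)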

+ 156 - 0
dulwich/tests/compat/test_pack.py

@@ -0,0 +1,156 @@
+# test_pack.py -- Compatibility tests for git packs.
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for git packs."""
+
+
+import binascii
+import os
+import re
+import shutil
+import tempfile
+
+from dulwich.pack import (
+    write_pack,
+    )
+from dulwich.objects import (
+    Blob,
+    )
+from dulwich.tests import (
+    SkipTest,
+    )
+from dulwich.tests.test_pack import (
+    a_sha,
+    pack1_sha,
+    PackTests,
+    )
+from dulwich.tests.compat.utils import (
+    require_git_version,
+    run_git_or_fail,
+    )
+
+_NON_DELTA_RE = re.compile(br'non delta: (?P<non_delta>\d+) objects')
+
+
+def _git_verify_pack_object_list(output):
+    pack_shas = set()
+    for line in output.splitlines():
+        sha = line[:40]
+        try:
+            binascii.unhexlify(sha)
+        except (TypeError, binascii.Error):
+            continue  # non-sha line
+        pack_shas.add(sha)
+    return pack_shas
+
+
+class TestPack(PackTests):
+    """Compatibility tests for reading and writing pack files."""
+
+    def setUp(self):
+        require_git_version((1, 5, 0))
+        super(TestPack, self).setUp()
+        self._tempdir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, self._tempdir)
+
+    def test_copy(self):
+        with self.get_pack(pack1_sha) as origpack:
+            self.assertSucceeds(origpack.index.check)
+            pack_path = os.path.join(self._tempdir, "Elch")
+            write_pack(pack_path, origpack.pack_tuples())
+            output = run_git_or_fail(['verify-pack', '-v', pack_path])
+            orig_shas = set(o.id for o in origpack.iterobjects())
+            self.assertEqual(orig_shas, _git_verify_pack_object_list(output))
+
+    def test_deltas_work(self):
+        with self.get_pack(pack1_sha) as orig_pack:
+            orig_blob = orig_pack[a_sha]
+            new_blob = Blob()
+            new_blob.data = orig_blob.data + b'x'
+            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None)]
+        pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
+        write_pack(pack_path, all_to_pack, deltify=True)
+        output = run_git_or_fail(['verify-pack', '-v', pack_path])
+        self.assertEqual(set(x[0].id for x in all_to_pack),
+                         _git_verify_pack_object_list(output))
+        # We specifically made a new blob that should be a delta
+        # against the blob a_sha, so make sure we really got only 3
+        # non-delta objects:
+        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
+        self.assertEqual(
+            3, got_non_delta,
+            'Expected 3 non-delta objects, got %d' % got_non_delta)
+
+    def test_delta_medium_object(self):
+        # This tests an object set that will have a copy operation
+        # 2**20 in size.
+        with self.get_pack(pack1_sha) as orig_pack:
+            orig_blob = orig_pack[a_sha]
+            new_blob = Blob()
+            new_blob.data = orig_blob.data + (b'x' * 2 ** 20)
+            new_blob_2 = Blob()
+            new_blob_2.data = new_blob.data + b'y'
+            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
+                                                           (new_blob_2, None)]
+        pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
+        write_pack(pack_path, all_to_pack, deltify=True)
+        output = run_git_or_fail(['verify-pack', '-v', pack_path])
+        self.assertEqual(set(x[0].id for x in all_to_pack),
+                         _git_verify_pack_object_list(output))
+        # We specifically made a new blob that should be a delta
+        # against the blob a_sha, so make sure we really got only 3
+        # non-delta objects:
+        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
+        self.assertEqual(
+            3, got_non_delta,
+            'Expected 3 non-delta objects, got %d' % got_non_delta)
+        # We expect one object to have a delta chain length of two
+        # (new_blob_2), so let's verify that actually happens:
+        self.assertIn(b'chain length = 2', output)
+
+    # This test is SUPER slow: over 80 seconds on a 2012-era
+    # laptop. This is because SequenceMatcher is worst-case quadratic
+    # on the input size. It's impractical to produce deltas for
+    # objects this large, but it's still worth doing the right thing
+    # when it happens.
+    def test_delta_large_object(self):
+        # This tests an object set that will have a copy operation
+        # 2**25 in size. This is a copy large enough that it requires
+        # two copy operations in git's binary delta format.
+        raise SkipTest('skipping slow, large test')
+        with self.get_pack(pack1_sha) as orig_pack:
+            new_blob = Blob()
+            new_blob.data = b'big blob' + (b'x' * 2 ** 25)
+            new_blob_2 = Blob()
+            new_blob_2.data = new_blob.data + b'y'
+            all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
+                                                           (new_blob_2, None)]
+        pack_path = os.path.join(self._tempdir, "pack_with_deltas")
+        write_pack(pack_path, all_to_pack, deltify=True)
+        output = run_git_or_fail(['verify-pack', '-v', pack_path])
+        self.assertEqual(set(x[0].id for x in all_to_pack),
+                         _git_verify_pack_object_list(output))
+        # Here new_blob is not based on any existing object, but
+        # new_blob_2 should be stored as a delta against new_blob, so
+        # make sure we really got 4 non-delta objects:
+        got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
+        self.assertEqual(
+            4, got_non_delta,
+            'Expected 4 non-delta objects, got %d' % got_non_delta)

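The tests above lean on write_pack(deltify=True) and C git's verify-pack for cross-checking. A minimal sketch of that flow outside the test harness, with a hypothetical output path:

    from dulwich.objects import Blob
    from dulwich.pack import write_pack

    base = Blob.from_string(b'x' * 1024)
    child = Blob.from_string(b'x' * 1024 + b'y')  # should deltify against base
    # Writes /tmp/demo.pack plus a matching /tmp/demo.idx.
    write_pack('/tmp/demo', [(base, None), (child, None)], deltify=True)
    # Cross-check with C git:  git verify-pack -v /tmp/demo.idx
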
+ 217 - 0
dulwich/tests/compat/test_repository.py

@@ -0,0 +1,217 @@
+# test_repo.py -- Git repo compatibility tests
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests for dulwich repositories."""
+
+
+from io import BytesIO
+from itertools import chain
+import os
+import tempfile
+
+from dulwich.objects import (
+    hex_to_sha,
+    )
+from dulwich.repo import (
+    check_ref_format,
+    Repo,
+    )
+from dulwich.tests.compat.utils import (
+    require_git_version,
+    rmtree_ro,
+    run_git_or_fail,
+    CompatTestCase,
+    )
+
+
+class ObjectStoreTestCase(CompatTestCase):
+    """Tests for git repository compatibility."""
+
+    def setUp(self):
+        super(ObjectStoreTestCase, self).setUp()
+        self._repo = self.import_repo('server_new.export')
+
+    def _run_git(self, args):
+        return run_git_or_fail(args, cwd=self._repo.path)
+
+    def _parse_refs(self, output):
+        refs = {}
+        for line in BytesIO(output):
+            fields = line.rstrip(b'\n').split(b' ')
+            self.assertEqual(3, len(fields))
+            refname, type_name, sha = fields
+            check_ref_format(refname[5:])
+            hex_to_sha(sha)
+            refs[refname] = (type_name, sha)
+        return refs
+
+    def _parse_objects(self, output):
+        return set(s.rstrip(b'\n').split(b' ')[0] for s in BytesIO(output))
+
+    def test_bare(self):
+        self.assertTrue(self._repo.bare)
+        self.assertFalse(os.path.exists(os.path.join(self._repo.path, '.git')))
+
+    def test_head(self):
+        output = self._run_git(['rev-parse', 'HEAD'])
+        head_sha = output.rstrip(b'\n')
+        hex_to_sha(head_sha)
+        self.assertEqual(head_sha, self._repo.refs[b'HEAD'])
+
+    def test_refs(self):
+        output = self._run_git(
+          ['for-each-ref', '--format=%(refname) %(objecttype) %(objectname)'])
+        expected_refs = self._parse_refs(output)
+
+        actual_refs = {}
+        for refname, sha in self._repo.refs.as_dict().items():
+            if refname == b'HEAD':
+                continue  # handled in test_head
+            obj = self._repo[sha]
+            self.assertEqual(sha, obj.id)
+            actual_refs[refname] = (obj.type_name, obj.id)
+        self.assertEqual(expected_refs, actual_refs)
+
+    # TODO(dborowitz): peeled ref tests
+
+    def _get_loose_shas(self):
+        output = self._run_git(
+            ['rev-list', '--all', '--objects', '--unpacked'])
+        return self._parse_objects(output)
+
+    def _get_all_shas(self):
+        output = self._run_git(['rev-list', '--all', '--objects'])
+        return self._parse_objects(output)
+
+    def assertShasMatch(self, expected_shas, actual_shas_iter):
+        actual_shas = set()
+        for sha in actual_shas_iter:
+            obj = self._repo[sha]
+            self.assertEqual(sha, obj.id)
+            actual_shas.add(sha)
+        self.assertEqual(expected_shas, actual_shas)
+
+    def test_loose_objects(self):
+        # TODO(dborowitz): This is currently not very useful since
+        # fast-imported repos only contain packed objects.
+        expected_shas = self._get_loose_shas()
+        self.assertShasMatch(expected_shas,
+                             self._repo.object_store._iter_loose_objects())
+
+    def test_packed_objects(self):
+        expected_shas = self._get_all_shas() - self._get_loose_shas()
+        self.assertShasMatch(expected_shas,
+                             chain(*self._repo.object_store.packs))
+
+    def test_all_objects(self):
+        expected_shas = self._get_all_shas()
+        self.assertShasMatch(expected_shas, iter(self._repo.object_store))
+
+
+class WorkingTreeTestCase(ObjectStoreTestCase):
+    """Test for compatibility with git-worktree."""
+
+    min_git_version = (2, 5, 0)
+
+    def create_new_worktree(self, repo_dir, branch):
+        """Create a new worktree using git-worktree.
+
+        :param repo_dir: The directory of the main working tree.
+        :param branch: The branch or commit to checkout in the new worktree.
+
+        :returns: The path to the new working tree.
+        """
+        temp_dir = tempfile.mkdtemp()
+        run_git_or_fail(['worktree', 'add', temp_dir, branch],
+                        cwd=repo_dir)
+        self.addCleanup(rmtree_ro, temp_dir)
+        return temp_dir
+
+    def setUp(self):
+        super(WorkingTreeTestCase, self).setUp()
+        self._worktree_path = self.create_new_worktree(
+            self._repo.path, 'branch')
+        self._worktree_repo = Repo(self._worktree_path)
+        self.addCleanup(self._worktree_repo.close)
+        self._mainworktree_repo = self._repo
+        self._number_of_working_tree = 2
+        self._repo = self._worktree_repo
+
+    def test_refs(self):
+        super(WorkingTreeTestCase, self).test_refs()
+        self.assertEqual(self._mainworktree_repo.refs.allkeys(),
+                         self._repo.refs.allkeys())
+
+    def test_head_equality(self):
+        self.assertNotEqual(self._repo.refs[b'HEAD'],
+                            self._mainworktree_repo.refs[b'HEAD'])
+
+    def test_bare(self):
+        self.assertFalse(self._repo.bare)
+        self.assertTrue(os.path.isfile(os.path.join(self._repo.path, '.git')))
+
+    def _parse_worktree_list(self, output):
+        worktrees = []
+        for line in BytesIO(output):
+            fields = line.rstrip(b'\n').split()
+            worktrees.append(tuple(f.decode() for f in fields))
+        return worktrees
+
+    def test_git_worktree_list(self):
+        # 'git worktree list' was introduced in 2.7.0
+        require_git_version((2, 7, 0))
+        output = run_git_or_fail(['worktree', 'list'], cwd=self._repo.path)
+        worktrees = self._parse_worktree_list(output)
+        self.assertEqual(len(worktrees), self._number_of_working_tree)
+        self.assertEqual(worktrees[0][1], '(bare)')
+        self.assertEqual(os.path.normcase(worktrees[0][0]),
+                         os.path.normcase(self._mainworktree_repo.path))
+
+        output = run_git_or_fail(
+            ['worktree', 'list'], cwd=self._mainworktree_repo.path)
+        worktrees = self._parse_worktree_list(output)
+        self.assertEqual(len(worktrees), self._number_of_working_tree)
+        self.assertEqual(worktrees[0][1], '(bare)')
+        self.assertEqual(os.path.normcase(worktrees[0][0]),
+                         os.path.normcase(self._mainworktree_repo.path))
+
+
+class InitNewWorkingDirectoryTestCase(WorkingTreeTestCase):
+    """Test compatibility of Repo.init_new_working_directory."""
+
+    min_git_version = (2, 5, 0)
+
+    def setUp(self):
+        super(InitNewWorkingDirectoryTestCase, self).setUp()
+        self._other_worktree = self._repo
+        worktree_repo_path = tempfile.mkdtemp()
+        self.addCleanup(rmtree_ro, worktree_repo_path)
+        self._repo = Repo._init_new_working_directory(
+            worktree_repo_path, self._mainworktree_repo)
+        self.addCleanup(self._repo.close)
+        self._number_of_working_tree = 3
+
+    def test_head_equality(self):
+        self.assertEqual(self._repo.refs[b'HEAD'],
+                         self._mainworktree_repo.refs[b'HEAD'])
+
+    def test_bare(self):
+        self.assertFalse(self._repo.bare)
+        self.assertTrue(os.path.isfile(os.path.join(self._repo.path, '.git')))

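The repository tests above reduce to comparing dulwich's view of refs and objects against C git's. A standalone sketch of the same inspection, assuming a hypothetical repository path:

    from dulwich.repo import Repo

    r = Repo('/tmp/clone')
    print(r.refs[b'HEAD'])       # head SHA as hex bytes
    for sha in r.object_store:   # iterates loose and packed objects alike
        assert sha == r[sha].id  # object content re-hashes to its SHA
    r.close()
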
+ 100 - 0
dulwich/tests/compat/test_server.py

@@ -0,0 +1,100 @@
+# test_server.py -- Compatibility tests for git server.
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests between Dulwich and the cgit server.
+
+Warning: these tests should be fairly stable, but when writing/debugging new
+    tests, deadlocks may freeze the test process such that it cannot be
+    Ctrl-C'ed. On POSIX systems, you can kill the tests with Ctrl-Z, "kill %".
+"""
+
+import threading
+import os
+import sys
+
+from dulwich.server import (
+    DictBackend,
+    TCPGitServer,
+    )
+from dulwich.tests import skipIf
+from dulwich.tests.compat.server_utils import (
+    ServerTests,
+    NoSideBand64kReceivePackHandler,
+    )
+from dulwich.tests.compat.utils import (
+    CompatTestCase,
+    require_git_version,
+    )
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class GitServerTestCase(ServerTests, CompatTestCase):
+    """Tests for client/server compatibility.
+
+    This server test case does not use side-band-64k in git-receive-pack.
+    """
+
+    protocol = 'git'
+
+    def _handlers(self):
+        return {b'git-receive-pack': NoSideBand64kReceivePackHandler}
+
+    def _check_server(self, dul_server):
+        receive_pack_handler_cls = dul_server.handlers[b'git-receive-pack']
+        caps = receive_pack_handler_cls.capabilities()
+        self.assertFalse(b'side-band-64k' in caps)
+
+    def _start_server(self, repo):
+        backend = DictBackend({b'/': repo})
+        dul_server = TCPGitServer(backend, b'localhost', 0,
+                                  handlers=self._handlers())
+        self._check_server(dul_server)
+        self.addCleanup(dul_server.shutdown)
+        self.addCleanup(dul_server.server_close)
+        threading.Thread(target=dul_server.serve).start()
+        self._server = dul_server
+        _, port = self._server.socket.getsockname()
+        return port
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class GitServerSideBand64kTestCase(GitServerTestCase):
+    """Tests for client/server compatibility with side-band-64k support."""
+
+    # side-band-64k in git-receive-pack was introduced in git 1.7.0.2
+    min_git_version = (1, 7, 0, 2)
+
+    def setUp(self):
+        super(GitServerSideBand64kTestCase, self).setUp()
+        # side-band-64k is broken in the windows client.
+        # https://github.com/msysgit/git/issues/101
+        # Fix has landed for the 1.9.3 release.
+        if os.name == 'nt':
+            require_git_version((1, 9, 3))
+
+    def _handlers(self):
+        return None  # default handlers include side-band-64k
+
+    def _check_server(self, server):
+        receive_pack_handler_cls = server.handlers[b'git-receive-pack']
+        caps = receive_pack_handler_cls.capabilities()
+        self.assertTrue(b'side-band-64k' in caps)

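_start_server above is the standard recipe for embedding TCPGitServer; condensed into a sketch under the same assumptions (repository path hypothetical):

    import threading

    from dulwich.repo import Repo
    from dulwich.server import DictBackend, TCPGitServer

    backend = DictBackend({b'/': Repo('/tmp/clone')})
    server = TCPGitServer(backend, b'localhost', 0)  # port 0: pick a free port
    threading.Thread(target=server.serve).start()
    _, port = server.socket.getsockname()
    # The repository is now reachable at git://localhost:<port>/
    server.shutdown()
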
+ 93 - 0
dulwich/tests/compat/test_utils.py

@@ -0,0 +1,93 @@
+# test_utils.py -- Tests for git compatibility utilities
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Tests for git compatibility utilities."""
+
+from dulwich.tests import (
+    SkipTest,
+    TestCase,
+    )
+from dulwich.tests.compat import utils
+
+
+class GitVersionTests(TestCase):
+
+    def setUp(self):
+        super(GitVersionTests, self).setUp()
+        self._orig_run_git = utils.run_git
+        self._version_str = None  # tests can override to set stub version
+
+        def run_git(args, **unused_kwargs):
+            self.assertEqual(['--version'], args)
+            return 0, self._version_str
+        utils.run_git = run_git
+
+    def tearDown(self):
+        super(GitVersionTests, self).tearDown()
+        utils.run_git = self._orig_run_git
+
+    def test_git_version_none(self):
+        self._version_str = b'not a git version'
+        self.assertEqual(None, utils.git_version())
+
+    def test_git_version_3(self):
+        self._version_str = b'git version 1.6.6'
+        self.assertEqual((1, 6, 6, 0), utils.git_version())
+
+    def test_git_version_4(self):
+        self._version_str = b'git version 1.7.0.2'
+        self.assertEqual((1, 7, 0, 2), utils.git_version())
+
+    def test_git_version_extra(self):
+        self._version_str = b'git version 1.7.0.3.295.gd8fa2'
+        self.assertEqual((1, 7, 0, 3), utils.git_version())
+
+    def assertRequireSucceeds(self, required_version):
+        try:
+            utils.require_git_version(required_version)
+        except SkipTest:
+            self.fail()
+
+    def assertRequireFails(self, required_version):
+        self.assertRaises(SkipTest, utils.require_git_version,
+                          required_version)
+
+    def test_require_git_version(self):
+        try:
+            self._version_str = b'git version 1.6.6'
+            self.assertRequireSucceeds((1, 6, 6))
+            self.assertRequireSucceeds((1, 6, 6, 0))
+            self.assertRequireSucceeds((1, 6, 5))
+            self.assertRequireSucceeds((1, 6, 5, 99))
+            self.assertRequireFails((1, 7, 0))
+            self.assertRequireFails((1, 7, 0, 2))
+            self.assertRaises(ValueError, utils.require_git_version,
+                              (1, 6, 6, 0, 0))
+
+            self._version_str = b'git version 1.7.0.2'
+            self.assertRequireSucceeds((1, 6, 6))
+            self.assertRequireSucceeds((1, 6, 6, 0))
+            self.assertRequireSucceeds((1, 7, 0))
+            self.assertRequireSucceeds((1, 7, 0, 2))
+            self.assertRequireFails((1, 7, 0, 3))
+            self.assertRequireFails((1, 7, 1))
+        except SkipTest as e:
+            # This test is designed to catch all SkipTest exceptions.
+            self.fail('Test unexpectedly skipped: %s' % e)

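Besides the class-level min_git_version checked in CompatTestCase.setUp, individual tests can call require_git_version directly, as test_git_worktree_list did earlier. A small sketch:

    from dulwich.tests import TestCase
    from dulwich.tests.compat.utils import require_git_version

    class ModernGitOnly(TestCase):

        def test_something_new(self):
            # Raises SkipTest (not a failure) when the installed C git
            # is older than 2.7.0.
            require_git_version((2, 7, 0))
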
+ 206 - 0
dulwich/tests/compat/test_web.py

@@ -0,0 +1,206 @@
+# test_web.py -- Compatibility tests for the git web server.
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as public by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Compatibility tests between Dulwich and the cgit HTTP server.
+
+Warning: these tests should be fairly stable, but when writing/debugging new
+    tests, deadlocks may freeze the test process such that it cannot be
+    Ctrl-C'ed. On POSIX systems, you can kill the tests with Ctrl-Z, "kill %".
+"""
+
+import threading
+from wsgiref import simple_server
+import sys
+
+from dulwich.server import (
+    DictBackend,
+    UploadPackHandler,
+    ReceivePackHandler,
+    )
+from dulwich.tests import (
+    SkipTest,
+    skipIf,
+    )
+from dulwich.web import (
+    make_wsgi_chain,
+    HTTPGitApplication,
+    WSGIRequestHandlerLogger,
+    WSGIServerLogger,
+    )
+
+from dulwich.tests.compat.server_utils import (
+    ServerTests,
+    NoSideBand64kReceivePackHandler,
+    )
+from dulwich.tests.compat.utils import (
+    CompatTestCase,
+    )
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class WebTests(ServerTests):
+    """Base tests for web server tests.
+
+    Contains utility and setUp/tearDown methods, but does not inherit from
+    TestCase so tests are not automatically run.
+    """
+
+    protocol = 'http'
+
+    def _start_server(self, repo):
+        backend = DictBackend({'/': repo})
+        app = self._make_app(backend)
+        dul_server = simple_server.make_server(
+          'localhost', 0, app, server_class=WSGIServerLogger,
+          handler_class=WSGIRequestHandlerLogger)
+        self.addCleanup(dul_server.shutdown)
+        self.addCleanup(dul_server.server_close)
+        threading.Thread(target=dul_server.serve_forever).start()
+        self._server = dul_server
+        _, port = dul_server.socket.getsockname()
+        return port
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class SmartWebTestCase(WebTests, CompatTestCase):
+    """Test cases for smart HTTP server.
+
+    This server test case does not use side-band-64k in git-receive-pack.
+    """
+
+    min_git_version = (1, 6, 6)
+
+    def _handlers(self):
+        return {b'git-receive-pack': NoSideBand64kReceivePackHandler}
+
+    def _check_app(self, app):
+        receive_pack_handler_cls = app.handlers[b'git-receive-pack']
+        caps = receive_pack_handler_cls.capabilities()
+        self.assertNotIn(b'side-band-64k', caps)
+
+    def _make_app(self, backend):
+        app = make_wsgi_chain(backend, handlers=self._handlers())
+        to_check = app
+        # peel back layers until we're at the base application
+        while not issubclass(to_check.__class__, HTTPGitApplication):
+            to_check = to_check.app
+        self._check_app(to_check)
+        return app
+
+
+def patch_capabilities(handler, caps_removed):
+    # Patch a handler's capabilities by specifying a list of them to be
+    # removed, and return the original classmethod for restoration.
+    original_capabilities = handler.capabilities
+    filtered_capabilities = tuple(
+        i for i in original_capabilities() if i not in caps_removed)
+
+    def capabilities(cls):
+        return filtered_capabilities
+    handler.capabilities = classmethod(capabilities)
+    return original_capabilities
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class SmartWebSideBand64kTestCase(SmartWebTestCase):
+    """Test cases for smart HTTP server with side-band-64k support."""
+
+    # side-band-64k in git-receive-pack was introduced in git 1.7.0.2
+    min_git_version = (1, 7, 0, 2)
+
+    def setUp(self):
+        self.o_uph_cap = patch_capabilities(UploadPackHandler, (b"no-done",))
+        self.o_rph_cap = patch_capabilities(ReceivePackHandler, (b"no-done",))
+        super(SmartWebSideBand64kTestCase, self).setUp()
+
+    def tearDown(self):
+        super(SmartWebSideBand64kTestCase, self).tearDown()
+        UploadPackHandler.capabilities = self.o_uph_cap
+        ReceivePackHandler.capabilities = self.o_rph_cap
+
+    def _handlers(self):
+        return None  # default handlers include side-band-64k
+
+    def _check_app(self, app):
+        receive_pack_handler_cls = app.handlers[b'git-receive-pack']
+        caps = receive_pack_handler_cls.capabilities()
+        self.assertIn(b'side-band-64k', caps)
+        self.assertNotIn(b'no-done', caps)
+
+
+class SmartWebSideBand64kNoDoneTestCase(SmartWebTestCase):
+    """Test cases for smart HTTP server with side-band-64k and no-done
+    support.
+    """
+
+    # no-done was introduced in git 1.7.4
+    min_git_version = (1, 7, 4)
+
+    def _handlers(self):
+        return None  # default handlers include side-band-64k
+
+    def _check_app(self, app):
+        receive_pack_handler_cls = app.handlers[b'git-receive-pack']
+        caps = receive_pack_handler_cls.capabilities()
+        self.assertIn(b'side-band-64k', caps)
+        self.assertIn(b'no-done', caps)
+
+
+@skipIf(sys.platform == 'win32',
+        'Broken on windows, with very long fail time.')
+class DumbWebTestCase(WebTests, CompatTestCase):
+    """Test cases for dumb HTTP server."""
+
+    def _make_app(self, backend):
+        return make_wsgi_chain(backend, dumb=True)
+
+    def test_push_to_dulwich(self):
+        # Note: remove this if dulwich implements dumb web pushing.
+        raise SkipTest('Dumb web pushing not supported.')
+
+    def test_push_to_dulwich_remove_branch(self):
+        # Note: remove this if dumb pushing is supported
+        raise SkipTest('Dumb web pushing not supported.')
+
+    def test_new_shallow_clone_from_dulwich(self):
+        # Note: remove this if C git and dulwich implement dumb web shallow
+        # clones.
+        raise SkipTest('Dumb web shallow cloning not supported.')
+
+    def test_shallow_clone_from_git_is_identical(self):
+        # Note: remove this if C git and dulwich implement dumb web shallow
+        # clones.
+        raise SkipTest('Dumb web shallow cloning not supported.')
+
+    def test_fetch_same_depth_into_shallow_clone_from_dulwich(self):
+        # Note: remove this if C git and dulwich implement dumb web shallow
+        # clones.
+        raise SkipTest('Dumb web shallow cloning not supported.')
+
+    def test_fetch_full_depth_into_shallow_clone_from_dulwich(self):
+        # Note: remove this if C git and dulwich implement dumb web shallow
+        # clones.
+        raise SkipTest('Dumb web shallow cloning not supported.')
+
+    def test_push_to_dulwich_issue_88_standard(self):
+        raise SkipTest('Dumb web pushing not supported.')

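WebTests._start_server wires the WSGI chain into wsgiref; the same app can be served standalone. A minimal sketch with a hypothetical path and port:

    from wsgiref.simple_server import make_server

    from dulwich.repo import Repo
    from dulwich.server import DictBackend
    from dulwich.web import make_wsgi_chain

    app = make_wsgi_chain(DictBackend({'/': Repo('/tmp/clone')}))
    httpd = make_server('localhost', 8000, app)
    httpd.serve_forever()  # then: git clone http://localhost:8000/
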
+ 256 - 0
dulwich/tests/compat/utils.py

@@ -0,0 +1,256 @@
+# utils.py -- Git compatibility utilities
+# Copyright (C) 2010 Google, Inc.
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+"""Utilities for interacting with cgit."""
+
+import errno
+import functools
+import os
+import shutil
+import socket
+import stat
+import subprocess
+import sys
+import tempfile
+import time
+
+from dulwich.repo import Repo
+from dulwich.protocol import TCP_GIT_PORT
+
+from dulwich.tests import (
+    SkipTest,
+    TestCase,
+    )
+
+_DEFAULT_GIT = 'git'
+_VERSION_LEN = 4
+_REPOS_DATA_DIR = os.path.abspath(os.path.join(
+    os.path.dirname(__file__), os.pardir, 'data', 'repos'))
+
+
+def git_version(git_path=_DEFAULT_GIT):
+    """Attempt to determine the version of git currently installed.
+
+    :param git_path: Path to the git executable; defaults to the version in
+        the system path.
+    :return: A tuple of ints of the form (major, minor, point, sub-point), or
+        None if no git installation was found.
+    """
+    try:
+        output = run_git_or_fail(['--version'], git_path=git_path)
+    except OSError:
+        return None
+    version_prefix = b'git version '
+    if not output.startswith(version_prefix):
+        return None
+
+    parts = output[len(version_prefix):].split(b'.')
+    nums = []
+    for part in parts:
+        try:
+            nums.append(int(part))
+        except ValueError:
+            break
+
+    while len(nums) < _VERSION_LEN:
+        nums.append(0)
+    return tuple(nums[:_VERSION_LEN])
+
+
+def require_git_version(required_version, git_path=_DEFAULT_GIT):
+    """Require git version >= version, or skip the calling test.
+
+    :param required_version: A tuple of ints of the form (major, minor, point,
+        sub-point); omitted components default to 0.
+    :param git_path: Path to the git executable; defaults to the version in
+        the system path.
+    :raise ValueError: if the required version tuple has too many parts.
+    :raise SkipTest: if no suitable git version was found at the given path.
+    """
+    found_version = git_version(git_path=git_path)
+    if found_version is None:
+        raise SkipTest('Test requires git >= %s, but C git not found' %
+                       (required_version, ))
+
+    if len(required_version) > _VERSION_LEN:
+        raise ValueError('Invalid version tuple %s, expected %i parts' %
+                         (required_version, _VERSION_LEN))
+
+    required_version = list(required_version)
+    while len(found_version) > len(required_version):
+        required_version.append(0)
+    required_version = tuple(required_version)
+
+    if found_version < required_version:
+        required_version = '.'.join(map(str, required_version))
+        found_version = '.'.join(map(str, found_version))
+        raise SkipTest('Test requires git >= %s, found %s' %
+                       (required_version, found_version))
+
+
+def run_git(args, git_path=_DEFAULT_GIT, input=None, capture_stdout=False,
+            **popen_kwargs):
+    """Run a git command.
+
+    Input is piped from the input parameter and output is sent to the standard
+    streams, unless capture_stdout is set.
+
+    :param args: A list of args to the git command.
+    :param git_path: Path to the git executable.
+    :param input: Input data to be sent to stdin.
+    :param capture_stdout: Whether to capture and return stdout.
+    :param popen_kwargs: Additional kwargs for subprocess.Popen;
+        stdin/stdout args are ignored.
+    :return: A tuple of (returncode, stdout contents). If capture_stdout is
+        False, None will be returned as stdout contents.
+    :raise OSError: if the git executable was not found.
+    """
+
+    env = popen_kwargs.pop('env', {})
+    env['LC_ALL'] = env['LANG'] = 'C'
+
+    args = [git_path] + args
+    popen_kwargs['stdin'] = subprocess.PIPE
+    if capture_stdout:
+        popen_kwargs['stdout'] = subprocess.PIPE
+    else:
+        popen_kwargs.pop('stdout', None)
+    p = subprocess.Popen(args, env=env, **popen_kwargs)
+    stdout, stderr = p.communicate(input=input)
+    return (p.returncode, stdout)
+
+
+def run_git_or_fail(args, git_path=_DEFAULT_GIT, input=None, **popen_kwargs):
+    """Run a git command, capture stdout/stderr, and fail if git fails."""
+    if 'stderr' not in popen_kwargs:
+        popen_kwargs['stderr'] = subprocess.STDOUT
+    returncode, stdout = run_git(args, git_path=git_path, input=input,
+                                 capture_stdout=True, **popen_kwargs)
+    if returncode != 0:
+        raise AssertionError("git with args %r failed with %d: %r" % (
+            args, returncode, stdout))
+    return stdout
+
+
+def import_repo_to_dir(name):
+    """Import a repo from a fast-export file in a temporary directory.
+
+    These are used rather than binary repos for compat tests because they are
+    more compact and human-editable, and we already depend on git.
+
+    :param name: The name of the repository export file, relative to
+        dulwich/tests/data/repos.
+    :returns: The path to the imported repository.
+    """
+    temp_dir = tempfile.mkdtemp()
+    export_path = os.path.join(_REPOS_DATA_DIR, name)
+    temp_repo_dir = os.path.join(temp_dir, name)
+    with open(export_path, 'rb') as export_file:
+        run_git_or_fail(['init', '--quiet', '--bare', temp_repo_dir])
+        run_git_or_fail(['fast-import'], input=export_file.read(),
+                        cwd=temp_repo_dir)
+    return temp_repo_dir
+
+
+def check_for_daemon(limit=10, delay=0.1, timeout=0.1, port=TCP_GIT_PORT):
+    """Check for a running TCP daemon.
+
+    Defaults to checking 10 times with a delay of 0.1 sec between tries.
+
+    :param limit: Number of attempts before deciding no daemon is running.
+    :param delay: Delay between connection attempts.
+    :param timeout: Socket timeout for connection attempts.
+    :param port: Port on which we expect the daemon to appear.
+    :returns: A boolean, true if a daemon is running on the specified port,
+        false if not.
+    """
+    for _ in range(limit):
+        time.sleep(delay)
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.settimeout(timeout)
+        try:
+            s.connect(('localhost', port))
+            return True
+        except socket.timeout:
+            pass
+        except socket.error as e:
+            if getattr(e, 'errno', False) and e.errno != errno.ECONNREFUSED:
+                raise
+            elif e.args[0] != errno.ECONNREFUSED:
+                raise
+        finally:
+            s.close()
+    return False
+
+
+class CompatTestCase(TestCase):
+    """Test case that requires git for compatibility checks.
+
+    Subclasses can change the git version required by overriding
+    min_git_version.
+    """
+
+    min_git_version = (1, 5, 0)
+
+    def setUp(self):
+        super(CompatTestCase, self).setUp()
+        require_git_version(self.min_git_version)
+
+    def assertObjectStoreEqual(self, store1, store2):
+        self.assertEqual(sorted(set(store1)), sorted(set(store2)))
+
+    def assertReposEqual(self, repo1, repo2):
+        self.assertEqual(repo1.get_refs(), repo2.get_refs())
+        self.assertObjectStoreEqual(repo1.object_store, repo2.object_store)
+
+    def assertReposNotEqual(self, repo1, repo2):
+        refs1 = repo1.get_refs()
+        objs1 = set(repo1.object_store)
+        refs2 = repo2.get_refs()
+        objs2 = set(repo2.object_store)
+        self.assertFalse(refs1 == refs2 and objs1 == objs2)
+
+    def import_repo(self, name):
+        """Import a repo from a fast-export file in a temporary directory.
+
+        :param name: The name of the repository export file, relative to
+            dulwich/tests/data/repos.
+        :returns: An initialized Repo object that lives in a temporary
+            directory.
+        """
+        path = import_repo_to_dir(name)
+        repo = Repo(path)
+
+        def cleanup():
+            repo.close()
+            rmtree_ro(os.path.dirname(path.rstrip(os.sep)))
+        self.addCleanup(cleanup)
+        return repo
+
+
+if sys.platform == 'win32':
+    def remove_ro(action, name, exc):
+        os.chmod(name, stat.S_IWRITE)
+        os.remove(name)
+
+    rmtree_ro = functools.partial(shutil.rmtree, onerror=remove_ro)
+else:
+    rmtree_ro = shutil.rmtree

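Taken together, these helpers let a compatibility test declare a minimum C git version and compare dulwich's behaviour against the real binary. A sketch of a typical subclass; the export name mirrors the one used earlier in this diff:

    from dulwich.tests.compat.utils import CompatTestCase, run_git_or_fail

    class ExampleCompatTest(CompatTestCase):

        min_git_version = (1, 7, 0, 2)  # setUp() skips on older C git

        def test_head_matches_cgit(self):
            repo = self.import_repo('server_new.export')
            out = run_git_or_fail(['rev-parse', 'HEAD'], cwd=repo.path)
            self.assertEqual(out.rstrip(b'\n'), repo.refs[b'HEAD'])
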
BIN
dulwich/tests/data/blobs/11/11111111111111111111111111111111111111


BIN
dulwich/tests/data/blobs/6f/670c0fb53f9463760b7295fbb814e965fb20c8


BIN
dulwich/tests/data/blobs/95/4a536f7819d40e6f637f849ee187dd10066349


BIN
dulwich/tests/data/blobs/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391


BIN
dulwich/tests/data/commits/0d/89f20333fbb1d2f3a94da77f4981373d8f4310

BIN
dulwich/tests/data/commits/5d/ac377bdded4c9aeb8dff595f0faeebcc8498cc


BIN
dulwich/tests/data/commits/60/dacdc733de308bb77bb76ce0fb0f9b44c9769e

BIN
dulwich/tests/data/indexes/index


BIN
dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.idx


BIN
dulwich/tests/data/packs/pack-bc63ddad95e7321ee734ea11a7a62d314e0d7481.pack


+ 1 - 0
dulwich/tests/data/repos/.gitattributes

@@ -0,0 +1 @@
+*.export eol=lf

+ 1 - 0
dulwich/tests/data/repos/a.git/HEAD

@@ -0,0 +1 @@
+ref: refs/heads/master

BIN
dulwich/tests/data/repos/a.git/objects/28/237f4dc30d0d462658d6b937b08a0f0b6ef55a

BIN
dulwich/tests/data/repos/a.git/objects/2a/72d929692c41d8554c07f6301757ba18a65d91


BIN
dulwich/tests/data/repos/a.git/objects/4e/f30bbfe26431a69c3820d3a683df54d688f2ec


BIN
dulwich/tests/data/repos/a.git/objects/4f/2e6529203aa6d44b5af6e3292c837ceda003f9


Some files were not shown in this diff because too many files have changed