Sfoglia il codice sorgente

Imported Upstream version 0.10.0

Jelmer Vernooij 10 anni fa
parent
commit
a84207213c

+ 14 - 0
HACKING

@@ -25,3 +25,17 @@ will run the tests using unittest on Python 2.7 and higher, and using
 unittest2 (which you will need to have installed) on older versions of Python.
 unittest2 (which you will need to have installed) on older versions of Python.
 
 
  $ make check
  $ make check
+
+String Types
+------------
+Like Linux, Git treats filenames as arbitrary bytestrings. There is no prescribed
+encoding for these strings, and although it is fairly common to use UTF-8, anything
+can and is used as encoding with Git.
+
+For this reason, Dulwich internally treats git-based filenames as bytestrings. It is up
+to the Dulwich API user to encode and decode them if necessary.
+
+* git-repository related filenames: bytes
+* object sha1 digests (20 bytes long): bytes
+* object sha1 hexdigests (40 bytes long): str (bytestrings on python2, strings on python3)
+

+ 47 - 0
NEWS

@@ -1,3 +1,50 @@
+0.10.0	2015-03-22
+
+ BUG FIXES
+
+  * In dulwich.index.build_index_from_tree, by default
+    refuse to create entries that start with .git/.
+
+  * Fix running of testsuite when installed.
+    (Jelmer Vernooij, #223)
+
+  * Use a block cache in _find_content_rename_candidates(),
+    improving performance. (Mike Williams)
+
+  * Add support for ``core.protectNTFS`` setting.
+    (Jelmer Vernooij)
+
+  * Fix TypeError when fetching empty updates.
+    (Hwee Miin Koh)
+
+  * Resolve delta refs when pulling into a MemoryRepo.
+    (Max Shawabkeh, #256)
+
+  * Fix handling of tags of non-commits in missing object finder.
+    (Augie Fackler, #211)
+
+  * Explicitly disable mmap on plan9 where it doesn't work.
+    (Jeff Sickel)
+
+ IMPROVEMENTS
+
+  * New public method `Repo.reset_index`. (Jelmer Vernooij)
+
+  * Prevent duplicate parsing of loose files in objects
+    directory when reading. Thanks to David Keijser for the
+    report. (Jelmer Vernooij, #231)
+
+0.9.9	2015-03-20
+
+ SECURITY BUG FIXES
+
+  * Fix buffer overflow in C implementation of pack apply_delta().
+    (CVE-2015-0838)
+
+    Thanks to Ivan Fratric of the Google Security Team for
+    reporting this issue.
+    (Jelmer Vernooij)
+
 0.9.8	2014-11-30
 0.9.8	2014-11-30
 
 
  BUG FIXES
  BUG FIXES

+ 1 - 1
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Metadata-Version: 1.1
 Name: dulwich
 Name: dulwich
-Version: 0.9.8
+Version: 0.10.0
 Summary: Python Git Library
 Summary: Python Git Library
 Home-page: https://samba.org/~jelmer/dulwich
 Home-page: https://samba.org/~jelmer/dulwich
 Author: Jelmer Vernooij
 Author: Jelmer Vernooij

+ 16 - 1
README.md

@@ -11,6 +11,21 @@ Author: Jelmer Vernooij <jelmer@samba.org>
 The project is named after the part of London that Mr. and Mrs. Git live in
 The project is named after the part of London that Mr. and Mrs. Git live in
 in the particular Monty Python sketch.
 in the particular Monty Python sketch.
 
 
+Installation
+------------
+
+By default, Dulwich's setup.py will attempt to build and install the optional C
+extensions. The reason for this is that they significantly improve the performance
+since some low-level operations that are executed often are much slower in CPython.
+
+If you don't want to install the C bindings, specify the --pure argument to setup.py::
+
+    $ python setup.py --pure install
+
+or if you are installing from pip:
+
+    $ pip install dulwich --global-option="--pure"
+
 Further documentation
 Further documentation
 ---------------------
 ---------------------
 
 
@@ -31,6 +46,6 @@ https://launchpad.net/~dulwich-users.
 Python3
 Python3
 -------
 -------
 
 
-The process of porting to Python3 is ongoing. Please not that although the
+The process of porting to Python3 is ongoing. Please note that although the
 test suite passes in python3, this is due to the tests of features that are not
 test suite passes in python3, this is due to the tests of features that are not
 yet ported being skipped, and *not* an indication that the port is complete.
 yet ported being skipped, and *not* an indication that the port is complete.

+ 1 - 1
dulwich.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Metadata-Version: 1.1
 Name: dulwich
 Name: dulwich
-Version: 0.9.8
+Version: 0.10.0
 Summary: Python Git Library
 Summary: Python Git Library
 Home-page: https://samba.org/~jelmer/dulwich
 Home-page: https://samba.org/~jelmer/dulwich
 Author: Jelmer Vernooij
 Author: Jelmer Vernooij

+ 1 - 1
dulwich/__init__.py

@@ -21,4 +21,4 @@
 
 
 """Python implementation of the Git file formats and protocols."""
 """Python implementation of the Git file formats and protocols."""
 
 
-__version__ = (0, 9, 8)
+__version__ = (0, 10, 0)

+ 5 - 1
dulwich/_pack.c

@@ -146,10 +146,14 @@ static PyObject *py_apply_delta(PyObject *self, PyObject *args)
 				break;
 				break;
 			memcpy(out+outindex, src_buf+cp_off, cp_size);
 			memcpy(out+outindex, src_buf+cp_off, cp_size);
 			outindex += cp_size;
 			outindex += cp_size;
+			dest_size -= cp_size;
 		} else if (cmd != 0) {
 		} else if (cmd != 0) {
+			if (cmd > dest_size)
+				break;
 			memcpy(out+outindex, delta+index, cmd);
 			memcpy(out+outindex, delta+index, cmd);
 			outindex += cmd;
 			outindex += cmd;
 			index += cmd;
 			index += cmd;
+			dest_size -= cmd;
 		} else {
 		} else {
 			PyErr_SetString(PyExc_ValueError, "Invalid opcode 0");
 			PyErr_SetString(PyExc_ValueError, "Invalid opcode 0");
 			Py_DECREF(ret);
 			Py_DECREF(ret);
@@ -167,7 +171,7 @@ static PyObject *py_apply_delta(PyObject *self, PyObject *args)
 		return NULL;
 		return NULL;
 	}
 	}
 
 
-	if (dest_size != outindex) {
+	if (dest_size != 0) {
 		PyErr_SetString(PyExc_ValueError, "dest size incorrect");
 		PyErr_SetString(PyExc_ValueError, "dest size incorrect");
 		Py_DECREF(ret);
 		Py_DECREF(ret);
 		return NULL;
 		return NULL;

+ 25 - 2
dulwich/client.py

@@ -616,7 +616,8 @@ class SubprocessWrapper(object):
             from msvcrt import get_osfhandle
             from msvcrt import get_osfhandle
             from win32pipe import PeekNamedPipe
             from win32pipe import PeekNamedPipe
             handle = get_osfhandle(self.proc.stdout.fileno())
             handle = get_osfhandle(self.proc.stdout.fileno())
-            return PeekNamedPipe(handle, 0)[2] != 0
+            data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
+            return total_bytes_avail != 0
         else:
         else:
             return _fileno_can_read(self.proc.stdout.fileno())
             return _fileno_can_read(self.proc.stdout.fileno())
 
 
@@ -679,7 +680,29 @@ class LocalGitClient(GitClient):
         :raises UpdateRefsError: if the server supports report-status
         :raises UpdateRefsError: if the server supports report-status
                                  and rejects ref updates
                                  and rejects ref updates
         """
         """
-        raise NotImplementedError(self.send_pack)
+        from dulwich.repo import Repo
+
+        target = Repo(path)
+        old_refs = target.get_refs()
+        new_refs = determine_wants(old_refs)
+
+        have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
+        want = []
+        for refname in set(new_refs.keys() + old_refs.keys()):
+            old_sha1 = old_refs.get(refname, ZERO_SHA)
+            new_sha1 = new_refs.get(refname, ZERO_SHA)
+            if new_sha1 not in have and new_sha1 != ZERO_SHA:
+                want.append(new_sha1)
+
+        if not want and old_refs == new_refs:
+            return new_refs
+
+        target.object_store.add_objects(generate_pack_contents(have, want))
+
+        for name, sha in new_refs.iteritems():
+            target.refs[name] = sha
+
+        return new_refs
 
 
     def fetch(self, path, target, determine_wants=None, progress=None):
     def fetch(self, path, target, determine_wants=None, progress=None):
         """Fetch into a target repository.
         """Fetch into a target repository.

+ 19 - 12
dulwich/diff_tree.py

@@ -17,7 +17,7 @@
 # MA  02110-1301, USA.
 # MA  02110-1301, USA.
 
 
 """Utilities for diffing files and trees."""
 """Utilities for diffing files and trees."""
-
+import sys
 from collections import (
 from collections import (
     defaultdict,
     defaultdict,
     namedtuple,
     namedtuple,
@@ -27,16 +27,20 @@ from io import BytesIO
 from itertools import chain
 from itertools import chain
 import stat
 import stat
 
 
-try:
-    from itertools import izip
-except ImportError:
-    # Python3
+if sys.version_info[0] == 3:
+    xrange = range
     izip = zip
     izip = zip
+    iteritems = lambda d: d.items()
+else:
+    from itertools import izip
+    iteritems = lambda d: d.iteritems()
+
 from dulwich.objects import (
 from dulwich.objects import (
     S_ISGITLINK,
     S_ISGITLINK,
     TreeEntry,
     TreeEntry,
     )
     )
 
 
+
 # TreeChange type constants.
 # TreeChange type constants.
 CHANGE_ADD = 'add'
 CHANGE_ADD = 'add'
 CHANGE_MODIFY = 'modify'
 CHANGE_MODIFY = 'modify'
@@ -140,7 +144,7 @@ def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
     # case.
     # case.
     mode1 = tree1_id and stat.S_IFDIR or None
     mode1 = tree1_id and stat.S_IFDIR or None
     mode2 = tree2_id and stat.S_IFDIR or None
     mode2 = tree2_id and stat.S_IFDIR or None
-    todo = [(TreeEntry('', mode1, tree1_id), TreeEntry('', mode2, tree2_id))]
+    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
     while todo:
     while todo:
         entry1, entry2 = todo.pop()
         entry1, entry2 = todo.pop()
         is_tree1 = _is_tree(entry1)
         is_tree1 = _is_tree(entry1)
@@ -261,7 +265,7 @@ def tree_changes_for_merge(store, parent_tree_ids, tree_id,
     change_type = lambda c: c.type
     change_type = lambda c: c.type
 
 
     # Yield only conflicting changes.
     # Yield only conflicting changes.
-    for _, changes in sorted(changes_by_path.iteritems()):
+    for _, changes in sorted(iteritems(changes_by_path)):
         assert len(changes) == num_parents
         assert len(changes) == num_parents
         have = [c for c in changes if c is not None]
         have = [c for c in changes if c is not None]
         if _all_eq(have, change_type, CHANGE_DELETE):
         if _all_eq(have, change_type, CHANGE_DELETE):
@@ -298,9 +302,11 @@ def _count_blocks(obj):
     block_getvalue = block.getvalue
     block_getvalue = block.getvalue
 
 
     for c in chain(*obj.as_raw_chunks()):
     for c in chain(*obj.as_raw_chunks()):
+        if sys.version_info[0] == 3:
+            c = c.to_bytes(1, 'big')
         block_write(c)
         block_write(c)
         n += 1
         n += 1
-        if c == '\n' or n == _BLOCK_SIZE:
+        if c == b'\n' or n == _BLOCK_SIZE:
             value = block_getvalue()
             value = block_getvalue()
             block_counts[hash(value)] += len(value)
             block_counts[hash(value)] += len(value)
             block_seek(0)
             block_seek(0)
@@ -324,7 +330,7 @@ def _common_bytes(blocks1, blocks2):
     if len(blocks1) > len(blocks2):
     if len(blocks1) > len(blocks2):
         blocks1, blocks2 = blocks2, blocks1
         blocks1, blocks2 = blocks2, blocks1
     score = 0
     score = 0
-    for block, count1 in blocks1.iteritems():
+    for block, count1 in iteritems(blocks1):
         count2 = blocks2.get(block)
         count2 = blocks2.get(block)
         if count2:
         if count2:
             score += min(count1, count2)
             score += min(count1, count2)
@@ -452,7 +458,7 @@ class RenameDetector(object):
 
 
         add_paths = set()
         add_paths = set()
         delete_paths = set()
         delete_paths = set()
-        for sha, sha_deletes in delete_map.iteritems():
+        for sha, sha_deletes in iteritems(delete_map):
             sha_adds = add_map[sha]
             sha_adds = add_map[sha]
             for (old, is_delete), new in izip(sha_deletes, sha_adds):
             for (old, is_delete), new in izip(sha_deletes, sha_adds):
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
@@ -498,19 +504,20 @@ class RenameDetector(object):
         if not self._should_find_content_renames():
         if not self._should_find_content_renames():
             return
             return
 
 
+        block_cache = {}
         check_paths = self._rename_threshold is not None
         check_paths = self._rename_threshold is not None
         for delete in self._deletes:
         for delete in self._deletes:
             if S_ISGITLINK(delete.old.mode):
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
                 continue  # Git links don't exist in this repo.
             old_sha = delete.old.sha
             old_sha = delete.old.sha
             old_obj = self._store[old_sha]
             old_obj = self._store[old_sha]
-            old_blocks = _count_blocks(old_obj)
+            block_cache[old_sha] = _count_blocks(old_obj)
             for add in self._adds:
             for add in self._adds:
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                     continue
                     continue
                 new_obj = self._store[add.new.sha]
                 new_obj = self._store[add.new.sha]
                 score = _similarity_score(old_obj, new_obj,
                 score = _similarity_score(old_obj, new_obj,
-                                          block_cache={old_sha: old_blocks})
+                                          block_cache=block_cache)
                 if score > self._rename_threshold:
                 if score > self._rename_threshold:
                     new_type = self._rename_type(check_paths, delete, add)
                     new_type = self._rename_type(check_paths, delete, add)
                     rename = TreeChange(new_type, delete.old, add.new)
                     rename = TreeChange(new_type, delete.old, add.new)

+ 32 - 1
dulwich/index.py

@@ -431,8 +431,35 @@ def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
             os.chmod(target_path, mode)
             os.chmod(target_path, mode)
 
 
 
 
+INVALID_DOTNAMES = (".git", ".", "..", "")
+
+
+def validate_path_element_default(element):
+    return element.lower() not in INVALID_DOTNAMES
+
+
+def validate_path_element_ntfs(element):
+    stripped = element.rstrip(". ").lower()
+    if stripped in INVALID_DOTNAMES:
+        return False
+    if stripped == "git~1":
+        return False
+    return True
+
+
+def validate_path(path, element_validator=validate_path_element_default):
+    """Default path validator that just checks for .git/."""
+    parts = path.split("/")
+    for p in parts:
+        if not element_validator(p):
+            return False
+    else:
+        return True
+
+
 def build_index_from_tree(prefix, index_path, object_store, tree_id,
 def build_index_from_tree(prefix, index_path, object_store, tree_id,
-                          honor_filemode=True):
+                          honor_filemode=True,
+                          validate_path_element=validate_path_element_default):
     """Generate and materialize index from a tree
     """Generate and materialize index from a tree
 
 
     :param tree_id: Tree to materialize
     :param tree_id: Tree to materialize
@@ -441,6 +468,8 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
     :param object_store: Non-empty object store holding tree contents
     :param object_store: Non-empty object store holding tree contents
     :param honor_filemode: An optional flag to honor core.filemode setting in
     :param honor_filemode: An optional flag to honor core.filemode setting in
         config file, default is core.filemode=True, change executable bit
         config file, default is core.filemode=True, change executable bit
+    :param validate_path_element: Function to validate path elements to check out;
+        default just refuses .git and .. directories.
 
 
     :note:: existing index is wiped and contents are not merged
     :note:: existing index is wiped and contents are not merged
         in a working dir. Suitable only for fresh clones.
         in a working dir. Suitable only for fresh clones.
@@ -449,6 +478,8 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
     index = Index(index_path)
     index = Index(index_path)
 
 
     for entry in object_store.iter_tree_contents(tree_id):
     for entry in object_store.iter_tree_contents(tree_id):
+        if not validate_path(entry.path):
+            continue
         full_path = os.path.join(prefix, entry.path)
         full_path = os.path.join(prefix, entry.path)
 
 
         if not os.path.exists(os.path.dirname(full_path)):
         if not os.path.exists(os.path.dirname(full_path)):

+ 2 - 1
dulwich/lru_cache.py

@@ -19,6 +19,7 @@
 
 
 _null_key = object()
 _null_key = object()
 
 
+
 class _LRUNode(object):
 class _LRUNode(object):
     """This maintains the linked-list which is the lru internals."""
     """This maintains the linked-list which is the lru internals."""
 
 
@@ -181,7 +182,7 @@ class LRUCache(object):
 
 
     def items(self):
     def items(self):
         """Get the key:value pairs as a dict."""
         """Get the key:value pairs as a dict."""
-        return dict((k, n.value) for k, n in self._cache.iteritems())
+        return dict((k, n.value) for k, n in self._cache.items())
 
 
     def cleanup(self):
     def cleanup(self):
         """Clear the cache until it shrinks to the requested size.
         """Clear the cache until it shrinks to the requested size.

+ 17 - 9
dulwich/object_store.py

@@ -745,7 +745,7 @@ class MemoryObjectStore(BaseObjectStore):
         def commit():
         def commit():
             p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
             p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
             f.close()
             f.close()
-            for obj in PackInflater.for_pack_data(p):
+            for obj in PackInflater.for_pack_data(p, self.get_raw):
                 self._data[obj.id] = obj
                 self._data[obj.id] = obj
         def abort():
         def abort():
             pass
             pass
@@ -911,7 +911,7 @@ def _collect_filetree_revs(obj_store, tree_sha, kset):
 
 
 
 
 def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
 def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
-    """Split object id list into two list with commit SHA1s and tag SHA1s.
+    """Split object id list into three lists with commit, tag, and other SHAs.
 
 
     Commits referenced by tags are included into commits
     Commits referenced by tags are included into commits
     list as well. Only SHA1s known in this repository will get
     list as well. Only SHA1s known in this repository will get
@@ -922,10 +922,11 @@ def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
     :param lst: Collection of commit and tag SHAs
     :param lst: Collection of commit and tag SHAs
     :param ignore_unknown: True to skip SHA1 missing in the repository
     :param ignore_unknown: True to skip SHA1 missing in the repository
         silently.
         silently.
-    :return: A tuple of (commits, tags) SHA1s
+    :return: A tuple of (commits, tags, others) SHA1s
     """
     """
     commits = set()
     commits = set()
     tags = set()
     tags = set()
+    others = set()
     for e in lst:
     for e in lst:
         try:
         try:
             o = obj_store[e]
             o = obj_store[e]
@@ -937,10 +938,15 @@ def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
                 commits.add(e)
                 commits.add(e)
             elif isinstance(o, Tag):
             elif isinstance(o, Tag):
                 tags.add(e)
                 tags.add(e)
-                commits.add(o.object[1])
+                tagged = o.object[1]
+                c, t, o = _split_commits_and_tags(
+                    obj_store, [tagged], ignore_unknown=ignore_unknown)
+                commits |= c
+                tags |= t
+                others |= o
             else:
             else:
-                raise KeyError('Not a commit or a tag: %s' % e)
-    return (commits, tags)
+                others.add(e)
+    return (commits, tags, others)
 
 
 
 
 class MissingObjectFinder(object):
 class MissingObjectFinder(object):
@@ -966,9 +972,9 @@ class MissingObjectFinder(object):
         # and such SHAs would get filtered out by _split_commits_and_tags,
         # and such SHAs would get filtered out by _split_commits_and_tags,
         # wants shall list only known SHAs, and otherwise
         # wants shall list only known SHAs, and otherwise
         # _split_commits_and_tags fails with KeyError
         # _split_commits_and_tags fails with KeyError
-        have_commits, have_tags = (
+        have_commits, have_tags, have_others = (
             _split_commits_and_tags(object_store, haves, True))
             _split_commits_and_tags(object_store, haves, True))
-        want_commits, want_tags = (
+        want_commits, want_tags, want_others = (
             _split_commits_and_tags(object_store, wants, False))
             _split_commits_and_tags(object_store, wants, False))
         # all_ancestors is a set of commits that shall not be sent
         # all_ancestors is a set of commits that shall not be sent
         # (complete repository up to 'haves')
         # (complete repository up to 'haves')
@@ -993,9 +999,11 @@ class MissingObjectFinder(object):
             self.sha_done.add(t)
             self.sha_done.add(t)
 
 
         missing_tags = want_tags.difference(have_tags)
         missing_tags = want_tags.difference(have_tags)
-        # in fact, what we 'want' is commits and tags
+        missing_others = want_others.difference(have_others)
+        # in fact, what we 'want' is commits, tags, and others
         # we've found missing
         # we've found missing
         wants = missing_commits.union(missing_tags)
         wants = missing_commits.union(missing_tags)
+        wants = wants.union(missing_others)
 
 
         self.objects_to_send = set([(w, None, False) for w in wants])
         self.objects_to_send = set([(w, None, False) for w in wants])
 
 

+ 133 - 146
dulwich/objects.py

@@ -25,8 +25,10 @@ from collections import namedtuple
 import os
 import os
 import posixpath
 import posixpath
 import stat
 import stat
+import sys
 import warnings
 import warnings
 import zlib
 import zlib
+from hashlib import sha1
 
 
 from dulwich.errors import (
 from dulwich.errors import (
     ChecksumMismatch,
     ChecksumMismatch,
@@ -37,24 +39,29 @@ from dulwich.errors import (
     ObjectFormatException,
     ObjectFormatException,
     )
     )
 from dulwich.file import GitFile
 from dulwich.file import GitFile
-from hashlib import sha1
 
 
-ZERO_SHA = "0" * 40
+if sys.version_info[0] == 2:
+    iteritems = lambda d: d.iteritems()
+else:
+    iteritems = lambda d: d.items()
+
+
+ZERO_SHA = b'0' * 40
 
 
 # Header fields for commits
 # Header fields for commits
-_TREE_HEADER = "tree"
-_PARENT_HEADER = "parent"
-_AUTHOR_HEADER = "author"
-_COMMITTER_HEADER = "committer"
-_ENCODING_HEADER = "encoding"
-_MERGETAG_HEADER = "mergetag"
-_GPGSIG_HEADER = "gpgsig"
+_TREE_HEADER = b'tree'
+_PARENT_HEADER = b'parent'
+_AUTHOR_HEADER = b'author'
+_COMMITTER_HEADER = b'committer'
+_ENCODING_HEADER = b'encoding'
+_MERGETAG_HEADER = b'mergetag'
+_GPGSIG_HEADER = b'gpgsig'
 
 
 # Header fields for objects
 # Header fields for objects
-_OBJECT_HEADER = "object"
-_TYPE_HEADER = "type"
-_TAG_HEADER = "tag"
-_TAGGER_HEADER = "tagger"
+_OBJECT_HEADER = b'object'
+_TYPE_HEADER = b'type'
+_TAG_HEADER = b'tag'
+_TAGGER_HEADER = b'tagger'
 
 
 
 
 S_IFGITLINK = 0o160000
 S_IFGITLINK = 0o160000
@@ -89,13 +96,18 @@ def hex_to_sha(hex):
     try:
     try:
         return binascii.unhexlify(hex)
         return binascii.unhexlify(hex)
     except TypeError as exc:
     except TypeError as exc:
-        if not isinstance(hex, str):
+        if not isinstance(hex, bytes):
             raise
             raise
         raise ValueError(exc.args[0])
         raise ValueError(exc.args[0])
 
 
 
 
 def hex_to_filename(path, hex):
 def hex_to_filename(path, hex):
     """Takes a hex sha and returns its filename relative to the given path."""
     """Takes a hex sha and returns its filename relative to the given path."""
+    # os.path.join accepts bytes or unicode, but all args must be of the same
+    # type. Make sure that hex which is expected to be bytes, is the same type
+    # as path.
+    if getattr(path, 'encode', None) is not None:
+        hex = hex.decode('ascii')
     dir = hex[:2]
     dir = hex[:2]
     file = hex[2:]
     file = hex[2:]
     # Check from object dir
     # Check from object dir
@@ -110,14 +122,14 @@ def filename_to_hex(filename):
     assert len(names) == 2, errmsg
     assert len(names) == 2, errmsg
     base, rest = names
     base, rest = names
     assert len(base) == 2 and len(rest) == 38, errmsg
     assert len(base) == 2 and len(rest) == 38, errmsg
-    hex = base + rest
+    hex = (base + rest).encode('ascii')
     hex_to_sha(hex)
     hex_to_sha(hex)
     return hex
     return hex
 
 
 
 
 def object_header(num_type, length):
 def object_header(num_type, length):
     """Return an object header for the given numeric type and text length."""
     """Return an object header for the given numeric type and text length."""
-    return "%s %d\0" % (object_class(num_type).type_name, length)
+    return object_class(num_type).type_name + b' ' + str(length).encode('ascii') + b'\0'
 
 
 
 
 def serializable_property(name, docstring=None):
 def serializable_property(name, docstring=None):
@@ -164,21 +176,30 @@ def check_identity(identity, error_msg):
     :param identity: Identity string
     :param identity: Identity string
     :param error_msg: Error message to use in exception
     :param error_msg: Error message to use in exception
     """
     """
-    email_start = identity.find("<")
-    email_end = identity.find(">")
+    email_start = identity.find(b'<')
+    email_end = identity.find(b'>')
     if (email_start < 0 or email_end < 0 or email_end <= email_start
     if (email_start < 0 or email_end < 0 or email_end <= email_start
-        or identity.find("<", email_start + 1) >= 0
-        or identity.find(">", email_end + 1) >= 0
-        or not identity.endswith(">")):
+        or identity.find(b'<', email_start + 1) >= 0
+        or identity.find(b'>', email_end + 1) >= 0
+        or not identity.endswith(b'>')):
         raise ObjectFormatException(error_msg)
         raise ObjectFormatException(error_msg)
 
 
 
 
+def git_line(*items):
+    """Formats items into a space-separated line."""
+    return b' '.join(items) + b'\n'
+
+
 class FixedSha(object):
 class FixedSha(object):
     """SHA object that behaves like hashlib's but is given a fixed value."""
     """SHA object that behaves like hashlib's but is given a fixed value."""
 
 
     __slots__ = ('_hexsha', '_sha')
     __slots__ = ('_hexsha', '_sha')
 
 
     def __init__(self, hexsha):
     def __init__(self, hexsha):
+        if getattr(hexsha, 'encode', None) is not None:
+            hexsha = hexsha.encode('ascii')
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
         self._hexsha = hexsha
         self._hexsha = hexsha
         self._sha = hex_to_sha(hexsha)
         self._sha = hex_to_sha(hexsha)
 
 
@@ -188,14 +209,14 @@ class FixedSha(object):
 
 
     def hexdigest(self):
     def hexdigest(self):
         """Return the hex SHA digest."""
         """Return the hex SHA digest."""
-        return self._hexsha
+        return self._hexsha.decode('ascii')
 
 
 
 
 class ShaFile(object):
 class ShaFile(object):
     """A git SHA file."""
     """A git SHA file."""
 
 
-    __slots__ = ('_needs_parsing', '_chunked_text', '_file', '_path',
-                 '_sha', '_needs_serialization', '_magic')
+    __slots__ = ('_needs_parsing', '_chunked_text', '_sha',
+                 '_needs_serialization')
 
 
     @staticmethod
     @staticmethod
     def _parse_legacy_object_header(magic, f):
     def _parse_legacy_object_header(magic, f):
@@ -209,22 +230,20 @@ class ShaFile(object):
             extra = f.read(bufsize)
             extra = f.read(bufsize)
             header += decomp.decompress(extra)
             header += decomp.decompress(extra)
             magic += extra
             magic += extra
-            end = header.find("\0", start)
+            end = header.find(b'\0', start)
             start = len(header)
             start = len(header)
         header = header[:end]
         header = header[:end]
-        type_name, size = header.split(" ", 1)
+        type_name, size = header.split(b' ', 1)
         size = int(size)  # sanity check
         size = int(size)  # sanity check
         obj_class = object_class(type_name)
         obj_class = object_class(type_name)
         if not obj_class:
         if not obj_class:
             raise ObjectFormatException("Not a known type: %s" % type_name)
             raise ObjectFormatException("Not a known type: %s" % type_name)
-        ret = obj_class()
-        ret._magic = magic
-        return ret
+        return obj_class()
 
 
     def _parse_legacy_object(self, map):
     def _parse_legacy_object(self, map):
         """Parse a legacy object, setting the raw string."""
         """Parse a legacy object, setting the raw string."""
         text = _decompress(map)
         text = _decompress(map)
-        header_end = text.find('\0')
+        header_end = text.find(b'\0')
         if header_end < 0:
         if header_end < 0:
             raise ObjectFormatException("Invalid object header, no \\0")
             raise ObjectFormatException("Invalid object header, no \\0")
         self.set_raw_string(text[header_end+1:])
         self.set_raw_string(text[header_end+1:])
@@ -243,7 +262,7 @@ class ShaFile(object):
     def as_legacy_object(self):
     def as_legacy_object(self):
         """Return string representing the object in the experimental format.
         """Return string representing the object in the experimental format.
         """
         """
-        return "".join(self.as_legacy_object_chunks())
+        return b''.join(self.as_legacy_object_chunks())
 
 
     def as_raw_chunks(self):
     def as_raw_chunks(self):
         """Return chunks with serialization of the object.
         """Return chunks with serialization of the object.
@@ -261,7 +280,7 @@ class ShaFile(object):
 
 
         :return: String object
         :return: String object
         """
         """
-        return "".join(self.as_raw_chunks())
+        return b''.join(self.as_raw_chunks())
 
 
     def __str__(self):
     def __str__(self):
         """Return raw string serialization of this object."""
         """Return raw string serialization of this object."""
@@ -278,21 +297,14 @@ class ShaFile(object):
     def _ensure_parsed(self):
     def _ensure_parsed(self):
         if self._needs_parsing:
         if self._needs_parsing:
             if not self._chunked_text:
             if not self._chunked_text:
-                if self._file is not None:
-                    self._parse_file(self._file)
-                    self._file = None
-                elif self._path is not None:
-                    self._parse_path()
-                else:
-                    raise AssertionError(
-                        "ShaFile needs either text or filename")
+                raise AssertionError("ShaFile needs chunked text")
             self._deserialize(self._chunked_text)
             self._deserialize(self._chunked_text)
             self._needs_parsing = False
             self._needs_parsing = False
 
 
     def set_raw_string(self, text, sha=None):
     def set_raw_string(self, text, sha=None):
         """Set the contents of this object from a serialized string."""
         """Set the contents of this object from a serialized string."""
-        if not isinstance(text, str):
-            raise TypeError(text)
+        if not isinstance(text, bytes):
+            raise TypeError('Expected bytes for text, got %r' % text)
         self.set_raw_chunks([text], sha)
         self.set_raw_chunks([text], sha)
 
 
     def set_raw_chunks(self, chunks, sha=None):
     def set_raw_chunks(self, chunks, sha=None):
@@ -309,46 +321,45 @@ class ShaFile(object):
     @staticmethod
     @staticmethod
     def _parse_object_header(magic, f):
     def _parse_object_header(magic, f):
         """Parse a new style object, creating it but not reading the file."""
         """Parse a new style object, creating it but not reading the file."""
-        num_type = (ord(magic[0]) >> 4) & 7
+        num_type = (ord(magic[0:1]) >> 4) & 7
         obj_class = object_class(num_type)
         obj_class = object_class(num_type)
         if not obj_class:
         if not obj_class:
             raise ObjectFormatException("Not a known type %d" % num_type)
             raise ObjectFormatException("Not a known type %d" % num_type)
-        ret = obj_class()
-        ret._magic = magic
-        return ret
+        return obj_class()
 
 
     def _parse_object(self, map):
     def _parse_object(self, map):
         """Parse a new style object, setting self._text."""
         """Parse a new style object, setting self._text."""
         # skip type and size; type must have already been determined, and
         # skip type and size; type must have already been determined, and
         # we trust zlib to fail if it's otherwise corrupted
         # we trust zlib to fail if it's otherwise corrupted
-        byte = ord(map[0])
+        byte = ord(map[0:1])
         used = 1
         used = 1
         while (byte & 0x80) != 0:
         while (byte & 0x80) != 0:
-            byte = ord(map[used])
+            byte = ord(map[used:used+1])
             used += 1
             used += 1
         raw = map[used:]
         raw = map[used:]
         self.set_raw_string(_decompress(raw))
         self.set_raw_string(_decompress(raw))
 
 
     @classmethod
     @classmethod
     def _is_legacy_object(cls, magic):
     def _is_legacy_object(cls, magic):
-        b0, b1 = map(ord, magic)
+        b0 = ord(magic[0:1])
+        b1 = ord(magic[1:2])
         word = (b0 << 8) + b1
         word = (b0 << 8) + b1
         return (b0 & 0x8F) == 0x08 and (word % 31) == 0
         return (b0 & 0x8F) == 0x08 and (word % 31) == 0
 
 
     @classmethod
     @classmethod
-    def _parse_file_header(cls, f):
-        magic = f.read(2)
-        if cls._is_legacy_object(magic):
-            return cls._parse_legacy_object_header(magic, f)
+    def _parse_file(cls, f):
+        map = f.read()
+        if cls._is_legacy_object(map):
+            obj = cls._parse_legacy_object_header(map, f)
+            obj._parse_legacy_object(map)
         else:
         else:
-            return cls._parse_object_header(magic, f)
+            obj = cls._parse_object_header(map, f)
+            obj._parse_object(map)
+        return obj
 
 
     def __init__(self):
     def __init__(self):
         """Don't call this directly"""
         """Don't call this directly"""
         self._sha = None
         self._sha = None
-        self._path = None
-        self._file = None
-        self._magic = None
         self._chunked_text = []
         self._chunked_text = []
         self._needs_parsing = False
         self._needs_parsing = False
         self._needs_serialization = True
         self._needs_serialization = True
@@ -359,40 +370,18 @@ class ShaFile(object):
     def _serialize(self):
     def _serialize(self):
         raise NotImplementedError(self._serialize)
         raise NotImplementedError(self._serialize)
 
 
-    def _parse_path(self):
-        with GitFile(self._path, 'rb') as f:
-            self._parse_file(f)
-
-    def _parse_file(self, f):
-        magic = self._magic
-        if magic is None:
-            magic = f.read(2)
-        map = magic + f.read()
-        if self._is_legacy_object(magic[:2]):
-            self._parse_legacy_object(map)
-        else:
-            self._parse_object(map)
-
     @classmethod
     @classmethod
     def from_path(cls, path):
     def from_path(cls, path):
         """Open a SHA file from disk."""
         """Open a SHA file from disk."""
         with GitFile(path, 'rb') as f:
         with GitFile(path, 'rb') as f:
-            obj = cls.from_file(f)
-            obj._path = path
-            obj._sha = FixedSha(filename_to_hex(path))
-            obj._file = None
-            obj._magic = None
-            return obj
+            return cls.from_file(f)
 
 
     @classmethod
     @classmethod
     def from_file(cls, f):
     def from_file(cls, f):
         """Get the contents of a SHA file on disk."""
         """Get the contents of a SHA file on disk."""
         try:
         try:
-            obj = cls._parse_file_header(f)
+            obj = cls._parse_file(f)
             obj._sha = None
             obj._sha = None
-            obj._needs_parsing = True
-            obj._needs_serialization = True
-            obj._file = f
             return obj
             return obj
         except (IndexError, ValueError):
         except (IndexError, ValueError):
             raise ObjectFormatException("invalid object header")
             raise ObjectFormatException("invalid object header")
@@ -499,7 +488,7 @@ class ShaFile(object):
     @property
     @property
     def id(self):
     def id(self):
         """The hex SHA of this object."""
         """The hex SHA of this object."""
-        return self.sha().hexdigest()
+        return self.sha().hexdigest().encode('ascii')
 
 
     def get_type(self):
     def get_type(self):
         """Return the type number for this object class."""
         """Return the type number for this object class."""
@@ -532,7 +521,7 @@ class Blob(ShaFile):
 
 
     __slots__ = ()
     __slots__ = ()
 
 
-    type_name = 'blob'
+    type_name = b'blob'
     type_num = 3
     type_num = 3
 
 
     def __init__(self):
     def __init__(self):
@@ -592,19 +581,19 @@ def _parse_message(chunks):
         order read from the text, possibly including duplicates. Includes a
         order read from the text, possibly including duplicates. Includes a
         field named None for the freeform tag/commit text.
         field named None for the freeform tag/commit text.
     """
     """
-    f = BytesIO("".join(chunks))
+    f = BytesIO(b''.join(chunks))
     k = None
     k = None
     v = ""
     v = ""
     for l in f:
     for l in f:
-        if l.startswith(" "):
+        if l.startswith(b' '):
             v += l[1:]
             v += l[1:]
         else:
         else:
             if k is not None:
             if k is not None:
-                yield (k, v.rstrip("\n"))
-            if l == "\n":
+                yield (k, v.rstrip(b'\n'))
+            if l == b'\n':
                 # Empty line indicates end of headers
                 # Empty line indicates end of headers
                 break
                 break
-            (k, v) = l.split(" ", 1)
+            (k, v) = l.split(b' ', 1)
     yield (None, f.read())
     yield (None, f.read())
     f.close()
     f.close()
 
 
@@ -612,7 +601,7 @@ def _parse_message(chunks):
 class Tag(ShaFile):
 class Tag(ShaFile):
     """A Git Tag object."""
     """A Git Tag object."""
 
 
-    type_name = 'tag'
+    type_name = b'tag'
     type_num = 4
     type_num = 4
 
 
     __slots__ = ('_tag_timezone_neg_utc', '_name', '_object_sha',
     __slots__ = ('_tag_timezone_neg_utc', '_name', '_object_sha',
@@ -662,18 +651,17 @@ class Tag(ShaFile):
 
 
     def _serialize(self):
     def _serialize(self):
         chunks = []
         chunks = []
-        chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
-        chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
-        chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
+        chunks.append(git_line(_OBJECT_HEADER, self._object_sha))
+        chunks.append(git_line(_TYPE_HEADER, self._object_class.type_name))
+        chunks.append(git_line(_TAG_HEADER, self._name))
         if self._tagger:
         if self._tagger:
             if self._tag_time is None:
             if self._tag_time is None:
-                chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
+                chunks.append(git_line(_TAGGER_HEADER, self._tagger))
             else:
             else:
-                chunks.append("%s %s %d %s\n" % (
-                  _TAGGER_HEADER, self._tagger, self._tag_time,
-                  format_timezone(self._tag_timezone,
-                    self._tag_timezone_neg_utc)))
-        chunks.append("\n") # To close headers
+                chunks.append(git_line(
+                    _TAGGER_HEADER, self._tagger, str(self._tag_time).encode('ascii'),
+                    format_timezone(self._tag_timezone, self._tag_timezone_neg_utc)))
+        chunks.append(b'\n') # To close headers
         chunks.append(self._message)
         chunks.append(self._message)
         return chunks
         return chunks
 
 
@@ -692,7 +680,7 @@ class Tag(ShaFile):
                 self._name = value
                 self._name = value
             elif field == _TAGGER_HEADER:
             elif field == _TAGGER_HEADER:
                 try:
                 try:
-                    sep = value.index("> ")
+                    sep = value.index(b'> ')
                 except ValueError:
                 except ValueError:
                     self._tagger = value
                     self._tagger = value
                     self._tag_time = None
                     self._tag_time = None
@@ -701,7 +689,7 @@ class Tag(ShaFile):
                 else:
                 else:
                     self._tagger = value[0:sep+1]
                     self._tagger = value[0:sep+1]
                     try:
                     try:
-                        (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
+                        (timetext, timezonetext) = value[sep+2:].rsplit(b' ', 1)
                         self._tag_time = int(timetext)
                         self._tag_time = int(timetext)
                         self._tag_timezone, self._tag_timezone_neg_utc = \
                         self._tag_timezone, self._tag_timezone_neg_utc = \
                                 parse_timezone(timezonetext)
                                 parse_timezone(timezonetext)
@@ -744,8 +732,8 @@ class TreeEntry(namedtuple('TreeEntry', ['path', 'mode', 'sha'])):
 
 
     def in_path(self, path):
     def in_path(self, path):
         """Return a copy of this entry with the given path prepended."""
         """Return a copy of this entry with the given path prepended."""
-        if not isinstance(self.path, str):
-            raise TypeError
+        if not isinstance(self.path, bytes):
+            raise TypeError('Expected bytes for path, got %r' % path)
         return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
         return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
 
 
 
 
@@ -759,15 +747,15 @@ def parse_tree(text, strict=False):
     count = 0
     count = 0
     l = len(text)
     l = len(text)
     while count < l:
     while count < l:
-        mode_end = text.index(' ', count)
+        mode_end = text.index(b' ', count)
         mode_text = text[count:mode_end]
         mode_text = text[count:mode_end]
-        if strict and mode_text.startswith('0'):
+        if strict and mode_text.startswith(b'0'):
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
         try:
         try:
             mode = int(mode_text, 8)
             mode = int(mode_text, 8)
         except ValueError:
         except ValueError:
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
-        name_end = text.index('\0', mode_end)
+        name_end = text.index(b'\0', mode_end)
         name = text[mode_end+1:name_end]
         name = text[mode_end+1:name_end]
         count = name_end+21
         count = name_end+21
         sha = text[name_end+1:count]
         sha = text[name_end+1:count]
@@ -784,7 +772,7 @@ def serialize_tree(items):
     :return: Serialized tree text as chunks
     :return: Serialized tree text as chunks
     """
     """
     for name, mode, hexsha in items:
     for name, mode, hexsha in items:
-        yield "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
+        yield ("%04o" % mode).encode('ascii') + b' ' + name + b'\0' + hex_to_sha(hexsha)
 
 
 
 
 def sorted_tree_items(entries, name_order):
 def sorted_tree_items(entries, name_order):
@@ -797,14 +785,12 @@ def sorted_tree_items(entries, name_order):
     :return: Iterator over (name, mode, hexsha)
     :return: Iterator over (name, mode, hexsha)
     """
     """
     key_func = name_order and key_entry_name_order or key_entry
     key_func = name_order and key_entry_name_order or key_entry
-    for name, entry in sorted(entries.iteritems(), key=key_func):
+    for name, entry in sorted(iteritems(entries), key=key_func):
         mode, hexsha = entry
         mode, hexsha = entry
         # Stricter type checks than normal to mirror checks in the C version.
         # Stricter type checks than normal to mirror checks in the C version.
-        if not isinstance(mode, int) and not isinstance(mode, long):
-            raise TypeError('Expected integer/long for mode, got %r' % mode)
         mode = int(mode)
         mode = int(mode)
-        if not isinstance(hexsha, str):
-            raise TypeError('Expected a string for SHA, got %r' % hexsha)
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for SHA, got %r' % hexsha)
         yield TreeEntry(name, mode, hexsha)
         yield TreeEntry(name, mode, hexsha)
 
 
 
 
@@ -815,7 +801,7 @@ def key_entry(entry):
     """
     """
     (name, value) = entry
     (name, value) = entry
     if stat.S_ISDIR(value[0]):
     if stat.S_ISDIR(value[0]):
-        name += "/"
+        name += b'/'
     return name
     return name
 
 
 
 
@@ -827,7 +813,7 @@ def key_entry_name_order(entry):
 class Tree(ShaFile):
 class Tree(ShaFile):
     """A Git tree object"""
     """A Git tree object"""
 
 
-    type_name = 'tree'
+    type_name = b'tree'
     type_num = 2
     type_num = 2
 
 
     __slots__ = ('_entries')
     __slots__ = ('_entries')
@@ -885,7 +871,7 @@ class Tree(ShaFile):
         :param name: The name of the entry, as a string.
         :param name: The name of the entry, as a string.
         :param hexsha: The hex SHA of the entry as a string.
         :param hexsha: The hex SHA of the entry as a string.
         """
         """
-        if isinstance(name, int) and isinstance(mode, str):
+        if isinstance(name, int) and isinstance(mode, bytes):
             (name, mode) = (mode, name)
             (name, mode) = (mode, name)
             warnings.warn(
             warnings.warn(
                 "Please use Tree.add(name, mode, hexsha)",
                 "Please use Tree.add(name, mode, hexsha)",
@@ -914,7 +900,7 @@ class Tree(ShaFile):
     def _deserialize(self, chunks):
     def _deserialize(self, chunks):
         """Grab the entries in the tree"""
         """Grab the entries in the tree"""
         try:
         try:
-            parsed_entries = parse_tree("".join(chunks))
+            parsed_entries = parse_tree(b''.join(chunks))
         except ValueError as e:
         except ValueError as e:
             raise ObjectFormatException(e)
             raise ObjectFormatException(e)
         # TODO: list comprehension is for efficiency in the common (small)
         # TODO: list comprehension is for efficiency in the common (small)
@@ -932,10 +918,10 @@ class Tree(ShaFile):
                          stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
                          stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
                          # TODO: optionally exclude as in git fsck --strict
                          # TODO: optionally exclude as in git fsck --strict
                          stat.S_IFREG | 0o664)
                          stat.S_IFREG | 0o664)
-        for name, mode, sha in parse_tree(''.join(self._chunked_text),
+        for name, mode, sha in parse_tree(b''.join(self._chunked_text),
                                           True):
                                           True):
             check_hexsha(sha, 'invalid sha %s' % sha)
             check_hexsha(sha, 'invalid sha %s' % sha)
-            if '/' in name or name in ('', '.', '..'):
+            if b'/' in name or name in (b'', b'.', b'..'):
                 raise ObjectFormatException('invalid name %s' % name)
                 raise ObjectFormatException('invalid name %s' % name)
 
 
             if mode not in allowed_modes:
             if mode not in allowed_modes:
@@ -969,7 +955,7 @@ class Tree(ShaFile):
         :param path: Path to lookup
         :param path: Path to lookup
         :return: A tuple of (mode, SHA) of the resulting path.
         :return: A tuple of (mode, SHA) of the resulting path.
         """
         """
-        parts = path.split('/')
+        parts = path.split(b'/')
         sha = self.id
         sha = self.id
         mode = None
         mode = None
         for p in parts:
         for p in parts:
@@ -993,13 +979,13 @@ def parse_timezone(text):
     # cgit parses the first character as the sign, and the rest
     # cgit parses the first character as the sign, and the rest
     #  as an integer (using strtol), which could also be negative.
     #  as an integer (using strtol), which could also be negative.
     #  We do the same for compatibility. See #697828.
     #  We do the same for compatibility. See #697828.
-    if not text[0] in '+-':
+    if not text[0] in b'+-':
         raise ValueError("Timezone must start with + or - (%(text)s)" % vars())
         raise ValueError("Timezone must start with + or - (%(text)s)" % vars())
-    sign = text[0]
+    sign = text[:1]
     offset = int(text[1:])
     offset = int(text[1:])
-    if sign == '-':
+    if sign == b'-':
         offset = -offset
         offset = -offset
-    unnecessary_negative_timezone = (offset >= 0 and sign == '-')
+    unnecessary_negative_timezone = (offset >= 0 and sign == b'-')
     signum = (offset < 0) and -1 or 1
     signum = (offset < 0) and -1 or 1
     offset = abs(offset)
     offset = abs(offset)
     hours = int(offset / 100)
     hours = int(offset / 100)
@@ -1022,7 +1008,7 @@ def format_timezone(offset, unnecessary_negative_timezone=False):
         offset = -offset
         offset = -offset
     else:
     else:
         sign = '+'
         sign = '+'
-    return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
+    return ('%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
 
 
 
 
 def parse_commit(chunks):
 def parse_commit(chunks):
@@ -1049,17 +1035,17 @@ def parse_commit(chunks):
         elif field == _PARENT_HEADER:
         elif field == _PARENT_HEADER:
             parents.append(value)
             parents.append(value)
         elif field == _AUTHOR_HEADER:
         elif field == _AUTHOR_HEADER:
-            author, timetext, timezonetext = value.rsplit(" ", 2)
+            author, timetext, timezonetext = value.rsplit(b' ', 2)
             author_time = int(timetext)
             author_time = int(timetext)
             author_info = (author, author_time, parse_timezone(timezonetext))
             author_info = (author, author_time, parse_timezone(timezonetext))
         elif field == _COMMITTER_HEADER:
         elif field == _COMMITTER_HEADER:
-            committer, timetext, timezonetext = value.rsplit(" ", 2)
+            committer, timetext, timezonetext = value.rsplit(b' ', 2)
             commit_time = int(timetext)
             commit_time = int(timetext)
             commit_info = (committer, commit_time, parse_timezone(timezonetext))
             commit_info = (committer, commit_time, parse_timezone(timezonetext))
         elif field == _ENCODING_HEADER:
         elif field == _ENCODING_HEADER:
             encoding = value
             encoding = value
         elif field == _MERGETAG_HEADER:
         elif field == _MERGETAG_HEADER:
-            mergetag.append(Tag.from_string(value + "\n"))
+            mergetag.append(Tag.from_string(value + b'\n'))
         elif field == _GPGSIG_HEADER:
         elif field == _GPGSIG_HEADER:
             gpgsig = value
             gpgsig = value
         elif field is None:
         elif field is None:
@@ -1073,7 +1059,7 @@ def parse_commit(chunks):
 class Commit(ShaFile):
 class Commit(ShaFile):
     """A git commit object"""
     """A git commit object"""
 
 
-    type_name = 'commit'
+    type_name = b'commit'
     type_num = 1
     type_num = 1
 
 
     __slots__ = ('_parents', '_encoding', '_extra', '_author_timezone_neg_utc',
     __slots__ = ('_parents', '_encoding', '_extra', '_author_timezone_neg_utc',
@@ -1146,40 +1132,41 @@ class Commit(ShaFile):
 
 
     def _serialize(self):
     def _serialize(self):
         chunks = []
         chunks = []
-        chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
+        tree_bytes = self._tree.as_raw_string() if isinstance(self._tree, Tree) else self._tree
+        chunks.append(git_line(_TREE_HEADER, tree_bytes))
         for p in self._parents:
         for p in self._parents:
-            chunks.append("%s %s\n" % (_PARENT_HEADER, p))
-        chunks.append("%s %s %s %s\n" % (
-            _AUTHOR_HEADER, self._author, str(self._author_time),
+            chunks.append(git_line(_PARENT_HEADER, p))
+        chunks.append(git_line(
+            _AUTHOR_HEADER, self._author, str(self._author_time).encode('ascii'),
             format_timezone(self._author_timezone,
             format_timezone(self._author_timezone,
-                          self._author_timezone_neg_utc)))
-        chunks.append("%s %s %s %s\n" % (
-            _COMMITTER_HEADER, self._committer, str(self._commit_time),
+                            self._author_timezone_neg_utc)))
+        chunks.append(git_line(
+            _COMMITTER_HEADER, self._committer, str(self._commit_time).encode('ascii'),
             format_timezone(self._commit_timezone,
             format_timezone(self._commit_timezone,
-                          self._commit_timezone_neg_utc)))
+                            self._commit_timezone_neg_utc)))
         if self.encoding:
         if self.encoding:
-            chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
+            chunks.append(git_line(_ENCODING_HEADER, self.encoding))
         for mergetag in self.mergetag:
         for mergetag in self.mergetag:
-            mergetag_chunks = mergetag.as_raw_string().split("\n")
+            mergetag_chunks = mergetag.as_raw_string().split(b'\n')
 
 
-            chunks.append("%s %s\n" % (_MERGETAG_HEADER, mergetag_chunks[0]))
+            chunks.append(git_line(_MERGETAG_HEADER, mergetag_chunks[0]))
             # Embedded extra header needs leading space
             # Embedded extra header needs leading space
             for chunk in mergetag_chunks[1:]:
             for chunk in mergetag_chunks[1:]:
-                chunks.append(" %s\n" % chunk)
+                chunks.append(b' ' + chunk + b'\n')
 
 
             # No trailing empty line
             # No trailing empty line
-            chunks[-1] = chunks[-1].rstrip(" \n")
+            chunks[-1] = chunks[-1].rstrip(b' \n')
         for k, v in self.extra:
         for k, v in self.extra:
-            if "\n" in k or "\n" in v:
+            if b'\n' in k or b'\n' in v:
                 raise AssertionError(
                 raise AssertionError(
                     "newline in extra data: %r -> %r" % (k, v))
                     "newline in extra data: %r -> %r" % (k, v))
-            chunks.append("%s %s\n" % (k, v))
+            chunks.append(git_line(k, v))
         if self.gpgsig:
         if self.gpgsig:
-            sig_chunks = self.gpgsig.split("\n")
-            chunks.append("%s %s\n" % (_GPGSIG_HEADER, sig_chunks[0]))
+            sig_chunks = self.gpgsig.split(b'\n')
+            chunks.append(git_line(_GPGSIG_HEADER, sig_chunks[0]))
             for chunk in sig_chunks[1:]:
             for chunk in sig_chunks[1:]:
-                chunks.append(" %s\n" % chunk)
-        chunks.append("\n")  # There must be a new line after the headers
+                chunks.append(git_line(b'',  chunk))
+        chunks.append(b'\n')  # There must be a new line after the headers
         chunks.append(self._message)
         chunks.append(self._message)
         return chunks
         return chunks
 
 

+ 11 - 1
dulwich/pack.py

@@ -47,14 +47,20 @@ except ImportError:
     imap = map
     imap = map
     izip = zip
     izip = zip
 
 
+import os
+
 try:
 try:
     import mmap
     import mmap
 except ImportError:
 except ImportError:
     has_mmap = False
     has_mmap = False
 else:
 else:
     has_mmap = True
     has_mmap = True
+
+# For some reason the above try, except fails to set has_mmap = False
+if os.uname()[0] == 'Plan9':
+    has_mmap = False
+
 from hashlib import sha1
 from hashlib import sha1
-import os
 from os import (
 from os import (
     SEEK_CUR,
     SEEK_CUR,
     SEEK_END,
     SEEK_END,
@@ -1094,6 +1100,10 @@ class PackData(object):
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # return type.
         # return type.
         self._file.seek(self._header_size)
         self._file.seek(self._header_size)
+
+        if self._num_objects is None:
+            return
+
         for _ in xrange(self._num_objects):
         for _ in xrange(self._num_objects):
             offset = self._file.tell()
             offset = self._file.tell()
             unpacked, unused = unpack_object(
             unpacked, unused = unpack_object(

+ 3 - 6
dulwich/porcelain.py

@@ -204,8 +204,7 @@ def clone(source, target=None, bare=False, checkout=None, outstream=sys.stdout):
     r["HEAD"] = remote_refs["HEAD"]
     r["HEAD"] = remote_refs["HEAD"]
     if checkout:
     if checkout:
         outstream.write('Checking out HEAD')
         outstream.write('Checking out HEAD')
-        index.build_index_from_tree(r.path, r.index_path(),
-                                    r.object_store, r["HEAD"].tree)
+        r.reset_index()
 
 
     return r
     return r
 
 
@@ -476,9 +475,8 @@ def reset(repo, mode, committish="HEAD"):
 
 
     r = open_repo(repo)
     r = open_repo(repo)
 
 
-    indexfile = r.index_path()
     tree = r[committish].tree
     tree = r[committish].tree
-    index.build_index_from_tree(r.path, indexfile, r.object_store, tree)
+    r.reset_index()
 
 
 
 
 def push(repo, remote_location, refs_path,
 def push(repo, remote_location, refs_path,
@@ -532,9 +530,8 @@ def pull(repo, remote_location, refs_path,
     r['HEAD'] = remote_refs[refs_path]
     r['HEAD'] = remote_refs[refs_path]
 
 
     # Perform 'git checkout .' - syncs staged changes
     # Perform 'git checkout .' - syncs staged changes
-    indexfile = r.index_path()
     tree = r["HEAD"].tree
     tree = r["HEAD"].tree
-    index.build_index_from_tree(r.path, indexfile, r.object_store, tree)
+    r.reset_index()
 
 
 
 
 def status(repo="."):
 def status(repo="."):

+ 61 - 57
dulwich/refs.py

@@ -30,6 +30,7 @@ from dulwich.errors import (
     )
     )
 from dulwich.objects import (
 from dulwich.objects import (
     hex_to_sha,
     hex_to_sha,
+    git_line,
     )
     )
 from dulwich.file import (
 from dulwich.file import (
     GitFile,
     GitFile,
@@ -37,8 +38,9 @@ from dulwich.file import (
     )
     )
 
 
 
 
-SYMREF = 'ref: '
-LOCAL_BRANCH_PREFIX = 'refs/heads/'
+SYMREF = b'ref: '
+LOCAL_BRANCH_PREFIX = b'refs/heads/'
+BAD_REF_CHARS = set(b'\177 ~^:?*[')
 
 
 
 
 def check_ref_format(refname):
 def check_ref_format(refname):
@@ -53,22 +55,22 @@ def check_ref_format(refname):
     """
     """
     # These could be combined into one big expression, but are listed separately
     # These could be combined into one big expression, but are listed separately
     # to parallel [1].
     # to parallel [1].
-    if '/.' in refname or refname.startswith('.'):
+    if b'/.' in refname or refname.startswith(b'.'):
         return False
         return False
-    if '/' not in refname:
+    if b'/' not in refname:
         return False
         return False
-    if '..' in refname:
+    if b'..' in refname:
         return False
         return False
-    for c in refname:
-        if ord(c) < 0o40 or c in '\177 ~^:?*[':
+    for i, c in enumerate(refname):
+        if ord(refname[i:i+1]) < 0o40 or c in BAD_REF_CHARS:
             return False
             return False
-    if refname[-1] in '/.':
+    if refname[-1] in b'/.':
         return False
         return False
-    if refname.endswith('.lock'):
+    if refname.endswith(b'.lock'):
         return False
         return False
-    if '@{' in refname:
+    if b'@{' in refname:
         return False
         return False
-    if '\\' in refname:
+    if b'\\' in refname:
         return False
         return False
     return True
     return True
 
 
@@ -105,8 +107,8 @@ class RefsContainer(object):
         return None
         return None
 
 
     def import_refs(self, base, other):
     def import_refs(self, base, other):
-        for name, value in other.iteritems():
-            self["%s/%s" % (base, name)] = value
+        for name, value in other.items():
+            self[b'/'.join((base, name))] = value
 
 
     def allkeys(self):
     def allkeys(self):
         """All refs present in this container."""
         """All refs present in this container."""
@@ -145,10 +147,10 @@ class RefsContainer(object):
         ret = {}
         ret = {}
         keys = self.keys(base)
         keys = self.keys(base)
         if base is None:
         if base is None:
-            base = ""
+            base = b''
         for key in keys:
         for key in keys:
             try:
             try:
-                ret[key] = self[("%s/%s" % (base, key)).strip("/")]
+                ret[key] = self[(base + b'/' + key).strip(b'/')]
             except KeyError:
             except KeyError:
                 continue  # Unable to resolve
                 continue  # Unable to resolve
 
 
@@ -165,9 +167,9 @@ class RefsContainer(object):
         :param name: The name of the reference.
         :param name: The name of the reference.
         :raises KeyError: if a refname is not HEAD or is otherwise not valid.
         :raises KeyError: if a refname is not HEAD or is otherwise not valid.
         """
         """
-        if name in ('HEAD', 'refs/stash'):
+        if name in (b'HEAD', b'refs/stash'):
             return
             return
-        if not name.startswith('refs/') or not check_ref_format(name[5:]):
+        if not name.startswith(b'refs/') or not check_ref_format(name[5:]):
             raise RefFormatError(name)
             raise RefFormatError(name)
 
 
     def read_ref(self, refname):
     def read_ref(self, refname):
@@ -350,15 +352,15 @@ class InfoRefsContainer(RefsContainer):
         self._refs = {}
         self._refs = {}
         self._peeled = {}
         self._peeled = {}
         for l in f.readlines():
         for l in f.readlines():
-            sha, name = l.rstrip("\n").split("\t")
-            if name.endswith("^{}"):
+            sha, name = l.rstrip(b'\n').split(b'\t')
+            if name.endswith(b'^{}'):
                 name = name[:-3]
                 name = name[:-3]
                 if not check_ref_format(name):
                 if not check_ref_format(name):
-                    raise ValueError("invalid ref name '%s'" % name)
+                    raise ValueError("invalid ref name %r" % name)
                 self._peeled[name] = sha
                 self._peeled[name] = sha
             else:
             else:
                 if not check_ref_format(name):
                 if not check_ref_format(name):
-                    raise ValueError("invalid ref name '%s'" % name)
+                    raise ValueError("invalid ref name %r" % name)
                 self._refs[name] = sha
                 self._refs[name] = sha
 
 
     def allkeys(self):
     def allkeys(self):
@@ -389,39 +391,41 @@ class DiskRefsContainer(RefsContainer):
         return "%s(%r)" % (self.__class__.__name__, self.path)
         return "%s(%r)" % (self.__class__.__name__, self.path)
 
 
     def subkeys(self, base):
     def subkeys(self, base):
-        keys = set()
+        subkeys = set()
         path = self.refpath(base)
         path = self.refpath(base)
         for root, dirs, files in os.walk(path):
         for root, dirs, files in os.walk(path):
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             for filename in files:
             for filename in files:
-                refname = ("%s/%s" % (dir, filename)).strip("/")
+                refname = (("%s/%s" % (dir, filename))
+                           .strip("/").encode('ascii'))
                 # check_ref_format requires at least one /, so we prepend the
                 # check_ref_format requires at least one /, so we prepend the
                 # base before calling it.
                 # base before calling it.
-                if check_ref_format("%s/%s" % (base, refname)):
-                    keys.add(refname)
+                if check_ref_format(base + b'/' + refname):
+                    subkeys.add(refname)
         for key in self.get_packed_refs():
         for key in self.get_packed_refs():
             if key.startswith(base):
             if key.startswith(base):
-                keys.add(key[len(base):].strip("/"))
-        return keys
+                subkeys.add(key[len(base):].strip(b'/'))
+        return subkeys
 
 
     def allkeys(self):
     def allkeys(self):
-        keys = set()
-        if os.path.exists(self.refpath("HEAD")):
-            keys.add("HEAD")
-        path = self.refpath("")
-        for root, dirs, files in os.walk(self.refpath("refs")):
+        allkeys = set()
+        if os.path.exists(self.refpath(b'HEAD')):
+            allkeys.add(b'HEAD')
+        path = self.refpath(b'')
+        for root, dirs, files in os.walk(self.refpath(b'refs')):
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             for filename in files:
             for filename in files:
-                refname = ("%s/%s" % (dir, filename)).strip("/")
+                refname = ("%s/%s" % (dir, filename)).strip("/").encode('ascii')
                 if check_ref_format(refname):
                 if check_ref_format(refname):
-                    keys.add(refname)
-        keys.update(self.get_packed_refs())
-        return keys
+                    allkeys.add(refname)
+        allkeys.update(self.get_packed_refs())
+        return allkeys
 
 
     def refpath(self, name):
     def refpath(self, name):
         """Return the disk path of a ref.
         """Return the disk path of a ref.
 
 
         """
         """
+        name = name.decode('ascii')
         if os.path.sep != "/":
         if os.path.sep != "/":
             name = name.replace("/", os.path.sep)
             name = name.replace("/", os.path.sep)
         return os.path.join(self.path, name)
         return os.path.join(self.path, name)
@@ -449,7 +453,7 @@ class DiskRefsContainer(RefsContainer):
                 raise
                 raise
             with f:
             with f:
                 first_line = next(iter(f)).rstrip()
                 first_line = next(iter(f)).rstrip()
-                if (first_line.startswith("# pack-refs") and " peeled" in
+                if (first_line.startswith(b'# pack-refs') and b' peeled' in
                         first_line):
                         first_line):
                     for sha, name, peeled in read_packed_refs_with_peeled(f):
                     for sha, name, peeled in read_packed_refs_with_peeled(f):
                         self._packed_refs[name] = sha
                         self._packed_refs[name] = sha
@@ -496,7 +500,7 @@ class DiskRefsContainer(RefsContainer):
                 header = f.read(len(SYMREF))
                 header = f.read(len(SYMREF))
                 if header == SYMREF:
                 if header == SYMREF:
                     # Read only the first line
                     # Read only the first line
-                    return header + next(iter(f)).rstrip("\r\n")
+                    return header + next(iter(f)).rstrip(b'\r\n')
                 else:
                 else:
                     # Read only the first 40 bytes
                     # Read only the first 40 bytes
                     return header + f.read(40 - len(SYMREF))
                     return header + f.read(40 - len(SYMREF))
@@ -538,7 +542,7 @@ class DiskRefsContainer(RefsContainer):
         try:
         try:
             f = GitFile(filename, 'wb')
             f = GitFile(filename, 'wb')
             try:
             try:
-                f.write(SYMREF + other + '\n')
+                f.write(SYMREF + other + b'\n')
             except (IOError, OSError):
             except (IOError, OSError):
                 f.abort()
                 f.abort()
                 raise
                 raise
@@ -578,7 +582,7 @@ class DiskRefsContainer(RefsContainer):
                     f.abort()
                     f.abort()
                     raise
                     raise
             try:
             try:
-                f.write(new_ref + "\n")
+                f.write(new_ref + b'\n')
             except (OSError, IOError):
             except (OSError, IOError):
                 f.abort()
                 f.abort()
                 raise
                 raise
@@ -608,7 +612,7 @@ class DiskRefsContainer(RefsContainer):
                 f.abort()
                 f.abort()
                 return False
                 return False
             try:
             try:
-                f.write(ref + "\n")
+                f.write(ref + b'\n')
             except (OSError, IOError):
             except (OSError, IOError):
                 f.abort()
                 f.abort()
                 raise
                 raise
@@ -651,16 +655,16 @@ class DiskRefsContainer(RefsContainer):
 
 
 def _split_ref_line(line):
 def _split_ref_line(line):
     """Split a single ref line into a tuple of SHA1 and name."""
     """Split a single ref line into a tuple of SHA1 and name."""
-    fields = line.rstrip("\n").split(" ")
+    fields = line.rstrip(b'\n').split(b' ')
     if len(fields) != 2:
     if len(fields) != 2:
-        raise PackedRefsException("invalid ref line '%s'" % line)
+        raise PackedRefsException("invalid ref line %r" % line)
     sha, name = fields
     sha, name = fields
     try:
     try:
         hex_to_sha(sha)
         hex_to_sha(sha)
     except (AssertionError, TypeError) as e:
     except (AssertionError, TypeError) as e:
         raise PackedRefsException(e)
         raise PackedRefsException(e)
     if not check_ref_format(name):
     if not check_ref_format(name):
-        raise PackedRefsException("invalid ref name '%s'" % name)
+        raise PackedRefsException("invalid ref name %r" % name)
     return (sha, name)
     return (sha, name)
 
 
 
 
@@ -671,10 +675,10 @@ def read_packed_refs(f):
     :return: Iterator over tuples with SHA1s and ref names.
     :return: Iterator over tuples with SHA1s and ref names.
     """
     """
     for l in f:
     for l in f:
-        if l[0] == "#":
+        if l.startswith(b'#'):
             # Comment
             # Comment
             continue
             continue
-        if l[0] == "^":
+        if l.startswith(b'^'):
             raise PackedRefsException(
             raise PackedRefsException(
               "found peeled ref in packed-refs without peeled")
               "found peeled ref in packed-refs without peeled")
         yield _split_ref_line(l)
         yield _split_ref_line(l)
@@ -690,10 +694,10 @@ def read_packed_refs_with_peeled(f):
     """
     """
     last = None
     last = None
     for l in f:
     for l in f:
-        if l[0] == "#":
+        if l[0] == b'#':
             continue
             continue
-        l = l.rstrip("\r\n")
-        if l[0] == "^":
+        l = l.rstrip(b'\r\n')
+        if l.startswith(b'^'):
             if not last:
             if not last:
                 raise PackedRefsException("unexpected peeled ref line")
                 raise PackedRefsException("unexpected peeled ref line")
             try:
             try:
@@ -723,11 +727,11 @@ def write_packed_refs(f, packed_refs, peeled_refs=None):
     if peeled_refs is None:
     if peeled_refs is None:
         peeled_refs = {}
         peeled_refs = {}
     else:
     else:
-        f.write('# pack-refs with: peeled\n')
-    for refname in sorted(packed_refs.iterkeys()):
-        f.write('%s %s\n' % (packed_refs[refname], refname))
+        f.write(b'# pack-refs with: peeled\n')
+    for refname in sorted(packed_refs.keys()):
+        f.write(git_line(packed_refs[refname], refname))
         if refname in peeled_refs:
         if refname in peeled_refs:
-            f.write('^%s\n' % peeled_refs[refname])
+            f.write(b'^' + peeled_refs[refname] + b'\n')
 
 
 
 
 def read_info_refs(f):
 def read_info_refs(f):
@@ -743,16 +747,16 @@ def write_info_refs(refs, store):
     for name, sha in sorted(refs.items()):
     for name, sha in sorted(refs.items()):
         # get_refs() includes HEAD as a special case, but we don't want to
         # get_refs() includes HEAD as a special case, but we don't want to
         # advertise it
         # advertise it
-        if name == 'HEAD':
+        if name == b'HEAD':
             continue
             continue
         try:
         try:
             o = store[sha]
             o = store[sha]
         except KeyError:
         except KeyError:
             continue
             continue
         peeled = store.peel_sha(sha)
         peeled = store.peel_sha(sha)
-        yield '%s\t%s\n' % (o.id, name)
+        yield o.id + b'\t' + name + b'\n'
         if o.id != peeled.id:
         if o.id != peeled.id:
-            yield '%s\t%s^{}\n' % (peeled.id, name)
+            yield peeled.id + b'\t' + name + b'^{}\n'
 
 
 
 
-is_local_branch = lambda x: x.startswith("refs/heads/")
+is_local_branch = lambda x: x.startswith(b'refs/heads/')

+ 19 - 5
dulwich/repo.py

@@ -793,17 +793,31 @@ class Repo(BaseRepo):
 
 
             if not bare:
             if not bare:
                 # Checkout HEAD to target dir
                 # Checkout HEAD to target dir
-                target._build_tree()
+                target.reset_index()
 
 
         return target
         return target
 
 
-    def _build_tree(self):
-        from dulwich.index import build_index_from_tree
+    def reset_index(self, tree=None):
+        """Reset the index back to a specific tree.
+
+        :param tree: Tree SHA to reset to, None for current HEAD tree.
+        """
+        from dulwich.index import (
+            build_index_from_tree,
+            validate_path_element_default,
+            validate_path_element_ntfs,
+            )
+        if tree is None:
+            tree = self['HEAD'].tree
         config = self.get_config()
         config = self.get_config()
         honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
         honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
+        if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
+            validate_path_element = validate_path_element_ntfs
+        else:
+            validate_path_element = validate_path_element_default
         return build_index_from_tree(self.path, self.index_path(),
         return build_index_from_tree(self.path, self.index_path(),
-                self.object_store, self['HEAD'].tree,
-                honor_filemode=honor_filemode)
+                self.object_store, tree, honor_filemode=honor_filemode,
+                validate_path_element=validate_path_element)
 
 
     def get_config(self):
     def get_config(self):
         """Retrieve the config object.
         """Retrieve the config object.

+ 9 - 3
dulwich/tests/__init__.py

@@ -75,8 +75,9 @@ class TestCase(_TestCase):
 class BlackboxTestCase(TestCase):
 class BlackboxTestCase(TestCase):
     """Blackbox testing."""
     """Blackbox testing."""
 
 
-    bin_directory = os.path.abspath(os.path.join(os.path.dirname(__file__),
-        "..", "..", "bin"))
+    # TODO(jelmer): Include more possible binary paths.
+    bin_directories = [os.path.abspath(os.path.join(os.path.dirname(__file__),
+        "..", "..", "bin")), '/usr/bin', '/usr/local/bin']
 
 
     def bin_path(self, name):
     def bin_path(self, name):
         """Determine the full path of a binary.
         """Determine the full path of a binary.
@@ -84,7 +85,12 @@ class BlackboxTestCase(TestCase):
         :param name: Name of the script
         :param name: Name of the script
         :return: Full path
         :return: Full path
         """
         """
-        return os.path.join(self.bin_directory, name)
+        for d in self.bin_directories:
+            p = os.path.join(d, name)
+            if os.path.isfile(p):
+                return p
+        else:
+            raise SkipTest("Unable to find binary %s" % name)
 
 
     def run_command(self, name, args):
     def run_command(self, name, args):
         """Run a Dulwich command.
         """Run a Dulwich command.

+ 35 - 1
dulwich/tests/test_client.py

@@ -18,6 +18,9 @@
 
 
 from io import BytesIO
 from io import BytesIO
 import sys
 import sys
+import shutil
+import tempfile
+
 try:
 try:
     from unittest import skipIf
     from unittest import skipIf
 except ImportError:
 except ImportError:
@@ -53,7 +56,10 @@ from dulwich.objects import (
     Commit,
     Commit,
     Tree
     Tree
     )
     )
-from dulwich.repo import MemoryRepo
+from dulwich.repo import (
+    MemoryRepo,
+    Repo,
+    )
 from dulwich.tests.utils import (
 from dulwich.tests.utils import (
     open_repo,
     open_repo,
     skipIfPY3,
     skipIfPY3,
@@ -610,3 +616,31 @@ class LocalGitClientTests(TestCase):
             graph_walker=walker, pack_data=out.write)
             graph_walker=walker, pack_data=out.write)
         # Hardcoding is not ideal, but we'll fix that some other day..
         # Hardcoding is not ideal, but we'll fix that some other day..
         self.assertTrue(out.getvalue().startswith('PACK\x00\x00\x00\x02\x00\x00\x00\x07'))
         self.assertTrue(out.getvalue().startswith('PACK\x00\x00\x00\x02\x00\x00\x00\x07'))
+
+    def test_send_pack_without_changes(self):
+        local = open_repo('a.git')
+        target = open_repo('a.git')
+        self.send_and_verify("master", local, target)
+
+    def test_send_pack_with_changes(self):
+        local = open_repo('a.git')
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, target_path)
+        target = Repo.init_bare(target_path)
+        self.send_and_verify("master", local, target)
+
+    def send_and_verify(self, branch, local, target):
+        client = LocalGitClient()
+        ref_name = "refs/heads/" + branch
+        new_refs = client.send_pack(target.path,
+                                    lambda _: { ref_name: local.refs[ref_name] },
+                                    local.object_store.generate_pack_contents)
+
+        self.assertEqual(local.refs[ref_name], new_refs[ref_name])
+
+        for name, sha in new_refs.iteritems():
+            self.assertEqual(new_refs[name], target.refs[name])
+
+        obj_local = local.get_object(new_refs[ref_name])
+        obj_target = target.get_object(new_refs[ref_name])
+        self.assertEqual(obj_local, obj_target)

File diff suppressed because it is too large
+ 393 - 391
dulwich/tests/test_diff_tree.py


+ 22 - 27
dulwich/tests/test_file.py

@@ -28,12 +28,8 @@ from dulwich.tests import (
     SkipTest,
     SkipTest,
     TestCase,
     TestCase,
     )
     )
-from dulwich.tests.utils import (
-    skipIfPY3
-    )
 
 
 
 
-@skipIfPY3
 class FancyRenameTests(TestCase):
 class FancyRenameTests(TestCase):
 
 
     def setUp(self):
     def setUp(self):
@@ -41,7 +37,7 @@ class FancyRenameTests(TestCase):
         self._tempdir = tempfile.mkdtemp()
         self._tempdir = tempfile.mkdtemp()
         self.foo = self.path('foo')
         self.foo = self.path('foo')
         self.bar = self.path('bar')
         self.bar = self.path('bar')
-        self.create(self.foo, 'foo contents')
+        self.create(self.foo, b'foo contents')
 
 
     def tearDown(self):
     def tearDown(self):
         shutil.rmtree(self._tempdir)
         shutil.rmtree(self._tempdir)
@@ -61,44 +57,43 @@ class FancyRenameTests(TestCase):
         self.assertFalse(os.path.exists(self.foo))
         self.assertFalse(os.path.exists(self.foo))
 
 
         new_f = open(self.bar, 'rb')
         new_f = open(self.bar, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
         new_f.close()
 
 
     def test_dest_exists(self):
     def test_dest_exists(self):
-        self.create(self.bar, 'bar contents')
+        self.create(self.bar, b'bar contents')
         fancy_rename(self.foo, self.bar)
         fancy_rename(self.foo, self.bar)
         self.assertFalse(os.path.exists(self.foo))
         self.assertFalse(os.path.exists(self.foo))
 
 
         new_f = open(self.bar, 'rb')
         new_f = open(self.bar, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
         new_f.close()
 
 
     def test_dest_opened(self):
     def test_dest_opened(self):
         if sys.platform != "win32":
         if sys.platform != "win32":
             raise SkipTest("platform allows overwriting open files")
             raise SkipTest("platform allows overwriting open files")
-        self.create(self.bar, 'bar contents')
+        self.create(self.bar, b'bar contents')
         dest_f = open(self.bar, 'rb')
         dest_f = open(self.bar, 'rb')
         self.assertRaises(OSError, fancy_rename, self.foo, self.bar)
         self.assertRaises(OSError, fancy_rename, self.foo, self.bar)
         dest_f.close()
         dest_f.close()
         self.assertTrue(os.path.exists(self.path('foo')))
         self.assertTrue(os.path.exists(self.path('foo')))
 
 
         new_f = open(self.foo, 'rb')
         new_f = open(self.foo, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
         new_f.close()
 
 
         new_f = open(self.bar, 'rb')
         new_f = open(self.bar, 'rb')
-        self.assertEqual('bar contents', new_f.read())
+        self.assertEqual(b'bar contents', new_f.read())
         new_f.close()
         new_f.close()
 
 
 
 
-@skipIfPY3
 class GitFileTests(TestCase):
 class GitFileTests(TestCase):
 
 
     def setUp(self):
     def setUp(self):
         super(GitFileTests, self).setUp()
         super(GitFileTests, self).setUp()
         self._tempdir = tempfile.mkdtemp()
         self._tempdir = tempfile.mkdtemp()
         f = open(self.path('foo'), 'wb')
         f = open(self.path('foo'), 'wb')
-        f.write('foo contents')
+        f.write(b'foo contents')
         f.close()
         f.close()
 
 
     def tearDown(self):
     def tearDown(self):
@@ -119,15 +114,15 @@ class GitFileTests(TestCase):
     def test_readonly(self):
     def test_readonly(self):
         f = GitFile(self.path('foo'), 'rb')
         f = GitFile(self.path('foo'), 'rb')
         self.assertTrue(isinstance(f, io.IOBase))
         self.assertTrue(isinstance(f, io.IOBase))
-        self.assertEqual('foo contents', f.read())
-        self.assertEqual('', f.read())
+        self.assertEqual(b'foo contents', f.read())
+        self.assertEqual(b'', f.read())
         f.seek(4)
         f.seek(4)
-        self.assertEqual('contents', f.read())
+        self.assertEqual(b'contents', f.read())
         f.close()
         f.close()
 
 
     def test_default_mode(self):
     def test_default_mode(self):
         f = GitFile(self.path('foo'))
         f = GitFile(self.path('foo'))
-        self.assertEqual('foo contents', f.read())
+        self.assertEqual(b'foo contents', f.read())
         f.close()
         f.close()
 
 
     def test_write(self):
     def test_write(self):
@@ -135,7 +130,7 @@ class GitFileTests(TestCase):
         foo_lock = '%s.lock' % foo
         foo_lock = '%s.lock' % foo
 
 
         orig_f = open(foo, 'rb')
         orig_f = open(foo, 'rb')
-        self.assertEqual(orig_f.read(), 'foo contents')
+        self.assertEqual(orig_f.read(), b'foo contents')
         orig_f.close()
         orig_f.close()
 
 
         self.assertFalse(os.path.exists(foo_lock))
         self.assertFalse(os.path.exists(foo_lock))
@@ -144,20 +139,20 @@ class GitFileTests(TestCase):
         self.assertRaises(AttributeError, getattr, f, 'not_a_file_property')
         self.assertRaises(AttributeError, getattr, f, 'not_a_file_property')
 
 
         self.assertTrue(os.path.exists(foo_lock))
         self.assertTrue(os.path.exists(foo_lock))
-        f.write('new stuff')
+        f.write(b'new stuff')
         f.seek(4)
         f.seek(4)
-        f.write('contents')
+        f.write(b'contents')
         f.close()
         f.close()
         self.assertFalse(os.path.exists(foo_lock))
         self.assertFalse(os.path.exists(foo_lock))
 
 
         new_f = open(foo, 'rb')
         new_f = open(foo, 'rb')
-        self.assertEqual('new contents', new_f.read())
+        self.assertEqual(b'new contents', new_f.read())
         new_f.close()
         new_f.close()
 
 
     def test_open_twice(self):
     def test_open_twice(self):
         foo = self.path('foo')
         foo = self.path('foo')
         f1 = GitFile(foo, 'wb')
         f1 = GitFile(foo, 'wb')
-        f1.write('new')
+        f1.write(b'new')
         try:
         try:
             f2 = GitFile(foo, 'wb')
             f2 = GitFile(foo, 'wb')
             self.fail()
             self.fail()
@@ -165,12 +160,12 @@ class GitFileTests(TestCase):
             self.assertEqual(errno.EEXIST, e.errno)
             self.assertEqual(errno.EEXIST, e.errno)
         else:
         else:
             f2.close()
             f2.close()
-        f1.write(' contents')
+        f1.write(b' contents')
         f1.close()
         f1.close()
 
 
         # Ensure trying to open twice doesn't affect original.
         # Ensure trying to open twice doesn't affect original.
         f = open(foo, 'rb')
         f = open(foo, 'rb')
-        self.assertEqual('new contents', f.read())
+        self.assertEqual(b'new contents', f.read())
         f.close()
         f.close()
 
 
     def test_abort(self):
     def test_abort(self):
@@ -178,17 +173,17 @@ class GitFileTests(TestCase):
         foo_lock = '%s.lock' % foo
         foo_lock = '%s.lock' % foo
 
 
         orig_f = open(foo, 'rb')
         orig_f = open(foo, 'rb')
-        self.assertEqual(orig_f.read(), 'foo contents')
+        self.assertEqual(orig_f.read(), b'foo contents')
         orig_f.close()
         orig_f.close()
 
 
         f = GitFile(foo, 'wb')
         f = GitFile(foo, 'wb')
-        f.write('new contents')
+        f.write(b'new contents')
         f.abort()
         f.abort()
         self.assertTrue(f.closed)
         self.assertTrue(f.closed)
         self.assertFalse(os.path.exists(foo_lock))
         self.assertFalse(os.path.exists(foo_lock))
 
 
         new_orig_f = open(foo, 'rb')
         new_orig_f = open(foo, 'rb')
-        self.assertEqual(new_orig_f.read(), 'foo contents')
+        self.assertEqual(new_orig_f.read(), b'foo contents')
         new_orig_f.close()
         new_orig_f.close()
 
 
     def test_abort_close(self):
     def test_abort_close(self):

+ 10 - 12
dulwich/tests/test_hooks.py

@@ -20,6 +20,7 @@
 import os
 import os
 import stat
 import stat
 import shutil
 import shutil
+import sys
 import tempfile
 import tempfile
 
 
 from dulwich import errors
 from dulwich import errors
@@ -31,10 +32,8 @@ from dulwich.hooks import (
 )
 )
 
 
 from dulwich.tests import TestCase
 from dulwich.tests import TestCase
-from dulwich.tests.utils import skipIfPY3
 
 
 
 
-@skipIfPY3
 class ShellHookTests(TestCase):
 class ShellHookTests(TestCase):
 
 
     def setUp(self):
     def setUp(self):
@@ -42,11 +41,11 @@ class ShellHookTests(TestCase):
             self.skipTest('shell hook tests requires POSIX shell')
             self.skipTest('shell hook tests requires POSIX shell')
 
 
     def test_hook_pre_commit(self):
     def test_hook_pre_commit(self):
-        pre_commit_fail = """#!/bin/sh
+        pre_commit_fail = b"""#!/bin/sh
 exit 1
 exit 1
 """
 """
 
 
-        pre_commit_success = """#!/bin/sh
+        pre_commit_success = b"""#!/bin/sh
 exit 0
 exit 0
 """
 """
 
 
@@ -71,11 +70,11 @@ exit 0
 
 
     def test_hook_commit_msg(self):
     def test_hook_commit_msg(self):
 
 
-        commit_msg_fail = """#!/bin/sh
+        commit_msg_fail = b"""#!/bin/sh
 exit 1
 exit 1
 """
 """
 
 
-        commit_msg_success = """#!/bin/sh
+        commit_msg_success = b"""#!/bin/sh
 exit 0
 exit 0
 """
 """
 
 
@@ -90,22 +89,21 @@ exit 0
             f.write(commit_msg_fail)
             f.write(commit_msg_fail)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
 
 
-        self.assertRaises(errors.HookError, hook.execute, 'failed commit')
+        self.assertRaises(errors.HookError, hook.execute, b'failed commit')
 
 
         with open(commit_msg, 'wb') as f:
         with open(commit_msg, 'wb') as f:
             f.write(commit_msg_success)
             f.write(commit_msg_success)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
 
 
-        hook.execute('empty commit')
+        hook.execute(b'empty commit')
 
 
     def test_hook_post_commit(self):
     def test_hook_post_commit(self):
 
 
         (fd, path) = tempfile.mkstemp()
         (fd, path) = tempfile.mkstemp()
-        post_commit_msg = """#!/bin/sh
-rm %(file)s
-""" % {'file': path}
+        post_commit_msg = b"""#!/bin/sh
+rm """ + path.encode(sys.getfilesystemencoding()) + b"\n"
 
 
-        post_commit_msg_fail = """#!/bin/sh
+        post_commit_msg_fail = b"""#!/bin/sh
 exit 1
 exit 1
 """
 """
 
 

+ 58 - 1
dulwich/tests/test_index.py

@@ -35,6 +35,8 @@ from dulwich.index import (
     index_entry_from_stat,
     index_entry_from_stat,
     read_index,
     read_index,
     read_index_dict,
     read_index_dict,
+    validate_path_element_default,
+    validate_path_element_ntfs,
     write_cache_time,
     write_cache_time,
     write_index,
     write_index,
     write_index_dict,
     write_index_dict,
@@ -50,7 +52,6 @@ from dulwich.repo import Repo
 from dulwich.tests import TestCase
 from dulwich.tests import TestCase
 from dulwich.tests.utils import skipIfPY3
 from dulwich.tests.utils import skipIfPY3
 
 
-
 @skipIfPY3
 @skipIfPY3
 class IndexTestCase(TestCase):
 class IndexTestCase(TestCase):
 
 
@@ -281,6 +282,43 @@ class BuildIndexTests(TestCase):
         # Verify no files
         # Verify no files
         self.assertEqual(['.git'], os.listdir(repo.path))
         self.assertEqual(['.git'], os.listdir(repo.path))
 
 
+    def test_git_dir(self):
+        if os.name != 'posix':
+            self.skipTest("test depends on POSIX shell")
+
+        repo_dir = tempfile.mkdtemp()
+        repo = Repo.init(repo_dir)
+        self.addCleanup(shutil.rmtree, repo_dir)
+
+        # Populate repo
+        filea = Blob.from_string('file a')
+        filee = Blob.from_string('d')
+
+        tree = Tree()
+        tree['.git/a'] = (stat.S_IFREG | 0o644, filea.id)
+        tree['c/e'] = (stat.S_IFREG | 0o644, filee.id)
+
+        repo.object_store.add_objects([(o, None)
+            for o in [filea, filee, tree]])
+
+        build_index_from_tree(repo.path, repo.index_path(),
+                repo.object_store, tree.id)
+
+        # Verify index entries
+        index = repo.open_index()
+        self.assertEqual(len(index), 1)
+
+        # filea
+        apath = os.path.join(repo.path, '.git', 'a')
+        self.assertFalse(os.path.exists(apath))
+
+        # filee
+        epath = os.path.join(repo.path, 'c', 'e')
+        self.assertTrue(os.path.exists(epath))
+        self.assertReasonableIndexEntry(index['c/e'],
+            stat.S_IFREG | 0o644, 1, filee.id)
+        self.assertFileContents(epath, 'd')
+
     def test_nonempty(self):
     def test_nonempty(self):
         if os.name != 'posix':
         if os.name != 'posix':
             self.skipTest("test depends on POSIX shell")
             self.skipTest("test depends on POSIX shell")
@@ -377,3 +415,22 @@ class GetUnstagedChangesTests(TestCase):
         changes = get_unstaged_changes(repo.open_index(), repo_dir)
         changes = get_unstaged_changes(repo.open_index(), repo_dir)
 
 
         self.assertEqual(list(changes), ['foo1'])
         self.assertEqual(list(changes), ['foo1'])
+
+
+class TestValidatePathElement(TestCase):
+
+    def test_default(self):
+        self.assertTrue(validate_path_element_default("bla"))
+        self.assertTrue(validate_path_element_default(".bla"))
+        self.assertFalse(validate_path_element_default(".git"))
+        self.assertFalse(validate_path_element_default(".giT"))
+        self.assertFalse(validate_path_element_default(".."))
+        self.assertTrue(validate_path_element_default("git~1"))
+
+    def test_ntfs(self):
+        self.assertTrue(validate_path_element_ntfs("bla"))
+        self.assertTrue(validate_path_element_ntfs(".bla"))
+        self.assertFalse(validate_path_element_ntfs(".git"))
+        self.assertFalse(validate_path_element_ntfs(".giT"))
+        self.assertFalse(validate_path_element_ntfs(".."))
+        self.assertFalse(validate_path_element_ntfs("git~1"))

+ 0 - 6
dulwich/tests/test_lru_cache.py

@@ -22,12 +22,7 @@ from dulwich import (
 from dulwich.tests import (
 from dulwich.tests import (
     TestCase,
     TestCase,
     )
     )
-from dulwich.tests.utils import (
-    skipIfPY3,
-    )
-
 
 
-@skipIfPY3
 class TestLRUCache(TestCase):
 class TestLRUCache(TestCase):
     """Test that LRU cache properly keeps track of entries."""
     """Test that LRU cache properly keeps track of entries."""
 
 
@@ -291,7 +286,6 @@ class TestLRUCache(TestCase):
         self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
         self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
 
 
 
 
-@skipIfPY3
 class TestLRUSizeCache(TestCase):
 class TestLRUSizeCache(TestCase):
 
 
     def test_basic_init(self):
     def test_basic_init(self):

+ 60 - 5
dulwich/tests/test_missing_obj_finder.py

@@ -25,6 +25,7 @@ from dulwich.objects import (
 from dulwich.tests import TestCase
 from dulwich.tests import TestCase
 from dulwich.tests.utils import (
 from dulwich.tests.utils import (
     make_object,
     make_object,
+    make_tag,
     build_commit_graph,
     build_commit_graph,
     skipIfPY3,
     skipIfPY3,
     )
     )
@@ -91,13 +92,12 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
         self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id],
         self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id],
             self.missing_1_3)
             self.missing_1_3)
 
 
-    def test_bogus_haves_failure(self):
-        """Ensure non-existent SHA in haves are not tolerated"""
+    def test_bogus_haves(self):
+        """Ensure non-existent SHA in haves are tolerated"""
         bogus_sha = self.cmt(2).id[::-1]
         bogus_sha = self.cmt(2).id[::-1]
         haves = [self.cmt(1).id, bogus_sha]
         haves = [self.cmt(1).id, bogus_sha]
         wants = [self.cmt(3).id]
         wants = [self.cmt(3).id]
-        self.assertRaises(KeyError, self.store.find_missing_objects,
-            self.store, haves, wants)
+        self.assertMissingMatch(haves, wants, self.missing_1_3)
 
 
     def test_bogus_wants_failure(self):
     def test_bogus_wants_failure(self):
         """Ensure non-existent SHA in wants are not tolerated"""
         """Ensure non-existent SHA in wants are not tolerated"""
@@ -105,7 +105,7 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
         haves = [self.cmt(1).id]
         haves = [self.cmt(1).id]
         wants = [self.cmt(3).id, bogus_sha]
         wants = [self.cmt(3).id, bogus_sha]
         self.assertRaises(KeyError, self.store.find_missing_objects,
         self.assertRaises(KeyError, self.store.find_missing_objects,
-            self.store, haves, wants)
+            haves, wants)
 
 
     def test_no_changes(self):
     def test_no_changes(self):
         self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
         self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
@@ -195,3 +195,58 @@ class MOFMergeForkRepoTest(MissingObjectFinderTest):
               self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
               self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
               self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
               self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
               self.f1_4_id])
               self.f1_4_id])
+
+
+class MOFTagsTest(MissingObjectFinderTest):
+    def setUp(self):
+        super(MOFTagsTest, self).setUp()
+        f1_1 = make_object(Blob, data='f1')
+        commit_spec = [[1]]
+        trees = {1: [('f1', f1_1)]}
+        self.commits = build_commit_graph(self.store, commit_spec, trees)
+
+        self._normal_tag = make_tag(self.cmt(1))
+        self.store.add_object(self._normal_tag)
+
+        self._tag_of_tag = make_tag(self._normal_tag)
+        self.store.add_object(self._tag_of_tag)
+
+        self._tag_of_tree = make_tag(self.store[self.cmt(1).tree])
+        self.store.add_object(self._tag_of_tree)
+
+        self._tag_of_blob = make_tag(f1_1)
+        self.store.add_object(self._tag_of_blob)
+
+        self._tag_of_tag_of_blob = make_tag(self._tag_of_blob)
+        self.store.add_object(self._tag_of_tag_of_blob)
+
+        self.f1_1_id = f1_1.id
+
+    def test_tagged_commit(self):
+        # The user already has the tagged commit, all they want is the tag,
+        # so send them only the tag object.
+        self.assertMissingMatch([self.cmt(1).id], [self._normal_tag.id],
+                                [self._normal_tag.id])
+
+    # The remaining cases are unusual, but do happen in the wild.
+    def test_tagged_tag(self):
+        # User already has tagged tag, send only tag of tag
+        self.assertMissingMatch([self._normal_tag.id], [self._tag_of_tag.id],
+                                [self._tag_of_tag.id])
+        # User needs both tags, but already has commit
+        self.assertMissingMatch([self.cmt(1).id], [self._tag_of_tag.id],
+                                [self._normal_tag.id, self._tag_of_tag.id])
+
+    def test_tagged_tree(self):
+        self.assertMissingMatch(
+            [], [self._tag_of_tree.id],
+            [self._tag_of_tree.id, self.cmt(1).tree, self.f1_1_id])
+
+    def test_tagged_blob(self):
+        self.assertMissingMatch([], [self._tag_of_blob.id],
+                                [self._tag_of_blob.id, self.f1_1_id])
+
+    def test_tagged_tagged_blob(self):
+        self.assertMissingMatch([], [self._tag_of_tag_of_blob.id],
+                                [self._tag_of_tag_of_blob.id,
+                                 self._tag_of_blob.id, self.f1_1_id])

+ 9 - 8
dulwich/tests/test_object_store.py

@@ -53,12 +53,13 @@ from dulwich.tests import (
     )
     )
 from dulwich.tests.utils import (
 from dulwich.tests.utils import (
     make_object,
     make_object,
+    make_tag,
     build_pack,
     build_pack,
     skipIfPY3,
     skipIfPY3,
     )
     )
 
 
 
 
-testobject = make_object(Blob, data="yummy data")
+testobject = make_object(Blob, data=b"yummy data")
 
 
 
 
 class ObjectStoreTests(object):
 class ObjectStoreTests(object):
@@ -84,7 +85,7 @@ class ObjectStoreTests(object):
         self.store.add_objects([])
         self.store.add_objects([])
 
 
     def test_add_commit(self):
     def test_add_commit(self):
-        # TODO: Argh, no way to construct Git commit objects without 
+        # TODO: Argh, no way to construct Git commit objects without
         # access to a serialized form.
         # access to a serialized form.
         self.store.add_objects([])
         self.store.add_objects([])
 
 
@@ -169,10 +170,7 @@ class ObjectStoreTests(object):
         self.assertEqual(expected, list(actual))
         self.assertEqual(expected, list(actual))
 
 
     def make_tag(self, name, obj):
     def make_tag(self, name, obj):
-        tag = make_object(Tag, name=name, message='',
-                          tag_time=12345, tag_timezone=0,
-                          tagger='Test Tagger <test@example.com>',
-                          object=(object_class(obj.type_name), obj.id))
+        tag = make_tag(obj, name=name)
         self.store.add_object(tag)
         self.store.add_object(tag)
         return tag
         return tag
 
 
@@ -214,6 +212,11 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         else:
         else:
             commit()
             commit()
 
 
+    def test_add_pack_emtpy(self):
+        o = MemoryObjectStore()
+        f, commit, abort = o.add_pack()
+        commit()
+
     def test_add_thin_pack(self):
     def test_add_thin_pack(self):
         o = MemoryObjectStore()
         o = MemoryObjectStore()
         blob = make_object(Blob, data='yummy data')
         blob = make_object(Blob, data='yummy data')
@@ -396,8 +399,6 @@ class TreeLookupPathTests(TestCase):
     def test_lookup_not_tree(self):
     def test_lookup_not_tree(self):
         self.assertRaises(NotTreeError, tree_lookup_path, self.get_object, self.tree_id, 'ad/b/j')
         self.assertRaises(NotTreeError, tree_lookup_path, self.get_object, self.tree_id, 'ad/b/j')
 
 
-# TODO: MissingObjectFinderTests
-
 @skipIfPY3
 @skipIfPY3
 class ObjectStoreGraphWalkerTests(TestCase):
 class ObjectStoreGraphWalkerTests(TestCase):
 
 

File diff suppressed because it is too large
+ 287 - 283
dulwich/tests/test_objects.py


+ 17 - 11
dulwich/tests/test_pack.py

@@ -191,6 +191,14 @@ class TestPackDeltas(TestCase):
         self._test_roundtrip(self.test_string_huge + self.test_string1,
         self._test_roundtrip(self.test_string_huge + self.test_string1,
                              self.test_string_huge + self.test_string2)
                              self.test_string_huge + self.test_string2)
 
 
+    def test_dest_overflow(self):
+        self.assertRaises(
+            ValueError,
+            apply_delta, 'a'*0x10000, '\x80\x80\x04\x80\x80\x04\x80' + 'a'*0x10000)
+        self.assertRaises(
+            ValueError,
+            apply_delta, '', '\x00\x80\x02\xb0\x11\x11')
+
 
 
 @skipIfPY3
 @skipIfPY3
 class TestPackData(PackTests):
 class TestPackData(PackTests):
@@ -1041,22 +1049,20 @@ class DeltaChainIteratorTests(TestCase):
             self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),))
             self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),))
 
 
 
 
-@skipIfPY3
 class DeltaEncodeSizeTests(TestCase):
 class DeltaEncodeSizeTests(TestCase):
 
 
     def test_basic(self):
     def test_basic(self):
-        self.assertEquals('\x00', _delta_encode_size(0))
-        self.assertEquals('\x01', _delta_encode_size(1))
-        self.assertEquals('\xfa\x01', _delta_encode_size(250))
-        self.assertEquals('\xe8\x07', _delta_encode_size(1000))
-        self.assertEquals('\xa0\x8d\x06', _delta_encode_size(100000))
+        self.assertEqual('\x00', _delta_encode_size(0))
+        self.assertEqual('\x01', _delta_encode_size(1))
+        self.assertEqual('\xfa\x01', _delta_encode_size(250))
+        self.assertEqual('\xe8\x07', _delta_encode_size(1000))
+        self.assertEqual('\xa0\x8d\x06', _delta_encode_size(100000))
 
 
 
 
-@skipIfPY3
 class EncodeCopyOperationTests(TestCase):
 class EncodeCopyOperationTests(TestCase):
 
 
     def test_basic(self):
     def test_basic(self):
-        self.assertEquals('\x80', _encode_copy_operation(0, 0))
-        self.assertEquals('\x91\x01\x0a', _encode_copy_operation(1, 10))
-        self.assertEquals('\xb1\x64\xe8\x03', _encode_copy_operation(100, 1000))
-        self.assertEquals('\x93\xe8\x03\x01', _encode_copy_operation(1000, 1))
+        self.assertEqual('\x80', _encode_copy_operation(0, 0))
+        self.assertEqual('\x91\x01\x0a', _encode_copy_operation(1, 10))
+        self.assertEqual('\xb1\x64\xe8\x03', _encode_copy_operation(100, 1000))
+        self.assertEqual('\x93\xe8\x03\x01', _encode_copy_operation(1000, 1))

+ 217 - 197
dulwich/tests/test_refs.py

@@ -56,89 +56,106 @@ class CheckRefFormatTests(TestCase):
     """
     """
 
 
     def test_valid(self):
     def test_valid(self):
-        self.assertTrue(check_ref_format('heads/foo'))
-        self.assertTrue(check_ref_format('foo/bar/baz'))
-        self.assertTrue(check_ref_format('refs///heads/foo'))
-        self.assertTrue(check_ref_format('foo./bar'))
-        self.assertTrue(check_ref_format('heads/foo@bar'))
-        self.assertTrue(check_ref_format('heads/fix.lock.error'))
+        self.assertTrue(check_ref_format(b'heads/foo'))
+        self.assertTrue(check_ref_format(b'foo/bar/baz'))
+        self.assertTrue(check_ref_format(b'refs///heads/foo'))
+        self.assertTrue(check_ref_format(b'foo./bar'))
+        self.assertTrue(check_ref_format(b'heads/foo@bar'))
+        self.assertTrue(check_ref_format(b'heads/fix.lock.error'))
 
 
     def test_invalid(self):
     def test_invalid(self):
-        self.assertFalse(check_ref_format('foo'))
-        self.assertFalse(check_ref_format('heads/foo/'))
-        self.assertFalse(check_ref_format('./foo'))
-        self.assertFalse(check_ref_format('.refs/foo'))
-        self.assertFalse(check_ref_format('heads/foo..bar'))
-        self.assertFalse(check_ref_format('heads/foo?bar'))
-        self.assertFalse(check_ref_format('heads/foo.lock'))
-        self.assertFalse(check_ref_format('heads/v@{ation'))
-        self.assertFalse(check_ref_format('heads/foo\bar'))
-
-
-ONES = "1" * 40
-TWOS = "2" * 40
-THREES = "3" * 40
-FOURS = "4" * 40
-
-@skipIfPY3
+        self.assertFalse(check_ref_format(b'foo'))
+        self.assertFalse(check_ref_format(b'heads/foo/'))
+        self.assertFalse(check_ref_format(b'./foo'))
+        self.assertFalse(check_ref_format(b'.refs/foo'))
+        self.assertFalse(check_ref_format(b'heads/foo..bar'))
+        self.assertFalse(check_ref_format(b'heads/foo?bar'))
+        self.assertFalse(check_ref_format(b'heads/foo.lock'))
+        self.assertFalse(check_ref_format(b'heads/v@{ation'))
+        self.assertFalse(check_ref_format(b'heads/foo\bar'))
+
+
+ONES = b'1' * 40
+TWOS = b'2' * 40
+THREES = b'3' * 40
+FOURS = b'4' * 40
+
 class PackedRefsFileTests(TestCase):
 class PackedRefsFileTests(TestCase):
 
 
     def test_split_ref_line_errors(self):
     def test_split_ref_line_errors(self):
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          'singlefield')
+                          b'singlefield')
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          'badsha name')
+                          b'badsha name')
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          '%s bad/../refname' % ONES)
+                          ONES + b' bad/../refname')
 
 
     def test_read_without_peeled(self):
     def test_read_without_peeled(self):
-        f = BytesIO('# comment\n%s ref/1\n%s ref/2' % (ONES, TWOS))
-        self.assertEqual([(ONES, 'ref/1'), (TWOS, 'ref/2')],
+        f = BytesIO(b'\n'.join([
+            b'# comment',
+            ONES + b' ref/1',
+            TWOS + b' ref/2']))
+        self.assertEqual([(ONES, b'ref/1'), (TWOS, b'ref/2')],
                          list(read_packed_refs(f)))
                          list(read_packed_refs(f)))
 
 
     def test_read_without_peeled_errors(self):
     def test_read_without_peeled_errors(self):
-        f = BytesIO('%s ref/1\n^%s' % (ONES, TWOS))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            b'^' + TWOS]))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
 
     def test_read_with_peeled(self):
     def test_read_with_peeled(self):
-        f = BytesIO('%s ref/1\n%s ref/2\n^%s\n%s ref/4' % (
-          ONES, TWOS, THREES, FOURS))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            TWOS + b' ref/2',
+            b'^' + THREES,
+            FOURS + b' ref/4']))
         self.assertEqual([
         self.assertEqual([
-          (ONES, 'ref/1', None),
-          (TWOS, 'ref/2', THREES),
-          (FOURS, 'ref/4', None),
-          ], list(read_packed_refs_with_peeled(f)))
+            (ONES, b'ref/1', None),
+            (TWOS, b'ref/2', THREES),
+            (FOURS, b'ref/4', None),
+            ], list(read_packed_refs_with_peeled(f)))
 
 
     def test_read_with_peeled_errors(self):
     def test_read_with_peeled_errors(self):
-        f = BytesIO('^%s\n%s ref/1' % (TWOS, ONES))
+        f = BytesIO(b'\n'.join([
+            b'^' + TWOS,
+            ONES + b' ref/1']))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
 
-        f = BytesIO('%s ref/1\n^%s\n^%s' % (ONES, TWOS, THREES))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            b'^' + TWOS,
+            b'^' + THREES]))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
 
     def test_write_with_peeled(self):
     def test_write_with_peeled(self):
         f = BytesIO()
         f = BytesIO()
-        write_packed_refs(f, {'ref/1': ONES, 'ref/2': TWOS},
-                          {'ref/1': THREES})
+        write_packed_refs(f, {b'ref/1': ONES, b'ref/2': TWOS},
+                          {b'ref/1': THREES})
         self.assertEqual(
         self.assertEqual(
-          "# pack-refs with: peeled\n%s ref/1\n^%s\n%s ref/2\n" % (
-          ONES, THREES, TWOS), f.getvalue())
+            b'\n'.join([b'# pack-refs with: peeled',
+                        ONES + b' ref/1',
+                        b'^' + THREES,
+                        TWOS + b' ref/2']) + b'\n',
+            f.getvalue())
 
 
     def test_write_without_peeled(self):
     def test_write_without_peeled(self):
         f = BytesIO()
         f = BytesIO()
-        write_packed_refs(f, {'ref/1': ONES, 'ref/2': TWOS})
-        self.assertEqual("%s ref/1\n%s ref/2\n" % (ONES, TWOS), f.getvalue())
+        write_packed_refs(f, {b'ref/1': ONES, b'ref/2': TWOS})
+        self.assertEqual(b'\n'.join([ONES + b' ref/1',
+                                     TWOS + b' ref/2']) + b'\n',
+                         f.getvalue())
 
 
 
 
 # Dict of refs that we expect all RefsContainerTests subclasses to define.
 # Dict of refs that we expect all RefsContainerTests subclasses to define.
 _TEST_REFS = {
 _TEST_REFS = {
-  'HEAD': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/master': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/packed': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/tags/refs-0.1': 'df6800012397fb85c56e7418dd4eb9405dee075c',
-  'refs/tags/refs-0.2': '3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
-  }
+    b'HEAD': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/master': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/packed': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/tags/refs-0.1': b'df6800012397fb85c56e7418dd4eb9405dee075c',
+    b'refs/tags/refs-0.2': b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
+    }
 
 
 
 
 class RefsContainerTests(object):
 class RefsContainerTests(object):
@@ -147,100 +164,103 @@ class RefsContainerTests(object):
         actual_keys = set(self._refs.keys())
         actual_keys = set(self._refs.keys())
         self.assertEqual(set(self._refs.allkeys()), actual_keys)
         self.assertEqual(set(self._refs.allkeys()), actual_keys)
         # ignore the symref loop if it exists
         # ignore the symref loop if it exists
-        actual_keys.discard('refs/heads/loop')
-        self.assertEqual(set(_TEST_REFS.iterkeys()), actual_keys)
+        actual_keys.discard(b'refs/heads/loop')
+        self.assertEqual(set(_TEST_REFS.keys()), actual_keys)
 
 
-        actual_keys = self._refs.keys('refs/heads')
-        actual_keys.discard('loop')
+        actual_keys = self._refs.keys(b'refs/heads')
+        actual_keys.discard(b'loop')
         self.assertEqual(
         self.assertEqual(
-            ['40-char-ref-aaaaaaaaaaaaaaaaaa', 'master', 'packed'],
+            [b'40-char-ref-aaaaaaaaaaaaaaaaaa', b'master', b'packed'],
             sorted(actual_keys))
             sorted(actual_keys))
-        self.assertEqual(['refs-0.1', 'refs-0.2'],
-                         sorted(self._refs.keys('refs/tags')))
+        self.assertEqual([b'refs-0.1', b'refs-0.2'],
+                         sorted(self._refs.keys(b'refs/tags')))
 
 
     def test_as_dict(self):
     def test_as_dict(self):
         # refs/heads/loop does not show up even if it exists
         # refs/heads/loop does not show up even if it exists
         self.assertEqual(_TEST_REFS, self._refs.as_dict())
         self.assertEqual(_TEST_REFS, self._refs.as_dict())
 
 
     def test_setitem(self):
     def test_setitem(self):
-        self._refs['refs/some/ref'] = '42d06bd4b77fed026b154d16493e5deab78f02ec'
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/some/ref'])
-        self.assertRaises(errors.RefFormatError, self._refs.__setitem__,
-                          'notrefs/foo', '42d06bd4b77fed026b154d16493e5deab78f02ec')
+        self._refs[b'refs/some/ref'] = b'42d06bd4b77fed026b154d16493e5deab78f02ec'
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/some/ref'])
+        self.assertRaises(
+            errors.RefFormatError, self._refs.__setitem__,
+            b'notrefs/foo', b'42d06bd4b77fed026b154d16493e5deab78f02ec')
 
 
     def test_set_if_equals(self):
     def test_set_if_equals(self):
-        nines = '9' * 40
-        self.assertFalse(self._refs.set_if_equals('HEAD', 'c0ffee', nines))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['HEAD'])
+        nines = b'9' * 40
+        self.assertFalse(self._refs.set_if_equals(b'HEAD', b'c0ffee', nines))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'HEAD'])
 
 
         self.assertTrue(self._refs.set_if_equals(
         self.assertTrue(self._refs.set_if_equals(
-          'HEAD', '42d06bd4b77fed026b154d16493e5deab78f02ec', nines))
-        self.assertEqual(nines, self._refs['HEAD'])
+            b'HEAD', b'42d06bd4b77fed026b154d16493e5deab78f02ec', nines))
+        self.assertEqual(nines, self._refs[b'HEAD'])
 
 
-        self.assertTrue(self._refs.set_if_equals('refs/heads/master', None,
+        self.assertTrue(self._refs.set_if_equals(b'refs/heads/master', None,
                                                  nines))
                                                  nines))
-        self.assertEqual(nines, self._refs['refs/heads/master'])
+        self.assertEqual(nines, self._refs[b'refs/heads/master'])
 
 
     def test_add_if_new(self):
     def test_add_if_new(self):
-        nines = '9' * 40
-        self.assertFalse(self._refs.add_if_new('refs/heads/master', nines))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/master'])
+        nines = b'9' * 40
+        self.assertFalse(self._refs.add_if_new(b'refs/heads/master', nines))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
 
 
-        self.assertTrue(self._refs.add_if_new('refs/some/ref', nines))
-        self.assertEqual(nines, self._refs['refs/some/ref'])
+        self.assertTrue(self._refs.add_if_new(b'refs/some/ref', nines))
+        self.assertEqual(nines, self._refs[b'refs/some/ref'])
 
 
     def test_set_symbolic_ref(self):
     def test_set_symbolic_ref(self):
-        self._refs.set_symbolic_ref('refs/heads/symbolic', 'refs/heads/master')
-        self.assertEqual('ref: refs/heads/master',
-                         self._refs.read_loose_ref('refs/heads/symbolic'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/symbolic'])
+        self._refs.set_symbolic_ref(b'refs/heads/symbolic',
+                                    b'refs/heads/master')
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/symbolic'])
 
 
     def test_set_symbolic_ref_overwrite(self):
     def test_set_symbolic_ref_overwrite(self):
-        nines = '9' * 40
-        self.assertFalse('refs/heads/symbolic' in self._refs)
-        self._refs['refs/heads/symbolic'] = nines
-        self.assertEqual(nines, self._refs.read_loose_ref('refs/heads/symbolic'))
-        self._refs.set_symbolic_ref('refs/heads/symbolic', 'refs/heads/master')
-        self.assertEqual('ref: refs/heads/master',
-                         self._refs.read_loose_ref('refs/heads/symbolic'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/symbolic'])
+        nines = b'9' * 40
+        self.assertFalse(b'refs/heads/symbolic' in self._refs)
+        self._refs[b'refs/heads/symbolic'] = nines
+        self.assertEqual(nines,
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self._refs.set_symbolic_ref(b'refs/heads/symbolic',
+                                    b'refs/heads/master')
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/symbolic'])
 
 
     def test_check_refname(self):
     def test_check_refname(self):
-        self._refs._check_refname('HEAD')
-        self._refs._check_refname('refs/stash')
-        self._refs._check_refname('refs/heads/foo')
+        self._refs._check_refname(b'HEAD')
+        self._refs._check_refname(b'refs/stash')
+        self._refs._check_refname(b'refs/heads/foo')
 
 
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
-                          'refs')
+                          b'refs')
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
-                          'notrefs/foo')
+                          b'notrefs/foo')
 
 
     def test_contains(self):
     def test_contains(self):
-        self.assertTrue('refs/heads/master' in self._refs)
-        self.assertFalse('refs/heads/bar' in self._refs)
+        self.assertTrue(b'refs/heads/master' in self._refs)
+        self.assertFalse(b'refs/heads/bar' in self._refs)
 
 
     def test_delitem(self):
     def test_delitem(self):
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                          self._refs['refs/heads/master'])
-        del self._refs['refs/heads/master']
-        self.assertRaises(KeyError, lambda: self._refs['refs/heads/master'])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
+        del self._refs[b'refs/heads/master']
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/heads/master'])
 
 
     def test_remove_if_equals(self):
     def test_remove_if_equals(self):
-        self.assertFalse(self._refs.remove_if_equals('HEAD', 'c0ffee'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['HEAD'])
+        self.assertFalse(self._refs.remove_if_equals(b'HEAD', b'c0ffee'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'HEAD'])
         self.assertTrue(self._refs.remove_if_equals(
         self.assertTrue(self._refs.remove_if_equals(
-          'refs/tags/refs-0.2', '3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
-        self.assertFalse('refs/tags/refs-0.2' in self._refs)
+            b'refs/tags/refs-0.2', b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
+        self.assertFalse(b'refs/tags/refs-0.2' in self._refs)
 
 
 
 
 
 
-@skipIfPY3
 class DictRefsContainerTests(RefsContainerTests, TestCase):
 class DictRefsContainerTests(RefsContainerTests, TestCase):
 
 
     def setUp(self):
     def setUp(self):
@@ -250,13 +270,12 @@ class DictRefsContainerTests(RefsContainerTests, TestCase):
     def test_invalid_refname(self):
     def test_invalid_refname(self):
         # FIXME: Move this test into RefsContainerTests, but requires
         # FIXME: Move this test into RefsContainerTests, but requires
         # some way of injecting invalid refs.
         # some way of injecting invalid refs.
-        self._refs._refs["refs/stash"] = "00" * 20
+        self._refs._refs[b'refs/stash'] = b'00' * 20
         expected_refs = dict(_TEST_REFS)
         expected_refs = dict(_TEST_REFS)
-        expected_refs["refs/stash"] = "00" * 20
+        expected_refs[b'refs/stash'] = b'00' * 20
         self.assertEqual(expected_refs, self._refs.as_dict())
         self.assertEqual(expected_refs, self._refs.as_dict())
 
 
 
 
-@skipIfPY3
 class DiskRefsContainerTests(RefsContainerTests, TestCase):
 class DiskRefsContainerTests(RefsContainerTests, TestCase):
 
 
     def setUp(self):
     def setUp(self):
@@ -270,39 +289,39 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
 
 
     def test_get_packed_refs(self):
     def test_get_packed_refs(self):
         self.assertEqual({
         self.assertEqual({
-          'refs/heads/packed': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-          'refs/tags/refs-0.1': 'df6800012397fb85c56e7418dd4eb9405dee075c',
-          }, self._refs.get_packed_refs())
+            b'refs/heads/packed': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+            b'refs/tags/refs-0.1': b'df6800012397fb85c56e7418dd4eb9405dee075c',
+            }, self._refs.get_packed_refs())
 
 
     def test_get_peeled_not_packed(self):
     def test_get_peeled_not_packed(self):
         # not packed
         # not packed
-        self.assertEqual(None, self._refs.get_peeled('refs/tags/refs-0.2'))
-        self.assertEqual('3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
-                         self._refs['refs/tags/refs-0.2'])
+        self.assertEqual(None, self._refs.get_peeled(b'refs/tags/refs-0.2'))
+        self.assertEqual(b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
+                         self._refs[b'refs/tags/refs-0.2'])
 
 
         # packed, known not peelable
         # packed, known not peelable
-        self.assertEqual(self._refs['refs/heads/packed'],
-                         self._refs.get_peeled('refs/heads/packed'))
+        self.assertEqual(self._refs[b'refs/heads/packed'],
+                         self._refs.get_peeled(b'refs/heads/packed'))
 
 
         # packed, peeled
         # packed, peeled
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs.get_peeled('refs/tags/refs-0.1'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs.get_peeled(b'refs/tags/refs-0.1'))
 
 
     def test_setitem(self):
     def test_setitem(self):
         RefsContainerTests.test_setitem(self)
         RefsContainerTests.test_setitem(self)
         f = open(os.path.join(self._refs.path, 'refs', 'some', 'ref'), 'rb')
         f = open(os.path.join(self._refs.path, 'refs', 'some', 'ref'), 'rb')
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                          f.read()[:40])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         f.read()[:40])
         f.close()
         f.close()
 
 
     def test_setitem_symbolic(self):
     def test_setitem_symbolic(self):
-        ones = '1' * 40
-        self._refs['HEAD'] = ones
-        self.assertEqual(ones, self._refs['HEAD'])
+        ones = b'1' * 40
+        self._refs[b'HEAD'] = ones
+        self.assertEqual(ones, self._refs[b'HEAD'])
 
 
         # ensure HEAD was not modified
         # ensure HEAD was not modified
         f = open(os.path.join(self._refs.path, 'HEAD'), 'rb')
         f = open(os.path.join(self._refs.path, 'HEAD'), 'rb')
-        self.assertEqual('ref: refs/heads/master', next(iter(f)).rstrip('\n'))
+        self.assertEqual(b'ref: refs/heads/master', next(iter(f)).rstrip(b'\n'))
         f.close()
         f.close()
 
 
         # ensure the symbolic link was written through
         # ensure the symbolic link was written through
@@ -314,20 +333,22 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         RefsContainerTests.test_set_if_equals(self)
         RefsContainerTests.test_set_if_equals(self)
 
 
         # ensure symref was followed
         # ensure symref was followed
-        self.assertEqual('9' * 40, self._refs['refs/heads/master'])
+        self.assertEqual(b'9' * 40, self._refs[b'refs/heads/master'])
 
 
         # ensure lockfile was deleted
         # ensure lockfile was deleted
         self.assertFalse(os.path.exists(
         self.assertFalse(os.path.exists(
-          os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
+            os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
         self.assertFalse(os.path.exists(
         self.assertFalse(os.path.exists(
-          os.path.join(self._refs.path, 'HEAD.lock')))
+            os.path.join(self._refs.path, 'HEAD.lock')))
 
 
     def test_add_if_new_packed(self):
     def test_add_if_new_packed(self):
         # don't overwrite packed ref
         # don't overwrite packed ref
-        self.assertFalse(self._refs.add_if_new('refs/tags/refs-0.1', '9' * 40))
-        self.assertEqual('df6800012397fb85c56e7418dd4eb9405dee075c',
-                         self._refs['refs/tags/refs-0.1'])
+        self.assertFalse(self._refs.add_if_new(b'refs/tags/refs-0.1',
+                                               b'9' * 40))
+        self.assertEqual(b'df6800012397fb85c56e7418dd4eb9405dee075c',
+                         self._refs[b'refs/tags/refs-0.1'])
 
 
+    @skipIfPY3
     def test_add_if_new_symbolic(self):
     def test_add_if_new_symbolic(self):
         # Use an empty repo instead of the default.
         # Use an empty repo instead of the default.
         tear_down_repo(self._repo)
         tear_down_repo(self._repo)
@@ -336,53 +357,53 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         self._repo = Repo.init(repo_dir)
         self._repo = Repo.init(repo_dir)
         refs = self._repo.refs
         refs = self._repo.refs
 
 
-        nines = '9' * 40
-        self.assertEqual('ref: refs/heads/master', refs.read_ref('HEAD'))
-        self.assertFalse('refs/heads/master' in refs)
-        self.assertTrue(refs.add_if_new('HEAD', nines))
-        self.assertEqual('ref: refs/heads/master', refs.read_ref('HEAD'))
-        self.assertEqual(nines, refs['HEAD'])
-        self.assertEqual(nines, refs['refs/heads/master'])
-        self.assertFalse(refs.add_if_new('HEAD', '1' * 40))
-        self.assertEqual(nines, refs['HEAD'])
-        self.assertEqual(nines, refs['refs/heads/master'])
+        nines = b'9' * 40
+        self.assertEqual(b'ref: refs/heads/master', refs.read_ref(b'HEAD'))
+        self.assertFalse(b'refs/heads/master' in refs)
+        self.assertTrue(refs.add_if_new(b'HEAD', nines))
+        self.assertEqual(b'ref: refs/heads/master', refs.read_ref(b'HEAD'))
+        self.assertEqual(nines, refs[b'HEAD'])
+        self.assertEqual(nines, refs[b'refs/heads/master'])
+        self.assertFalse(refs.add_if_new(b'HEAD', b'1' * 40))
+        self.assertEqual(nines, refs[b'HEAD'])
+        self.assertEqual(nines, refs[b'refs/heads/master'])
 
 
     def test_follow(self):
     def test_follow(self):
-        self.assertEqual(
-          ('refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'),
-          self._refs._follow('HEAD'))
-        self.assertEqual(
-          ('refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'),
-          self._refs._follow('refs/heads/master'))
-        self.assertRaises(KeyError, self._refs._follow, 'refs/heads/loop')
+        self.assertEqual((b'refs/heads/master',
+                          b'42d06bd4b77fed026b154d16493e5deab78f02ec'),
+                         self._refs._follow(b'HEAD'))
+        self.assertEqual((b'refs/heads/master',
+                          b'42d06bd4b77fed026b154d16493e5deab78f02ec'),
+                         self._refs._follow(b'refs/heads/master'))
+        self.assertRaises(KeyError, self._refs._follow, b'refs/heads/loop')
 
 
     def test_delitem(self):
     def test_delitem(self):
         RefsContainerTests.test_delitem(self)
         RefsContainerTests.test_delitem(self)
         ref_file = os.path.join(self._refs.path, 'refs', 'heads', 'master')
         ref_file = os.path.join(self._refs.path, 'refs', 'heads', 'master')
         self.assertFalse(os.path.exists(ref_file))
         self.assertFalse(os.path.exists(ref_file))
-        self.assertFalse('refs/heads/master' in self._refs.get_packed_refs())
+        self.assertFalse(b'refs/heads/master' in self._refs.get_packed_refs())
 
 
     def test_delitem_symbolic(self):
     def test_delitem_symbolic(self):
-        self.assertEqual('ref: refs/heads/master',
-                          self._refs.read_loose_ref('HEAD'))
-        del self._refs['HEAD']
-        self.assertRaises(KeyError, lambda: self._refs['HEAD'])
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/master'])
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'HEAD'))
+        del self._refs[b'HEAD']
+        self.assertRaises(KeyError, lambda: self._refs[b'HEAD'])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
         self.assertFalse(os.path.exists(os.path.join(self._refs.path, 'HEAD')))
         self.assertFalse(os.path.exists(os.path.join(self._refs.path, 'HEAD')))
 
 
     def test_remove_if_equals_symref(self):
     def test_remove_if_equals_symref(self):
         # HEAD is a symref, so shouldn't equal its dereferenced value
         # HEAD is a symref, so shouldn't equal its dereferenced value
         self.assertFalse(self._refs.remove_if_equals(
         self.assertFalse(self._refs.remove_if_equals(
-          'HEAD', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
+            b'HEAD', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
         self.assertTrue(self._refs.remove_if_equals(
         self.assertTrue(self._refs.remove_if_equals(
-          'refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
-        self.assertRaises(KeyError, lambda: self._refs['refs/heads/master'])
+            b'refs/heads/master', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/heads/master'])
 
 
         # HEAD is now a broken symref
         # HEAD is now a broken symref
-        self.assertRaises(KeyError, lambda: self._refs['HEAD'])
-        self.assertEqual('ref: refs/heads/master',
-                          self._refs.read_loose_ref('HEAD'))
+        self.assertRaises(KeyError, lambda: self._refs[b'HEAD'])
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'HEAD'))
 
 
         self.assertFalse(os.path.exists(
         self.assertFalse(os.path.exists(
             os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
             os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
@@ -395,48 +416,47 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         refs_data = f.read()
         refs_data = f.read()
         f.close()
         f.close()
         f = GitFile(refs_file, 'wb')
         f = GitFile(refs_file, 'wb')
-        f.write('\n'.join(l for l in refs_data.split('\n')
-                          if not l or l[0] not in '#^'))
+        f.write(b'\n'.join(l for l in refs_data.split(b'\n')
+                           if not l or l[0] not in b'#^'))
         f.close()
         f.close()
         self._repo = Repo(self._repo.path)
         self._repo = Repo(self._repo.path)
         refs = self._repo.refs
         refs = self._repo.refs
         self.assertTrue(refs.remove_if_equals(
         self.assertTrue(refs.remove_if_equals(
-          'refs/heads/packed', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
+            b'refs/heads/packed', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
 
 
     def test_remove_if_equals_packed(self):
     def test_remove_if_equals_packed(self):
         # test removing ref that is only packed
         # test removing ref that is only packed
-        self.assertEqual('df6800012397fb85c56e7418dd4eb9405dee075c',
-                         self._refs['refs/tags/refs-0.1'])
+        self.assertEqual(b'df6800012397fb85c56e7418dd4eb9405dee075c',
+                         self._refs[b'refs/tags/refs-0.1'])
         self.assertTrue(
         self.assertTrue(
-          self._refs.remove_if_equals('refs/tags/refs-0.1',
-          'df6800012397fb85c56e7418dd4eb9405dee075c'))
-        self.assertRaises(KeyError, lambda: self._refs['refs/tags/refs-0.1'])
+            self._refs.remove_if_equals(
+                b'refs/tags/refs-0.1',
+                b'df6800012397fb85c56e7418dd4eb9405dee075c'))
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/tags/refs-0.1'])
 
 
     def test_read_ref(self):
     def test_read_ref(self):
-        self.assertEqual('ref: refs/heads/master', self._refs.read_ref("HEAD"))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-            self._refs.read_ref("refs/heads/packed"))
-        self.assertEqual(None,
-            self._refs.read_ref("nonexistant"))
+        self.assertEqual(b'ref: refs/heads/master', self._refs.read_ref(b'HEAD'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs.read_ref(b'refs/heads/packed'))
+        self.assertEqual(None, self._refs.read_ref(b'nonexistant'))
 
 
 
 
 _TEST_REFS_SERIALIZED = (
 _TEST_REFS_SERIALIZED = (
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/master\n'
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/packed\n'
-'df6800012397fb85c56e7418dd4eb9405dee075c\trefs/tags/refs-0.1\n'
-'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8\trefs/tags/refs-0.2\n')
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/master\n'
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/packed\n'
+    b'df6800012397fb85c56e7418dd4eb9405dee075c\trefs/tags/refs-0.1\n'
+    b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8\trefs/tags/refs-0.2\n')
 
 
 
 
-@skipIfPY3
 class InfoRefsContainerTests(TestCase):
 class InfoRefsContainerTests(TestCase):
 
 
     def test_invalid_refname(self):
     def test_invalid_refname(self):
-        text = _TEST_REFS_SERIALIZED + '00' * 20 + '\trefs/stash\n'
+        text = _TEST_REFS_SERIALIZED + b'00' * 20 + b'\trefs/stash\n'
         refs = InfoRefsContainer(BytesIO(text))
         refs = InfoRefsContainer(BytesIO(text))
         expected_refs = dict(_TEST_REFS)
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
-        expected_refs["refs/stash"] = "00" * 20
+        del expected_refs[b'HEAD']
+        expected_refs[b'refs/stash'] = b'00' * 20
         self.assertEqual(expected_refs, refs.as_dict())
         self.assertEqual(expected_refs, refs.as_dict())
 
 
     def test_keys(self):
     def test_keys(self):
@@ -444,34 +464,34 @@ class InfoRefsContainerTests(TestCase):
         actual_keys = set(refs.keys())
         actual_keys = set(refs.keys())
         self.assertEqual(set(refs.allkeys()), actual_keys)
         self.assertEqual(set(refs.allkeys()), actual_keys)
         # ignore the symref loop if it exists
         # ignore the symref loop if it exists
-        actual_keys.discard('refs/heads/loop')
+        actual_keys.discard(b'refs/heads/loop')
         expected_refs = dict(_TEST_REFS)
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
-        self.assertEqual(set(expected_refs.iterkeys()), actual_keys)
+        del expected_refs[b'HEAD']
+        self.assertEqual(set(expected_refs.keys()), actual_keys)
 
 
-        actual_keys = refs.keys('refs/heads')
-        actual_keys.discard('loop')
+        actual_keys = refs.keys(b'refs/heads')
+        actual_keys.discard(b'loop')
         self.assertEqual(
         self.assertEqual(
-            ['40-char-ref-aaaaaaaaaaaaaaaaaa', 'master', 'packed'],
+            [b'40-char-ref-aaaaaaaaaaaaaaaaaa', b'master', b'packed'],
             sorted(actual_keys))
             sorted(actual_keys))
-        self.assertEqual(['refs-0.1', 'refs-0.2'],
-                         sorted(refs.keys('refs/tags')))
+        self.assertEqual([b'refs-0.1', b'refs-0.2'],
+                         sorted(refs.keys(b'refs/tags')))
 
 
     def test_as_dict(self):
     def test_as_dict(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         # refs/heads/loop does not show up even if it exists
         # refs/heads/loop does not show up even if it exists
         expected_refs = dict(_TEST_REFS)
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
+        del expected_refs[b'HEAD']
         self.assertEqual(expected_refs, refs.as_dict())
         self.assertEqual(expected_refs, refs.as_dict())
 
 
     def test_contains(self):
     def test_contains(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
-        self.assertTrue('refs/heads/master' in refs)
-        self.assertFalse('refs/heads/bar' in refs)
+        self.assertTrue(b'refs/heads/master' in refs)
+        self.assertFalse(b'refs/heads/bar' in refs)
 
 
     def test_get_peeled(self):
     def test_get_peeled(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         # refs/heads/loop does not show up even if it exists
         # refs/heads/loop does not show up even if it exists
         self.assertEqual(
         self.assertEqual(
-            _TEST_REFS['refs/heads/master'],
-            refs.get_peeled('refs/heads/master'))
+            _TEST_REFS[b'refs/heads/master'],
+            refs.get_peeled(b'refs/heads/master'))

+ 2 - 4
dulwich/tests/test_server.py

@@ -59,6 +59,7 @@ from dulwich.tests import TestCase
 from dulwich.tests.utils import (
 from dulwich.tests.utils import (
     make_commit,
     make_commit,
     make_object,
     make_object,
+    make_tag,
     skipIfPY3,
     skipIfPY3,
     )
     )
 from dulwich.protocol import (
 from dulwich.protocol import (
@@ -280,10 +281,7 @@ class FindShallowTests(TestCase):
 
 
     def test_tag(self):
     def test_tag(self):
         c1, c2 = self.make_linear_commits(2)
         c1, c2 = self.make_linear_commits(2)
-        tag = make_object(Tag, name='tag', message='',
-                          tagger='Tagger <test@example.com>',
-                          tag_time=12345, tag_timezone=0,
-                          object=(Commit, c2.id))
+        tag = make_tag(c2, name='tag')
         self._store.add_object(tag)
         self._store.add_object(tag)
 
 
         self.assertEqual((set([c1.id]), set([c2.id])),
         self.assertEqual((set([c1.id]), set([c2.id])),

+ 89 - 89
dulwich/tests/test_walk.py

@@ -162,111 +162,111 @@ class WalkerTest(TestCase):
         self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True)
         self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True)
 
 
     def test_changes_one_parent(self):
     def test_changes_one_parent(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_a2 = make_object(Blob, data='a2')
-        blob_b2 = make_object(Blob, data='b2')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b2 = make_object(Blob, data=b'b2')
         c1, c2 = self.make_linear_commits(
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob_a1)],
-                    2: [('a', blob_a2), ('b', blob_b2)]})
-        e1 = TestWalkEntry(c1, [TreeChange.add(('a', F, blob_a1.id))])
-        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
-                                           ('a', F, blob_a2.id)),
-                                TreeChange.add(('b', F, blob_b2.id))])
+            2, trees={1: [(b'a', blob_a1)],
+                      2: [(b'a', blob_a2), (b'b', blob_b2)]})
+        e1 = TestWalkEntry(c1, [TreeChange.add((b'a', F, blob_a1.id))])
+        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
+                                           (b'a', F, blob_a2.id)),
+                                TreeChange.add((b'b', F, blob_b2.id))])
         self.assertWalkYields([e2, e1], [c2.id])
         self.assertWalkYields([e2, e1], [c2.id])
 
 
     def test_changes_multiple_parents(self):
     def test_changes_multiple_parents(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_b2 = make_object(Blob, data='b2')
-        blob_a3 = make_object(Blob, data='a3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_b2 = make_object(Blob, data=b'b2')
+        blob_a3 = make_object(Blob, data=b'a3')
         c1, c2, c3 = self.make_commits(
         c1, c2, c3 = self.make_commits(
-          [[1], [2], [3, 1, 2]],
-          trees={1: [('a', blob_a1)], 2: [('b', blob_b2)],
-                 3: [('a', blob_a3), ('b', blob_b2)]})
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'a', blob_a1)], 2: [(b'b', blob_b2)],
+                   3: [(b'a', blob_a3), (b'b', blob_b2)]})
         # a is a modify/add conflict and b is not conflicted.
         # a is a modify/add conflict and b is not conflicted.
         changes = [[
         changes = [[
-          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id), ('a', F, blob_a3.id)),
-          TreeChange.add(('a', F, blob_a3.id)),
-          ]]
+            TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)),
+            TreeChange.add((b'a', F, blob_a3.id)),
+        ]]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
                               exclude=[c1.id, c2.id])
                               exclude=[c1.id, c2.id])
 
 
     def test_path_matches(self):
     def test_path_matches(self):
-        walker = Walker(None, [], paths=['foo', 'bar', 'baz/quux'])
-        self.assertTrue(walker._path_matches('foo'))
-        self.assertTrue(walker._path_matches('foo/a'))
-        self.assertTrue(walker._path_matches('foo/a/b'))
-        self.assertTrue(walker._path_matches('bar'))
-        self.assertTrue(walker._path_matches('baz/quux'))
-        self.assertTrue(walker._path_matches('baz/quux/a'))
+        walker = Walker(None, [], paths=[b'foo', b'bar', b'baz/quux'])
+        self.assertTrue(walker._path_matches(b'foo'))
+        self.assertTrue(walker._path_matches(b'foo/a'))
+        self.assertTrue(walker._path_matches(b'foo/a/b'))
+        self.assertTrue(walker._path_matches(b'bar'))
+        self.assertTrue(walker._path_matches(b'baz/quux'))
+        self.assertTrue(walker._path_matches(b'baz/quux/a'))
 
 
         self.assertFalse(walker._path_matches(None))
         self.assertFalse(walker._path_matches(None))
-        self.assertFalse(walker._path_matches('oops'))
-        self.assertFalse(walker._path_matches('fool'))
-        self.assertFalse(walker._path_matches('baz'))
-        self.assertFalse(walker._path_matches('baz/quu'))
+        self.assertFalse(walker._path_matches(b'oops'))
+        self.assertFalse(walker._path_matches(b'fool'))
+        self.assertFalse(walker._path_matches(b'baz'))
+        self.assertFalse(walker._path_matches(b'baz/quu'))
 
 
     def test_paths(self):
     def test_paths(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_b2 = make_object(Blob, data='b2')
-        blob_a3 = make_object(Blob, data='a3')
-        blob_b3 = make_object(Blob, data='b3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_b2 = make_object(Blob, data=b'b2')
+        blob_a3 = make_object(Blob, data=b'a3')
+        blob_b3 = make_object(Blob, data=b'b3')
         c1, c2, c3 = self.make_linear_commits(
         c1, c2, c3 = self.make_linear_commits(
-          3, trees={1: [('a', blob_a1)],
-                    2: [('a', blob_a1), ('x/b', blob_b2)],
-                    3: [('a', blob_a3), ('x/b', blob_b3)]})
+            3, trees={1: [(b'a', blob_a1)],
+                      2: [(b'a', blob_a1), (b'x/b', blob_b2)],
+                      3: [(b'a', blob_a3), (b'x/b', blob_b3)]})
 
 
         self.assertWalkYields([c3, c2, c1], [c3.id])
         self.assertWalkYields([c3, c2, c1], [c3.id])
-        self.assertWalkYields([c3, c1], [c3.id], paths=['a'])
-        self.assertWalkYields([c3, c2], [c3.id], paths=['x/b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=[b'a'])
+        self.assertWalkYields([c3, c2], [c3.id], paths=[b'x/b'])
 
 
         # All changes are included, not just for requested paths.
         # All changes are included, not just for requested paths.
         changes = [
         changes = [
-          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
-                     ('a', F, blob_a3.id)),
-          TreeChange(CHANGE_MODIFY, ('x/b', F, blob_b2.id),
-                     ('x/b', F, blob_b3.id)),
-          ]
+            TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
+                       (b'a', F, blob_a3.id)),
+            TreeChange(CHANGE_MODIFY, (b'x/b', F, blob_b2.id),
+                       (b'x/b', F, blob_b3.id)),
+        ]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
-                              max_entries=1, paths=['a'])
+                              max_entries=1, paths=[b'a'])
 
 
     def test_paths_subtree(self):
     def test_paths_subtree(self):
-        blob_a = make_object(Blob, data='a')
-        blob_b = make_object(Blob, data='b')
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
         c1, c2, c3 = self.make_linear_commits(
         c1, c2, c3 = self.make_linear_commits(
-          3, trees={1: [('x/a', blob_a)],
-                    2: [('b', blob_b), ('x/a', blob_a)],
-                    3: [('b', blob_b), ('x/a', blob_a), ('x/b', blob_b)]})
-        self.assertWalkYields([c2], [c3.id], paths=['b'])
-        self.assertWalkYields([c3, c1], [c3.id], paths=['x'])
+            3, trees={1: [(b'x/a', blob_a)],
+                      2: [(b'b', blob_b), (b'x/a', blob_a)],
+                      3: [(b'b', blob_b), (b'x/a', blob_a), (b'x/b', blob_b)]})
+        self.assertWalkYields([c2], [c3.id], paths=[b'b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=[b'x'])
 
 
     def test_paths_max_entries(self):
     def test_paths_max_entries(self):
-        blob_a = make_object(Blob, data='a')
-        blob_b = make_object(Blob, data='b')
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
         c1, c2 = self.make_linear_commits(
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob_a)],
-                    2: [('a', blob_a), ('b', blob_b)]})
-        self.assertWalkYields([c2], [c2.id], paths=['b'], max_entries=1)
-        self.assertWalkYields([c1], [c1.id], paths=['a'], max_entries=1)
+            2, trees={1: [(b'a', blob_a)],
+                      2: [(b'a', blob_a), (b'b', blob_b)]})
+        self.assertWalkYields([c2], [c2.id], paths=[b'b'], max_entries=1)
+        self.assertWalkYields([c1], [c1.id], paths=[b'a'], max_entries=1)
 
 
     def test_paths_merge(self):
     def test_paths_merge(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_a2 = make_object(Blob, data='a2')
-        blob_a3 = make_object(Blob, data='a3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_a3 = make_object(Blob, data=b'a3')
         x1, y2, m3, m4 = self.make_commits(
         x1, y2, m3, m4 = self.make_commits(
-          [[1], [2], [3, 1, 2], [4, 1, 2]],
-          trees={1: [('a', blob_a1)],
-                 2: [('a', blob_a2)],
-                 3: [('a', blob_a3)],
-                 4: [('a', blob_a1)]})  # Non-conflicting
-        self.assertWalkYields([m3, y2, x1], [m3.id], paths=['a'])
-        self.assertWalkYields([y2, x1], [m4.id], paths=['a'])
+            [[1], [2], [3, 1, 2], [4, 1, 2]],
+            trees={1: [(b'a', blob_a1)],
+                   2: [(b'a', blob_a2)],
+                   3: [(b'a', blob_a3)],
+                   4: [(b'a', blob_a1)]})  # Non-conflicting
+        self.assertWalkYields([m3, y2, x1], [m3.id], paths=[b'a'])
+        self.assertWalkYields([y2, x1], [m4.id], paths=[b'a'])
 
 
     def test_changes_with_renames(self):
     def test_changes_with_renames(self):
-        blob = make_object(Blob, data='blob')
+        blob = make_object(Blob, data=b'blob')
         c1, c2 = self.make_linear_commits(
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob)], 2: [('b', blob)]})
-        entry_a = ('a', F, blob.id)
-        entry_b = ('b', F, blob.id)
+            2, trees={1: [(b'a', blob)], 2: [(b'b', blob)]})
+        entry_a = (b'a', F, blob.id)
+        entry_b = (b'b', F, blob.id)
         changes_without_renames = [TreeChange.delete(entry_a),
         changes_without_renames = [TreeChange.delete(entry_a),
                                    TreeChange.add(entry_b)]
                                    TreeChange.add(entry_b)]
         changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)]
         changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)]
@@ -278,37 +278,37 @@ class WalkerTest(TestCase):
           rename_detector=detector)
           rename_detector=detector)
 
 
     def test_follow_rename(self):
     def test_follow_rename(self):
-        blob = make_object(Blob, data='blob')
-        names = ['a', 'a', 'b', 'b', 'c', 'c']
+        blob = make_object(Blob, data=b'blob')
+        names = [b'a', b'a', b'b', b'b', b'c', b'c']
 
 
         trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names))
         trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names))
         c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
         c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
-        self.assertWalkYields([c5], [c6.id], paths=['c'])
+        self.assertWalkYields([c5], [c6.id], paths=[b'c'])
 
 
         e = lambda n: (n, F, blob.id)
         e = lambda n: (n, F, blob.id)
         self.assertWalkYields(
         self.assertWalkYields(
-          [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('c'))]),
-           TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e('a'), e('b'))]),
-           TestWalkEntry(c1, [TreeChange.add(e('a'))])],
-          [c6.id], paths=['c'], follow=True)
+            [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'c'))]),
+             TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'b'))]),
+             TestWalkEntry(c1, [TreeChange.add(e(b'a'))])],
+            [c6.id], paths=[b'c'], follow=True)
 
 
     def test_follow_rename_remove_path(self):
     def test_follow_rename_remove_path(self):
-        blob = make_object(Blob, data='blob')
+        blob = make_object(Blob, data=b'blob')
         _, _, _, c4, c5, c6 = self.make_linear_commits(
         _, _, _, c4, c5, c6 = self.make_linear_commits(
-          6, trees={1: [('a', blob), ('c', blob)],
-                    2: [],
-                    3: [],
-                    4: [('b', blob)],
-                    5: [('a', blob)],
-                    6: [('c', blob)]})
+            6, trees={1: [(b'a', blob), (b'c', blob)],
+                      2: [],
+                      3: [],
+                      4: [(b'b', blob)],
+                      5: [(b'a', blob)],
+                      6: [(b'c', blob)]})
 
 
         e = lambda n: (n, F, blob.id)
         e = lambda n: (n, F, blob.id)
         # Once the path changes to b, we aren't interested in a or c anymore.
         # Once the path changes to b, we aren't interested in a or c anymore.
         self.assertWalkYields(
         self.assertWalkYields(
-          [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e('a'), e('c'))]),
-           TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('a'))]),
-           TestWalkEntry(c4, [TreeChange.add(e('b'))])],
-          [c6.id], paths=['c'], follow=True)
+            [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'c'))]),
+             TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'a'))]),
+             TestWalkEntry(c4, [TreeChange.add(e(b'b'))])],
+            [c6.id], paths=[b'c'], follow=True)
 
 
     def test_since(self):
     def test_since(self):
         c1, c2, c3 = self.make_linear_commits(3)
         c1, c2, c3 = self.make_linear_commits(3)

+ 2 - 6
dulwich/tests/test_web.py

@@ -61,6 +61,7 @@ from dulwich.web import (
 
 
 from dulwich.tests.utils import (
 from dulwich.tests.utils import (
     make_object,
     make_object,
+    make_tag,
     skipIfPY3,
     skipIfPY3,
     )
     )
 
 
@@ -229,12 +230,7 @@ class DumbHandlersTestCase(WebTestCase):
         blob2 = make_object(Blob, data='2')
         blob2 = make_object(Blob, data='2')
         blob3 = make_object(Blob, data='3')
         blob3 = make_object(Blob, data='3')
 
 
-        tag1 = make_object(Tag, name='tag-tag',
-                           tagger='Test <test@example.com>',
-                           tag_time=12345,
-                           tag_timezone=0,
-                           message='message',
-                           object=(Blob, blob2.id))
+        tag1 = make_tag(blob2, name='tag-tag')
 
 
         objects = [blob1, blob2, blob3, tag1]
         objects = [blob1, blob2, blob3, tag1]
         refs = {
         refs = {

+ 28 - 4
dulwich/tests/utils.py

@@ -36,6 +36,8 @@ from dulwich.index import (
 from dulwich.objects import (
 from dulwich.objects import (
     FixedSha,
     FixedSha,
     Commit,
     Commit,
+    Tag,
+    object_class,
     )
     )
 from dulwich.pack import (
 from dulwich.pack import (
     OFS_DELTA,
     OFS_DELTA,
@@ -100,6 +102,7 @@ def make_object(cls, **attrs):
         __dict__ instead of __slots__.
         __dict__ instead of __slots__.
         """
         """
         pass
         pass
+    TestObject.__name__ = 'TestObject_' + cls.__name__
 
 
     obj = TestObject()
     obj = TestObject()
     for name, value in attrs.items():
     for name, value in attrs.items():
@@ -119,19 +122,40 @@ def make_commit(**attrs):
     :return: A newly initialized Commit object.
     :return: A newly initialized Commit object.
     """
     """
     default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple()))
     default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple()))
-    all_attrs = {'author': 'Test Author <test@nodomain.com>',
+    all_attrs = {'author': b'Test Author <test@nodomain.com>',
                  'author_time': default_time,
                  'author_time': default_time,
                  'author_timezone': 0,
                  'author_timezone': 0,
-                 'committer': 'Test Committer <test@nodomain.com>',
+                 'committer': b'Test Committer <test@nodomain.com>',
                  'commit_time': default_time,
                  'commit_time': default_time,
                  'commit_timezone': 0,
                  'commit_timezone': 0,
-                 'message': 'Test message.',
+                 'message': b'Test message.',
                  'parents': [],
                  'parents': [],
-                 'tree': '0' * 40}
+                 'tree': b'0' * 40}
     all_attrs.update(attrs)
     all_attrs.update(attrs)
     return make_object(Commit, **all_attrs)
     return make_object(Commit, **all_attrs)
 
 
 
 
+def make_tag(target, **attrs):
+    """Make a Tag object with a default set of values.
+
+    :param target: object to be tagged (Commit, Blob, Tree, etc)
+    :param attrs: dict of attributes to overwrite from the default values.
+    :return: A newly initialized Tag object.
+    """
+    target_id = target.id
+    target_type = object_class(target.type_name)
+    default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple()))
+    all_attrs = {'tagger': 'Test Author <test@nodomain.com>',
+                 'tag_time': default_time,
+                 'tag_timezone': 0,
+                 'message': 'Test message.',
+                 'object': (target_type, target_id),
+                 'name': 'Test Tag',
+                 }
+    all_attrs.update(attrs)
+    return make_object(Tag, **all_attrs)
+
+
 def functest_builder(method, func):
 def functest_builder(method, func):
     """Generate a test method that tests the given function."""
     """Generate a test method that tests the given function."""
 
 

+ 1 - 1
dulwich/walk.py

@@ -251,7 +251,7 @@ class Walker(object):
             if changed_path == followed_path:
             if changed_path == followed_path:
                 return True
                 return True
             if (changed_path.startswith(followed_path) and
             if (changed_path.startswith(followed_path) and
-                changed_path[len(followed_path)] == '/'):
+                    changed_path[len(followed_path)] == b'/'[0]):
                 return True
                 return True
         return False
         return False
 
 

+ 2 - 1
setup.py

@@ -8,7 +8,7 @@ except ImportError:
     from distutils.core import setup, Extension
     from distutils.core import setup, Extension
 from distutils.core import Distribution
 from distutils.core import Distribution
 
 
-dulwich_version_string = '0.9.8'
+dulwich_version_string = '0.10.0'
 
 
 include_dirs = []
 include_dirs = []
 # Windows MSVC support
 # Windows MSVC support
@@ -76,6 +76,7 @@ setup(name='dulwich',
       in the particular Monty Python sketch.
       in the particular Monty Python sketch.
       """,
       """,
       packages=['dulwich', 'dulwich.tests', 'dulwich.tests.compat', 'dulwich.contrib'],
       packages=['dulwich', 'dulwich.tests', 'dulwich.tests.compat', 'dulwich.contrib'],
+      package_data={'': ['../docs/tutorial/*.txt']},
       scripts=['bin/dulwich', 'bin/dul-receive-pack', 'bin/dul-upload-pack'],
       scripts=['bin/dulwich', 'bin/dul-receive-pack', 'bin/dul-upload-pack'],
       classifiers=[
       classifiers=[
           'Development Status :: 4 - Beta',
           'Development Status :: 4 - Beta',

Some files were not shown because too many files changed in this diff