Browse Source

Imported Upstream version 0.10.0

Jelmer Vernooij 10 years ago
parent
commit
a84207213c

+ 14 - 0
HACKING

@@ -25,3 +25,17 @@ will run the tests using unittest on Python 2.7 and higher, and using
 unittest2 (which you will need to have installed) on older versions of Python.
 
  $ make check
+
+String Types
+------------
+Like Linux, Git treats filenames as arbitrary bytestrings. There is no prescribed
+encoding for these strings, and although it is fairly common to use UTF-8, anything
+can be and is used as an encoding with Git.
+
+For this reason, Dulwich internally treats git-based filenames as bytestrings. It is up
+to the Dulwich API user to encode and decode them if necessary.
+
+* git-repository related filenames: bytes
+* object sha1 digests (20 bytes long): bytes
+* object sha1 hexdigests (40 bytes long): str (bytestrings on python2, strings on python3)
+

+ 47 - 0
NEWS

@@ -1,3 +1,50 @@
+0.10.0	2015-03-22
+
+ BUG FIXES
+
+  * In dulwich.index.build_index_from_tree, by default
+    refuse to create entries that start with .git/.
+
+  * Fix running of testsuite when installed.
+    (Jelmer Vernooij, #223)
+
+  * Use a block cache in _find_content_rename_candidates(),
+    improving performance. (Mike Williams)
+
+  * Add support for ``core.protectNTFS`` setting.
+    (Jelmer Vernooij)
+
+  * Fix TypeError when fetching empty updates.
+    (Hwee Miin Koh)
+
+  * Resolve delta refs when pulling into a MemoryRepo.
+    (Max Shawabkeh, #256)
+
+  * Fix handling of tags of non-commits in missing object finder.
+    (Augie Fackler, #211)
+
+  * Explicitly disable mmap on plan9 where it doesn't work.
+    (Jeff Sickel)
+
+ IMPROVEMENTS
+
+  * New public method `Repo.reset_index`. (Jelmer Vernooij)
+
+  * Prevent duplicate parsing of loose files in objects
+    directory when reading. Thanks to David Keijser for the
+    report. (Jelmer Vernooij, #231)
+
+0.9.9	2015-03-20
+
+ SECURITY BUG FIXES
+
+  * Fix buffer overflow in C implementation of pack apply_delta().
+    (CVE-2015-0838)
+
+    Thanks to Ivan Fratric of the Google Security Team for
+    reporting this issue.
+    (Jelmer Vernooij)
+
 0.9.8	2014-11-30
 
  BUG FIXES

+ 1 - 1
PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: dulwich
-Version: 0.9.8
+Version: 0.10.0
 Summary: Python Git Library
 Home-page: https://samba.org/~jelmer/dulwich
 Author: Jelmer Vernooij

+ 16 - 1
README.md

@@ -11,6 +11,21 @@ Author: Jelmer Vernooij <jelmer@samba.org>
 The project is named after the part of London that Mr. and Mrs. Git live in
 in the particular Monty Python sketch.
 
+Installation
+------------
+
+By default, Dulwich's setup.py will attempt to build and install the optional C
+extensions. The reason for this is that they significantly improve the performance
+since some low-level operations that are executed often are much slower in CPython.
+
+If you don't want to install the C bindings, specify the --pure argument to setup.py::
+
+    $ python setup.py --pure install
+
+or if you are installing from pip:
+
+    $ pip install dulwich --global-option="--pure"
+
 Further documentation
 ---------------------
 
@@ -31,6 +46,6 @@ https://launchpad.net/~dulwich-users.
 Python3
 -------
 
-The process of porting to Python3 is ongoing. Please not that although the
+The process of porting to Python3 is ongoing. Please note that although the
 test suite pass in python3, this is due to the tests of features that are not
 yet ported being skipped, and *not* an indication that the port is complete.

+ 1 - 1
dulwich.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: dulwich
-Version: 0.9.8
+Version: 0.10.0
 Summary: Python Git Library
 Home-page: https://samba.org/~jelmer/dulwich
 Author: Jelmer Vernooij

+ 1 - 1
dulwich/__init__.py

@@ -21,4 +21,4 @@
 
 """Python implementation of the Git file formats and protocols."""
 
-__version__ = (0, 9, 8)
+__version__ = (0, 10, 0)

+ 5 - 1
dulwich/_pack.c

@@ -146,10 +146,14 @@ static PyObject *py_apply_delta(PyObject *self, PyObject *args)
 				break;
 			memcpy(out+outindex, src_buf+cp_off, cp_size);
 			outindex += cp_size;
+			dest_size -= cp_size;
 		} else if (cmd != 0) {
+			if (cmd > dest_size)
+				break;
 			memcpy(out+outindex, delta+index, cmd);
 			outindex += cmd;
 			index += cmd;
+			dest_size -= cmd;
 		} else {
 			PyErr_SetString(PyExc_ValueError, "Invalid opcode 0");
 			Py_DECREF(ret);
@@ -167,7 +171,7 @@ static PyObject *py_apply_delta(PyObject *self, PyObject *args)
 		return NULL;
 	}
 
-	if (dest_size != outindex) {
+	if (dest_size != 0) {
 		PyErr_SetString(PyExc_ValueError, "dest size incorrect");
 		Py_DECREF(ret);
 		return NULL;

+ 25 - 2
dulwich/client.py

@@ -616,7 +616,8 @@ class SubprocessWrapper(object):
             from msvcrt import get_osfhandle
             from win32pipe import PeekNamedPipe
             handle = get_osfhandle(self.proc.stdout.fileno())
-            return PeekNamedPipe(handle, 0)[2] != 0
+            data, total_bytes_avail, msg_bytes_left = PeekNamedPipe(handle, 0)
+            return total_bytes_avail != 0
         else:
             return _fileno_can_read(self.proc.stdout.fileno())
 
@@ -679,7 +680,29 @@ class LocalGitClient(GitClient):
         :raises UpdateRefsError: if the server supports report-status
                                  and rejects ref updates
         """
-        raise NotImplementedError(self.send_pack)
+        from dulwich.repo import Repo
+
+        target = Repo(path)
+        old_refs = target.get_refs()
+        new_refs = determine_wants(old_refs)
+
+        have = [sha1 for sha1 in old_refs.values() if sha1 != ZERO_SHA]
+        want = []
+        for refname in set(new_refs.keys() + old_refs.keys()):
+            old_sha1 = old_refs.get(refname, ZERO_SHA)
+            new_sha1 = new_refs.get(refname, ZERO_SHA)
+            if new_sha1 not in have and new_sha1 != ZERO_SHA:
+                want.append(new_sha1)
+
+        if not want and old_refs == new_refs:
+            return new_refs
+
+        target.object_store.add_objects(generate_pack_contents(have, want))
+
+        for name, sha in new_refs.iteritems():
+            target.refs[name] = sha
+
+        return new_refs
 
     def fetch(self, path, target, determine_wants=None, progress=None):
         """Fetch into a target repository.

+ 19 - 12
dulwich/diff_tree.py

@@ -17,7 +17,7 @@
 # MA  02110-1301, USA.
 
 """Utilities for diffing files and trees."""
-
+import sys
 from collections import (
     defaultdict,
     namedtuple,
@@ -27,16 +27,20 @@ from io import BytesIO
 from itertools import chain
 import stat
 
-try:
-    from itertools import izip
-except ImportError:
-    # Python3
+if sys.version_info[0] == 3:
+    xrange = range
     izip = zip
+    iteritems = lambda d: d.items()
+else:
+    from itertools import izip
+    iteritems = lambda d: d.iteritems()
+
 from dulwich.objects import (
     S_ISGITLINK,
     TreeEntry,
     )
 
+
 # TreeChange type constants.
 CHANGE_ADD = 'add'
 CHANGE_MODIFY = 'modify'
@@ -140,7 +144,7 @@ def walk_trees(store, tree1_id, tree2_id, prune_identical=False):
     # case.
     mode1 = tree1_id and stat.S_IFDIR or None
     mode2 = tree2_id and stat.S_IFDIR or None
-    todo = [(TreeEntry('', mode1, tree1_id), TreeEntry('', mode2, tree2_id))]
+    todo = [(TreeEntry(b'', mode1, tree1_id), TreeEntry(b'', mode2, tree2_id))]
     while todo:
         entry1, entry2 = todo.pop()
         is_tree1 = _is_tree(entry1)
@@ -261,7 +265,7 @@ def tree_changes_for_merge(store, parent_tree_ids, tree_id,
     change_type = lambda c: c.type
 
     # Yield only conflicting changes.
-    for _, changes in sorted(changes_by_path.iteritems()):
+    for _, changes in sorted(iteritems(changes_by_path)):
         assert len(changes) == num_parents
         have = [c for c in changes if c is not None]
         if _all_eq(have, change_type, CHANGE_DELETE):
@@ -298,9 +302,11 @@ def _count_blocks(obj):
     block_getvalue = block.getvalue
 
     for c in chain(*obj.as_raw_chunks()):
+        if sys.version_info[0] == 3:
+            c = c.to_bytes(1, 'big')
         block_write(c)
         n += 1
-        if c == '\n' or n == _BLOCK_SIZE:
+        if c == b'\n' or n == _BLOCK_SIZE:
             value = block_getvalue()
             block_counts[hash(value)] += len(value)
             block_seek(0)
@@ -324,7 +330,7 @@ def _common_bytes(blocks1, blocks2):
     if len(blocks1) > len(blocks2):
         blocks1, blocks2 = blocks2, blocks1
     score = 0
-    for block, count1 in blocks1.iteritems():
+    for block, count1 in iteritems(blocks1):
         count2 = blocks2.get(block)
         if count2:
             score += min(count1, count2)
@@ -452,7 +458,7 @@ class RenameDetector(object):
 
         add_paths = set()
         delete_paths = set()
-        for sha, sha_deletes in delete_map.iteritems():
+        for sha, sha_deletes in iteritems(delete_map):
             sha_adds = add_map[sha]
             for (old, is_delete), new in izip(sha_deletes, sha_adds):
                 if stat.S_IFMT(old.mode) != stat.S_IFMT(new.mode):
@@ -498,19 +504,20 @@ class RenameDetector(object):
         if not self._should_find_content_renames():
             return
 
+        block_cache = {}
         check_paths = self._rename_threshold is not None
         for delete in self._deletes:
             if S_ISGITLINK(delete.old.mode):
                 continue  # Git links don't exist in this repo.
             old_sha = delete.old.sha
             old_obj = self._store[old_sha]
-            old_blocks = _count_blocks(old_obj)
+            block_cache[old_sha] = _count_blocks(old_obj)
             for add in self._adds:
                 if stat.S_IFMT(delete.old.mode) != stat.S_IFMT(add.new.mode):
                     continue
                 new_obj = self._store[add.new.sha]
                 score = _similarity_score(old_obj, new_obj,
-                                          block_cache={old_sha: old_blocks})
+                                          block_cache=block_cache)
                 if score > self._rename_threshold:
                     new_type = self._rename_type(check_paths, delete, add)
                     rename = TreeChange(new_type, delete.old, add.new)

+ 32 - 1
dulwich/index.py

@@ -431,8 +431,35 @@ def build_file_from_blob(blob, mode, target_path, honor_filemode=True):
             os.chmod(target_path, mode)
 
 
+INVALID_DOTNAMES = (".git", ".", "..", "")
+
+
+def validate_path_element_default(element):
+    return element.lower() not in INVALID_DOTNAMES
+
+
+def validate_path_element_ntfs(element):
+    stripped = element.rstrip(". ").lower()
+    if stripped in INVALID_DOTNAMES:
+        return False
+    if stripped == "git~1":
+        return False
+    return True
+
+
+def validate_path(path, element_validator=validate_path_element_default):
+    """Check all elements of a path with element_validator; by default refuses .git."""
+    parts = path.split("/")
+    for p in parts:
+        if not element_validator(p):
+            return False
+    else:
+        return True
+
+
 def build_index_from_tree(prefix, index_path, object_store, tree_id,
-                          honor_filemode=True):
+                          honor_filemode=True,
+                          validate_path_element=validate_path_element_default):
     """Generate and materialize index from a tree
 
     :param tree_id: Tree to materialize
@@ -441,6 +468,8 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
     :param object_store: Non-empty object store holding tree contents
     :param honor_filemode: An optional flag to honor core.filemode setting in
         config file, default is core.filemode=True, change executable bit
+    :param validate_path_element: Function to validate path elements to check out;
+        default just refuses .git and .. directories.
 
     :note:: existing index is wiped and contents are not merged
         in a working dir. Suitable only for fresh clones.
@@ -449,6 +478,8 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
     index = Index(index_path)
 
     for entry in object_store.iter_tree_contents(tree_id):
+        if not validate_path(entry.path):
+            continue
         full_path = os.path.join(prefix, entry.path)
 
         if not os.path.exists(os.path.dirname(full_path)):

+ 2 - 1
dulwich/lru_cache.py

@@ -19,6 +19,7 @@
 
 _null_key = object()
 
+
 class _LRUNode(object):
     """This maintains the linked-list which is the lru internals."""
 
@@ -181,7 +182,7 @@ class LRUCache(object):
 
     def items(self):
         """Get the key:value pairs as a dict."""
-        return dict((k, n.value) for k, n in self._cache.iteritems())
+        return dict((k, n.value) for k, n in self._cache.items())
 
     def cleanup(self):
         """Clear the cache until it shrinks to the requested size.

+ 17 - 9
dulwich/object_store.py

@@ -745,7 +745,7 @@ class MemoryObjectStore(BaseObjectStore):
         def commit():
             p = PackData.from_file(BytesIO(f.getvalue()), f.tell())
             f.close()
-            for obj in PackInflater.for_pack_data(p):
+            for obj in PackInflater.for_pack_data(p, self.get_raw):
                 self._data[obj.id] = obj
         def abort():
             pass
@@ -911,7 +911,7 @@ def _collect_filetree_revs(obj_store, tree_sha, kset):
 
 
 def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
-    """Split object id list into two list with commit SHA1s and tag SHA1s.
+    """Split object id list into three lists with commit, tag, and other SHAs.
 
     Commits referenced by tags are included into commits
     list as well. Only SHA1s known in this repository will get
@@ -922,10 +922,11 @@ def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
     :param lst: Collection of commit and tag SHAs
     :param ignore_unknown: True to skip SHA1 missing in the repository
         silently.
-    :return: A tuple of (commits, tags) SHA1s
+    :return: A tuple of (commits, tags, others) SHA1s
     """
     commits = set()
     tags = set()
+    others = set()
     for e in lst:
         try:
             o = obj_store[e]
@@ -937,10 +938,15 @@ def _split_commits_and_tags(obj_store, lst, ignore_unknown=False):
                 commits.add(e)
             elif isinstance(o, Tag):
                 tags.add(e)
-                commits.add(o.object[1])
+                tagged = o.object[1]
+                c, t, o = _split_commits_and_tags(
+                    obj_store, [tagged], ignore_unknown=ignore_unknown)
+                commits |= c
+                tags |= t
+                others |= o
             else:
-                raise KeyError('Not a commit or a tag: %s' % e)
-    return (commits, tags)
+                others.add(e)
+    return (commits, tags, others)
 
 
 class MissingObjectFinder(object):
@@ -966,9 +972,9 @@ class MissingObjectFinder(object):
         # and such SHAs would get filtered out by _split_commits_and_tags,
         # wants shall list only known SHAs, and otherwise
         # _split_commits_and_tags fails with KeyError
-        have_commits, have_tags = (
+        have_commits, have_tags, have_others = (
             _split_commits_and_tags(object_store, haves, True))
-        want_commits, want_tags = (
+        want_commits, want_tags, want_others = (
             _split_commits_and_tags(object_store, wants, False))
         # all_ancestors is a set of commits that shall not be sent
         # (complete repository up to 'haves')
@@ -993,9 +999,11 @@ class MissingObjectFinder(object):
             self.sha_done.add(t)
 
         missing_tags = want_tags.difference(have_tags)
-        # in fact, what we 'want' is commits and tags
+        missing_others = want_others.difference(have_others)
+        # in fact, what we 'want' is commits, tags, and others
         # we've found missing
         wants = missing_commits.union(missing_tags)
+        wants = wants.union(missing_others)
 
         self.objects_to_send = set([(w, None, False) for w in wants])
 

+ 133 - 146
dulwich/objects.py

@@ -25,8 +25,10 @@ from collections import namedtuple
 import os
 import posixpath
 import stat
+import sys
 import warnings
 import zlib
+from hashlib import sha1
 
 from dulwich.errors import (
     ChecksumMismatch,
@@ -37,24 +39,29 @@ from dulwich.errors import (
     ObjectFormatException,
     )
 from dulwich.file import GitFile
-from hashlib import sha1
 
-ZERO_SHA = "0" * 40
+if sys.version_info[0] == 2:
+    iteritems = lambda d: d.iteritems()
+else:
+    iteritems = lambda d: d.items()
+
+
+ZERO_SHA = b'0' * 40
 
 # Header fields for commits
-_TREE_HEADER = "tree"
-_PARENT_HEADER = "parent"
-_AUTHOR_HEADER = "author"
-_COMMITTER_HEADER = "committer"
-_ENCODING_HEADER = "encoding"
-_MERGETAG_HEADER = "mergetag"
-_GPGSIG_HEADER = "gpgsig"
+_TREE_HEADER = b'tree'
+_PARENT_HEADER = b'parent'
+_AUTHOR_HEADER = b'author'
+_COMMITTER_HEADER = b'committer'
+_ENCODING_HEADER = b'encoding'
+_MERGETAG_HEADER = b'mergetag'
+_GPGSIG_HEADER = b'gpgsig'
 
 # Header fields for objects
-_OBJECT_HEADER = "object"
-_TYPE_HEADER = "type"
-_TAG_HEADER = "tag"
-_TAGGER_HEADER = "tagger"
+_OBJECT_HEADER = b'object'
+_TYPE_HEADER = b'type'
+_TAG_HEADER = b'tag'
+_TAGGER_HEADER = b'tagger'
 
 
 S_IFGITLINK = 0o160000
@@ -89,13 +96,18 @@ def hex_to_sha(hex):
     try:
         return binascii.unhexlify(hex)
     except TypeError as exc:
-        if not isinstance(hex, str):
+        if not isinstance(hex, bytes):
             raise
         raise ValueError(exc.args[0])
 
 
 def hex_to_filename(path, hex):
     """Takes a hex sha and returns its filename relative to the given path."""
+    # os.path.join accepts bytes or unicode, but all args must be of the same
+    # type. Make sure that hex which is expected to be bytes, is the same type
+    # as path.
+    if getattr(path, 'encode', None) is not None:
+        hex = hex.decode('ascii')
     dir = hex[:2]
     file = hex[2:]
     # Check from object dir
@@ -110,14 +122,14 @@ def filename_to_hex(filename):
     assert len(names) == 2, errmsg
     base, rest = names
     assert len(base) == 2 and len(rest) == 38, errmsg
-    hex = base + rest
+    hex = (base + rest).encode('ascii')
     hex_to_sha(hex)
     return hex
 
 
 def object_header(num_type, length):
     """Return an object header for the given numeric type and text length."""
-    return "%s %d\0" % (object_class(num_type).type_name, length)
+    return object_class(num_type).type_name + b' ' + str(length).encode('ascii') + b'\0'
 
 
 def serializable_property(name, docstring=None):
@@ -164,21 +176,30 @@ def check_identity(identity, error_msg):
     :param identity: Identity string
     :param error_msg: Error message to use in exception
     """
-    email_start = identity.find("<")
-    email_end = identity.find(">")
+    email_start = identity.find(b'<')
+    email_end = identity.find(b'>')
     if (email_start < 0 or email_end < 0 or email_end <= email_start
-        or identity.find("<", email_start + 1) >= 0
-        or identity.find(">", email_end + 1) >= 0
-        or not identity.endswith(">")):
+        or identity.find(b'<', email_start + 1) >= 0
+        or identity.find(b'>', email_end + 1) >= 0
+        or not identity.endswith(b'>')):
         raise ObjectFormatException(error_msg)
 
 
+def git_line(*items):
+    """Formats items into a space-separated line."""
+    return b' '.join(items) + b'\n'
+
+
 class FixedSha(object):
     """SHA object that behaves like hashlib's but is given a fixed value."""
 
     __slots__ = ('_hexsha', '_sha')
 
     def __init__(self, hexsha):
+        if getattr(hexsha, 'encode', None) is not None:
+            hexsha = hexsha.encode('ascii')
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for hexsha, got %r' % hexsha)
         self._hexsha = hexsha
         self._sha = hex_to_sha(hexsha)
 
@@ -188,14 +209,14 @@ class FixedSha(object):
 
     def hexdigest(self):
         """Return the hex SHA digest."""
-        return self._hexsha
+        return self._hexsha.decode('ascii')
 
 
 class ShaFile(object):
     """A git SHA file."""
 
-    __slots__ = ('_needs_parsing', '_chunked_text', '_file', '_path',
-                 '_sha', '_needs_serialization', '_magic')
+    __slots__ = ('_needs_parsing', '_chunked_text', '_sha',
+                 '_needs_serialization')
 
     @staticmethod
     def _parse_legacy_object_header(magic, f):
@@ -209,22 +230,20 @@ class ShaFile(object):
             extra = f.read(bufsize)
             header += decomp.decompress(extra)
             magic += extra
-            end = header.find("\0", start)
+            end = header.find(b'\0', start)
             start = len(header)
         header = header[:end]
-        type_name, size = header.split(" ", 1)
+        type_name, size = header.split(b' ', 1)
         size = int(size)  # sanity check
         obj_class = object_class(type_name)
         if not obj_class:
             raise ObjectFormatException("Not a known type: %s" % type_name)
-        ret = obj_class()
-        ret._magic = magic
-        return ret
+        return obj_class()
 
     def _parse_legacy_object(self, map):
         """Parse a legacy object, setting the raw string."""
         text = _decompress(map)
-        header_end = text.find('\0')
+        header_end = text.find(b'\0')
         if header_end < 0:
             raise ObjectFormatException("Invalid object header, no \\0")
         self.set_raw_string(text[header_end+1:])
@@ -243,7 +262,7 @@ class ShaFile(object):
     def as_legacy_object(self):
         """Return string representing the object in the experimental format.
         """
-        return "".join(self.as_legacy_object_chunks())
+        return b''.join(self.as_legacy_object_chunks())
 
     def as_raw_chunks(self):
         """Return chunks with serialization of the object.
@@ -261,7 +280,7 @@ class ShaFile(object):
 
         :return: String object
         """
-        return "".join(self.as_raw_chunks())
+        return b''.join(self.as_raw_chunks())
 
     def __str__(self):
         """Return raw string serialization of this object."""
@@ -278,21 +297,14 @@ class ShaFile(object):
     def _ensure_parsed(self):
         if self._needs_parsing:
             if not self._chunked_text:
-                if self._file is not None:
-                    self._parse_file(self._file)
-                    self._file = None
-                elif self._path is not None:
-                    self._parse_path()
-                else:
-                    raise AssertionError(
-                        "ShaFile needs either text or filename")
+                raise AssertionError("ShaFile needs chunked text")
             self._deserialize(self._chunked_text)
             self._needs_parsing = False
 
     def set_raw_string(self, text, sha=None):
         """Set the contents of this object from a serialized string."""
-        if not isinstance(text, str):
-            raise TypeError(text)
+        if not isinstance(text, bytes):
+            raise TypeError('Expected bytes for text, got %r' % text)
         self.set_raw_chunks([text], sha)
 
     def set_raw_chunks(self, chunks, sha=None):
@@ -309,46 +321,45 @@ class ShaFile(object):
     @staticmethod
     def _parse_object_header(magic, f):
         """Parse a new style object, creating it but not reading the file."""
-        num_type = (ord(magic[0]) >> 4) & 7
+        num_type = (ord(magic[0:1]) >> 4) & 7
         obj_class = object_class(num_type)
         if not obj_class:
             raise ObjectFormatException("Not a known type %d" % num_type)
-        ret = obj_class()
-        ret._magic = magic
-        return ret
+        return obj_class()
 
     def _parse_object(self, map):
         """Parse a new style object, setting self._text."""
         # skip type and size; type must have already been determined, and
         # we trust zlib to fail if it's otherwise corrupted
-        byte = ord(map[0])
+        byte = ord(map[0:1])
         used = 1
         while (byte & 0x80) != 0:
-            byte = ord(map[used])
+            byte = ord(map[used:used+1])
             used += 1
         raw = map[used:]
         self.set_raw_string(_decompress(raw))
 
     @classmethod
     def _is_legacy_object(cls, magic):
-        b0, b1 = map(ord, magic)
+        b0 = ord(magic[0:1])
+        b1 = ord(magic[1:2])
         word = (b0 << 8) + b1
         return (b0 & 0x8F) == 0x08 and (word % 31) == 0
 
     @classmethod
-    def _parse_file_header(cls, f):
-        magic = f.read(2)
-        if cls._is_legacy_object(magic):
-            return cls._parse_legacy_object_header(magic, f)
+    def _parse_file(cls, f):
+        map = f.read()
+        if cls._is_legacy_object(map):
+            obj = cls._parse_legacy_object_header(map, f)
+            obj._parse_legacy_object(map)
         else:
-            return cls._parse_object_header(magic, f)
+            obj = cls._parse_object_header(map, f)
+            obj._parse_object(map)
+        return obj
 
     def __init__(self):
         """Don't call this directly"""
         self._sha = None
-        self._path = None
-        self._file = None
-        self._magic = None
         self._chunked_text = []
         self._needs_parsing = False
         self._needs_serialization = True
@@ -359,40 +370,18 @@ class ShaFile(object):
     def _serialize(self):
         raise NotImplementedError(self._serialize)
 
-    def _parse_path(self):
-        with GitFile(self._path, 'rb') as f:
-            self._parse_file(f)
-
-    def _parse_file(self, f):
-        magic = self._magic
-        if magic is None:
-            magic = f.read(2)
-        map = magic + f.read()
-        if self._is_legacy_object(magic[:2]):
-            self._parse_legacy_object(map)
-        else:
-            self._parse_object(map)
-
     @classmethod
     def from_path(cls, path):
         """Open a SHA file from disk."""
         with GitFile(path, 'rb') as f:
-            obj = cls.from_file(f)
-            obj._path = path
-            obj._sha = FixedSha(filename_to_hex(path))
-            obj._file = None
-            obj._magic = None
-            return obj
+            return cls.from_file(f)
 
     @classmethod
     def from_file(cls, f):
         """Get the contents of a SHA file on disk."""
         try:
-            obj = cls._parse_file_header(f)
+            obj = cls._parse_file(f)
             obj._sha = None
-            obj._needs_parsing = True
-            obj._needs_serialization = True
-            obj._file = f
             return obj
         except (IndexError, ValueError):
             raise ObjectFormatException("invalid object header")
@@ -499,7 +488,7 @@ class ShaFile(object):
     @property
     def id(self):
         """The hex SHA of this object."""
-        return self.sha().hexdigest()
+        return self.sha().hexdigest().encode('ascii')
 
     def get_type(self):
         """Return the type number for this object class."""
@@ -532,7 +521,7 @@ class Blob(ShaFile):
 
     __slots__ = ()
 
-    type_name = 'blob'
+    type_name = b'blob'
     type_num = 3
 
     def __init__(self):
@@ -592,19 +581,19 @@ def _parse_message(chunks):
         order read from the text, possibly including duplicates. Includes a
         field named None for the freeform tag/commit text.
     """
-    f = BytesIO("".join(chunks))
+    f = BytesIO(b''.join(chunks))
     k = None
     v = ""
     for l in f:
-        if l.startswith(" "):
+        if l.startswith(b' '):
             v += l[1:]
         else:
             if k is not None:
-                yield (k, v.rstrip("\n"))
-            if l == "\n":
+                yield (k, v.rstrip(b'\n'))
+            if l == b'\n':
                 # Empty line indicates end of headers
                 break
-            (k, v) = l.split(" ", 1)
+            (k, v) = l.split(b' ', 1)
     yield (None, f.read())
     f.close()
 
@@ -612,7 +601,7 @@ def _parse_message(chunks):
 class Tag(ShaFile):
     """A Git Tag object."""
 
-    type_name = 'tag'
+    type_name = b'tag'
     type_num = 4
 
     __slots__ = ('_tag_timezone_neg_utc', '_name', '_object_sha',
@@ -662,18 +651,17 @@ class Tag(ShaFile):
 
     def _serialize(self):
         chunks = []
-        chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
-        chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
-        chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
+        chunks.append(git_line(_OBJECT_HEADER, self._object_sha))
+        chunks.append(git_line(_TYPE_HEADER, self._object_class.type_name))
+        chunks.append(git_line(_TAG_HEADER, self._name))
         if self._tagger:
             if self._tag_time is None:
-                chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
+                chunks.append(git_line(_TAGGER_HEADER, self._tagger))
             else:
-                chunks.append("%s %s %d %s\n" % (
-                  _TAGGER_HEADER, self._tagger, self._tag_time,
-                  format_timezone(self._tag_timezone,
-                    self._tag_timezone_neg_utc)))
-        chunks.append("\n") # To close headers
+                chunks.append(git_line(
+                    _TAGGER_HEADER, self._tagger, str(self._tag_time).encode('ascii'),
+                    format_timezone(self._tag_timezone, self._tag_timezone_neg_utc)))
+        chunks.append(b'\n') # To close headers
         chunks.append(self._message)
         return chunks
 
@@ -692,7 +680,7 @@ class Tag(ShaFile):
                 self._name = value
             elif field == _TAGGER_HEADER:
                 try:
-                    sep = value.index("> ")
+                    sep = value.index(b'> ')
                 except ValueError:
                     self._tagger = value
                     self._tag_time = None
@@ -701,7 +689,7 @@ class Tag(ShaFile):
                 else:
                     self._tagger = value[0:sep+1]
                     try:
-                        (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
+                        (timetext, timezonetext) = value[sep+2:].rsplit(b' ', 1)
                         self._tag_time = int(timetext)
                         self._tag_timezone, self._tag_timezone_neg_utc = \
                                 parse_timezone(timezonetext)
@@ -744,8 +732,8 @@ class TreeEntry(namedtuple('TreeEntry', ['path', 'mode', 'sha'])):
 
     def in_path(self, path):
         """Return a copy of this entry with the given path prepended."""
-        if not isinstance(self.path, str):
-            raise TypeError
+        if not isinstance(self.path, bytes):
+            raise TypeError('Expected bytes for path, got %r' % path)
         return TreeEntry(posixpath.join(path, self.path), self.mode, self.sha)
 
 
@@ -759,15 +747,15 @@ def parse_tree(text, strict=False):
     count = 0
     l = len(text)
     while count < l:
-        mode_end = text.index(' ', count)
+        mode_end = text.index(b' ', count)
         mode_text = text[count:mode_end]
-        if strict and mode_text.startswith('0'):
+        if strict and mode_text.startswith(b'0'):
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
         try:
             mode = int(mode_text, 8)
         except ValueError:
             raise ObjectFormatException("Invalid mode '%s'" % mode_text)
-        name_end = text.index('\0', mode_end)
+        name_end = text.index(b'\0', mode_end)
         name = text[mode_end+1:name_end]
         count = name_end+21
         sha = text[name_end+1:count]
@@ -784,7 +772,7 @@ def serialize_tree(items):
     :return: Serialized tree text as chunks
     """
     for name, mode, hexsha in items:
-        yield "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
+        yield ("%04o" % mode).encode('ascii') + b' ' + name + b'\0' + hex_to_sha(hexsha)
 
 
 def sorted_tree_items(entries, name_order):
@@ -797,14 +785,12 @@ def sorted_tree_items(entries, name_order):
     :return: Iterator over (name, mode, hexsha)
     """
     key_func = name_order and key_entry_name_order or key_entry
-    for name, entry in sorted(entries.iteritems(), key=key_func):
+    for name, entry in sorted(iteritems(entries), key=key_func):
         mode, hexsha = entry
         # Stricter type checks than normal to mirror checks in the C version.
-        if not isinstance(mode, int) and not isinstance(mode, long):
-            raise TypeError('Expected integer/long for mode, got %r' % mode)
         mode = int(mode)
-        if not isinstance(hexsha, str):
-            raise TypeError('Expected a string for SHA, got %r' % hexsha)
+        if not isinstance(hexsha, bytes):
+            raise TypeError('Expected bytes for SHA, got %r' % hexsha)
         yield TreeEntry(name, mode, hexsha)
 
 
@@ -815,7 +801,7 @@ def key_entry(entry):
     """
     (name, value) = entry
     if stat.S_ISDIR(value[0]):
-        name += "/"
+        name += b'/'
     return name
 
 
@@ -827,7 +813,7 @@ def key_entry_name_order(entry):
 class Tree(ShaFile):
     """A Git tree object"""
 
-    type_name = 'tree'
+    type_name = b'tree'
     type_num = 2
 
     __slots__ = ('_entries')
@@ -885,7 +871,7 @@ class Tree(ShaFile):
         :param name: The name of the entry, as a string.
         :param hexsha: The hex SHA of the entry as a string.
         """
-        if isinstance(name, int) and isinstance(mode, str):
+        if isinstance(name, int) and isinstance(mode, bytes):
             (name, mode) = (mode, name)
             warnings.warn(
                 "Please use Tree.add(name, mode, hexsha)",
@@ -914,7 +900,7 @@ class Tree(ShaFile):
     def _deserialize(self, chunks):
         """Grab the entries in the tree"""
         try:
-            parsed_entries = parse_tree("".join(chunks))
+            parsed_entries = parse_tree(b''.join(chunks))
         except ValueError as e:
             raise ObjectFormatException(e)
         # TODO: list comprehension is for efficiency in the common (small)
@@ -932,10 +918,10 @@ class Tree(ShaFile):
                          stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
                          # TODO: optionally exclude as in git fsck --strict
                          stat.S_IFREG | 0o664)
-        for name, mode, sha in parse_tree(''.join(self._chunked_text),
+        for name, mode, sha in parse_tree(b''.join(self._chunked_text),
                                           True):
             check_hexsha(sha, 'invalid sha %s' % sha)
-            if '/' in name or name in ('', '.', '..'):
+            if b'/' in name or name in (b'', b'.', b'..'):
                 raise ObjectFormatException('invalid name %s' % name)
 
             if mode not in allowed_modes:
@@ -969,7 +955,7 @@ class Tree(ShaFile):
         :param path: Path to lookup
         :return: A tuple of (mode, SHA) of the resulting path.
         """
-        parts = path.split('/')
+        parts = path.split(b'/')
         sha = self.id
         mode = None
         for p in parts:
@@ -993,13 +979,13 @@ def parse_timezone(text):
     # cgit parses the first character as the sign, and the rest
     #  as an integer (using strtol), which could also be negative.
     #  We do the same for compatibility. See #697828.
-    if not text[0] in '+-':
+    if not text[0] in b'+-':
         raise ValueError("Timezone must start with + or - (%(text)s)" % vars())
-    sign = text[0]
+    sign = text[:1]
     offset = int(text[1:])
-    if sign == '-':
+    if sign == b'-':
         offset = -offset
-    unnecessary_negative_timezone = (offset >= 0 and sign == '-')
+    unnecessary_negative_timezone = (offset >= 0 and sign == b'-')
     signum = (offset < 0) and -1 or 1
     offset = abs(offset)
     hours = int(offset / 100)
@@ -1022,7 +1008,7 @@ def format_timezone(offset, unnecessary_negative_timezone=False):
         offset = -offset
     else:
         sign = '+'
-    return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
+    return ('%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)).encode('ascii')
 
 
 def parse_commit(chunks):
@@ -1049,17 +1035,17 @@ def parse_commit(chunks):
         elif field == _PARENT_HEADER:
             parents.append(value)
         elif field == _AUTHOR_HEADER:
-            author, timetext, timezonetext = value.rsplit(" ", 2)
+            author, timetext, timezonetext = value.rsplit(b' ', 2)
             author_time = int(timetext)
             author_info = (author, author_time, parse_timezone(timezonetext))
         elif field == _COMMITTER_HEADER:
-            committer, timetext, timezonetext = value.rsplit(" ", 2)
+            committer, timetext, timezonetext = value.rsplit(b' ', 2)
             commit_time = int(timetext)
             commit_info = (committer, commit_time, parse_timezone(timezonetext))
         elif field == _ENCODING_HEADER:
             encoding = value
         elif field == _MERGETAG_HEADER:
-            mergetag.append(Tag.from_string(value + "\n"))
+            mergetag.append(Tag.from_string(value + b'\n'))
         elif field == _GPGSIG_HEADER:
             gpgsig = value
         elif field is None:
@@ -1073,7 +1059,7 @@ def parse_commit(chunks):
 class Commit(ShaFile):
     """A git commit object"""
 
-    type_name = 'commit'
+    type_name = b'commit'
     type_num = 1
 
     __slots__ = ('_parents', '_encoding', '_extra', '_author_timezone_neg_utc',
@@ -1146,40 +1132,41 @@ class Commit(ShaFile):
 
     def _serialize(self):
         chunks = []
-        chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
+        tree_bytes = self._tree.as_raw_string() if isinstance(self._tree, Tree) else self._tree
+        chunks.append(git_line(_TREE_HEADER, tree_bytes))
         for p in self._parents:
-            chunks.append("%s %s\n" % (_PARENT_HEADER, p))
-        chunks.append("%s %s %s %s\n" % (
-            _AUTHOR_HEADER, self._author, str(self._author_time),
+            chunks.append(git_line(_PARENT_HEADER, p))
+        chunks.append(git_line(
+            _AUTHOR_HEADER, self._author, str(self._author_time).encode('ascii'),
             format_timezone(self._author_timezone,
-                          self._author_timezone_neg_utc)))
-        chunks.append("%s %s %s %s\n" % (
-            _COMMITTER_HEADER, self._committer, str(self._commit_time),
+                            self._author_timezone_neg_utc)))
+        chunks.append(git_line(
+            _COMMITTER_HEADER, self._committer, str(self._commit_time).encode('ascii'),
             format_timezone(self._commit_timezone,
-                          self._commit_timezone_neg_utc)))
+                            self._commit_timezone_neg_utc)))
         if self.encoding:
-            chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
+            chunks.append(git_line(_ENCODING_HEADER, self.encoding))
         for mergetag in self.mergetag:
-            mergetag_chunks = mergetag.as_raw_string().split("\n")
+            mergetag_chunks = mergetag.as_raw_string().split(b'\n')
 
-            chunks.append("%s %s\n" % (_MERGETAG_HEADER, mergetag_chunks[0]))
+            chunks.append(git_line(_MERGETAG_HEADER, mergetag_chunks[0]))
             # Embedded extra header needs leading space
             for chunk in mergetag_chunks[1:]:
-                chunks.append(" %s\n" % chunk)
+                chunks.append(b' ' + chunk + b'\n')
 
             # No trailing empty line
-            chunks[-1] = chunks[-1].rstrip(" \n")
+            chunks[-1] = chunks[-1].rstrip(b' \n')
         for k, v in self.extra:
-            if "\n" in k or "\n" in v:
+            if b'\n' in k or b'\n' in v:
                 raise AssertionError(
                     "newline in extra data: %r -> %r" % (k, v))
-            chunks.append("%s %s\n" % (k, v))
+            chunks.append(git_line(k, v))
         if self.gpgsig:
-            sig_chunks = self.gpgsig.split("\n")
-            chunks.append("%s %s\n" % (_GPGSIG_HEADER, sig_chunks[0]))
+            sig_chunks = self.gpgsig.split(b'\n')
+            chunks.append(git_line(_GPGSIG_HEADER, sig_chunks[0]))
             for chunk in sig_chunks[1:]:
-                chunks.append(" %s\n" % chunk)
-        chunks.append("\n")  # There must be a new line after the headers
+                chunks.append(git_line(b'',  chunk))
+        chunks.append(b'\n')  # There must be a new line after the headers
         chunks.append(self._message)
         return chunks
 

+ 11 - 1
dulwich/pack.py

@@ -47,14 +47,20 @@ except ImportError:
     imap = map
     izip = zip
 
+import os
+
 try:
     import mmap
 except ImportError:
     has_mmap = False
 else:
     has_mmap = True
+
+# NOTE: the try/except above only catches ImportError; on Plan 9 the mmap module imports but does not work, so disable it explicitly.
+if os.uname()[0] == 'Plan9':
+    has_mmap = False
+
 from hashlib import sha1
-import os
 from os import (
     SEEK_CUR,
     SEEK_END,
@@ -1094,6 +1100,10 @@ class PackData(object):
         # TODO(dborowitz): Merge this with iterobjects, if we can change its
         # return type.
         self._file.seek(self._header_size)
+
+        if self._num_objects is None:
+            return
+
         for _ in xrange(self._num_objects):
             offset = self._file.tell()
             unpacked, unused = unpack_object(

+ 3 - 6
dulwich/porcelain.py

@@ -204,8 +204,7 @@ def clone(source, target=None, bare=False, checkout=None, outstream=sys.stdout):
     r["HEAD"] = remote_refs["HEAD"]
     if checkout:
         outstream.write('Checking out HEAD')
-        index.build_index_from_tree(r.path, r.index_path(),
-                                    r.object_store, r["HEAD"].tree)
+        r.reset_index()
 
     return r
 
@@ -476,9 +475,8 @@ def reset(repo, mode, committish="HEAD"):
 
     r = open_repo(repo)
 
-    indexfile = r.index_path()
     tree = r[committish].tree
-    index.build_index_from_tree(r.path, indexfile, r.object_store, tree)
+    r.reset_index()
 
 
 def push(repo, remote_location, refs_path,
@@ -532,9 +530,8 @@ def pull(repo, remote_location, refs_path,
     r['HEAD'] = remote_refs[refs_path]
 
     # Perform 'git checkout .' - syncs staged changes
-    indexfile = r.index_path()
     tree = r["HEAD"].tree
-    index.build_index_from_tree(r.path, indexfile, r.object_store, tree)
+    r.reset_index()
 
 
 def status(repo="."):

+ 61 - 57
dulwich/refs.py

@@ -30,6 +30,7 @@ from dulwich.errors import (
     )
 from dulwich.objects import (
     hex_to_sha,
+    git_line,
     )
 from dulwich.file import (
     GitFile,
@@ -37,8 +38,9 @@ from dulwich.file import (
     )
 
 
-SYMREF = 'ref: '
-LOCAL_BRANCH_PREFIX = 'refs/heads/'
+SYMREF = b'ref: '
+LOCAL_BRANCH_PREFIX = b'refs/heads/'
+BAD_REF_CHARS = set(b'\177 ~^:?*[')
 
 
 def check_ref_format(refname):
@@ -53,22 +55,22 @@ def check_ref_format(refname):
     """
     # These could be combined into one big expression, but are listed separately
     # to parallel [1].
-    if '/.' in refname or refname.startswith('.'):
+    if b'/.' in refname or refname.startswith(b'.'):
         return False
-    if '/' not in refname:
+    if b'/' not in refname:
         return False
-    if '..' in refname:
+    if b'..' in refname:
         return False
-    for c in refname:
-        if ord(c) < 0o40 or c in '\177 ~^:?*[':
+    for i, c in enumerate(refname):
+        if ord(refname[i:i+1]) < 0o40 or c in BAD_REF_CHARS:
             return False
-    if refname[-1] in '/.':
+    if refname[-1] in b'/.':
         return False
-    if refname.endswith('.lock'):
+    if refname.endswith(b'.lock'):
         return False
-    if '@{' in refname:
+    if b'@{' in refname:
         return False
-    if '\\' in refname:
+    if b'\\' in refname:
         return False
     return True
 
@@ -105,8 +107,8 @@ class RefsContainer(object):
         return None
 
     def import_refs(self, base, other):
-        for name, value in other.iteritems():
-            self["%s/%s" % (base, name)] = value
+        for name, value in other.items():
+            self[b'/'.join((base, name))] = value
 
     def allkeys(self):
         """All refs present in this container."""
@@ -145,10 +147,10 @@ class RefsContainer(object):
         ret = {}
         keys = self.keys(base)
         if base is None:
-            base = ""
+            base = b''
         for key in keys:
             try:
-                ret[key] = self[("%s/%s" % (base, key)).strip("/")]
+                ret[key] = self[(base + b'/' + key).strip(b'/')]
             except KeyError:
                 continue  # Unable to resolve
 
@@ -165,9 +167,9 @@ class RefsContainer(object):
         :param name: The name of the reference.
         :raises KeyError: if a refname is not HEAD or is otherwise not valid.
         """
-        if name in ('HEAD', 'refs/stash'):
+        if name in (b'HEAD', b'refs/stash'):
             return
-        if not name.startswith('refs/') or not check_ref_format(name[5:]):
+        if not name.startswith(b'refs/') or not check_ref_format(name[5:]):
             raise RefFormatError(name)
 
     def read_ref(self, refname):
@@ -350,15 +352,15 @@ class InfoRefsContainer(RefsContainer):
         self._refs = {}
         self._peeled = {}
         for l in f.readlines():
-            sha, name = l.rstrip("\n").split("\t")
-            if name.endswith("^{}"):
+            sha, name = l.rstrip(b'\n').split(b'\t')
+            if name.endswith(b'^{}'):
                 name = name[:-3]
                 if not check_ref_format(name):
-                    raise ValueError("invalid ref name '%s'" % name)
+                    raise ValueError("invalid ref name %r" % name)
                 self._peeled[name] = sha
             else:
                 if not check_ref_format(name):
-                    raise ValueError("invalid ref name '%s'" % name)
+                    raise ValueError("invalid ref name %r" % name)
                 self._refs[name] = sha
 
     def allkeys(self):
@@ -389,39 +391,41 @@ class DiskRefsContainer(RefsContainer):
         return "%s(%r)" % (self.__class__.__name__, self.path)
 
     def subkeys(self, base):
-        keys = set()
+        subkeys = set()
         path = self.refpath(base)
         for root, dirs, files in os.walk(path):
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             for filename in files:
-                refname = ("%s/%s" % (dir, filename)).strip("/")
+                refname = (("%s/%s" % (dir, filename))
+                           .strip("/").encode('ascii'))
                 # check_ref_format requires at least one /, so we prepend the
                 # base before calling it.
-                if check_ref_format("%s/%s" % (base, refname)):
-                    keys.add(refname)
+                if check_ref_format(base + b'/' + refname):
+                    subkeys.add(refname)
         for key in self.get_packed_refs():
             if key.startswith(base):
-                keys.add(key[len(base):].strip("/"))
-        return keys
+                subkeys.add(key[len(base):].strip(b'/'))
+        return subkeys
 
     def allkeys(self):
-        keys = set()
-        if os.path.exists(self.refpath("HEAD")):
-            keys.add("HEAD")
-        path = self.refpath("")
-        for root, dirs, files in os.walk(self.refpath("refs")):
+        allkeys = set()
+        if os.path.exists(self.refpath(b'HEAD')):
+            allkeys.add(b'HEAD')
+        path = self.refpath(b'')
+        for root, dirs, files in os.walk(self.refpath(b'refs')):
             dir = root[len(path):].strip(os.path.sep).replace(os.path.sep, "/")
             for filename in files:
-                refname = ("%s/%s" % (dir, filename)).strip("/")
+                refname = ("%s/%s" % (dir, filename)).strip("/").encode('ascii')
                 if check_ref_format(refname):
-                    keys.add(refname)
-        keys.update(self.get_packed_refs())
-        return keys
+                    allkeys.add(refname)
+        allkeys.update(self.get_packed_refs())
+        return allkeys
 
     def refpath(self, name):
         """Return the disk path of a ref.
 
         """
+        name = name.decode('ascii')
         if os.path.sep != "/":
             name = name.replace("/", os.path.sep)
         return os.path.join(self.path, name)
@@ -449,7 +453,7 @@ class DiskRefsContainer(RefsContainer):
                 raise
             with f:
                 first_line = next(iter(f)).rstrip()
-                if (first_line.startswith("# pack-refs") and " peeled" in
+                if (first_line.startswith(b'# pack-refs') and b' peeled' in
                         first_line):
                     for sha, name, peeled in read_packed_refs_with_peeled(f):
                         self._packed_refs[name] = sha
@@ -496,7 +500,7 @@ class DiskRefsContainer(RefsContainer):
                 header = f.read(len(SYMREF))
                 if header == SYMREF:
                     # Read only the first line
-                    return header + next(iter(f)).rstrip("\r\n")
+                    return header + next(iter(f)).rstrip(b'\r\n')
                 else:
                     # Read only the first 40 bytes
                     return header + f.read(40 - len(SYMREF))
@@ -538,7 +542,7 @@ class DiskRefsContainer(RefsContainer):
         try:
             f = GitFile(filename, 'wb')
             try:
-                f.write(SYMREF + other + '\n')
+                f.write(SYMREF + other + b'\n')
             except (IOError, OSError):
                 f.abort()
                 raise
@@ -578,7 +582,7 @@ class DiskRefsContainer(RefsContainer):
                     f.abort()
                     raise
             try:
-                f.write(new_ref + "\n")
+                f.write(new_ref + b'\n')
             except (OSError, IOError):
                 f.abort()
                 raise
@@ -608,7 +612,7 @@ class DiskRefsContainer(RefsContainer):
                 f.abort()
                 return False
             try:
-                f.write(ref + "\n")
+                f.write(ref + b'\n')
             except (OSError, IOError):
                 f.abort()
                 raise
@@ -651,16 +655,16 @@ class DiskRefsContainer(RefsContainer):
 
 def _split_ref_line(line):
     """Split a single ref line into a tuple of SHA1 and name."""
-    fields = line.rstrip("\n").split(" ")
+    fields = line.rstrip(b'\n').split(b' ')
     if len(fields) != 2:
-        raise PackedRefsException("invalid ref line '%s'" % line)
+        raise PackedRefsException("invalid ref line %r" % line)
     sha, name = fields
     try:
         hex_to_sha(sha)
     except (AssertionError, TypeError) as e:
         raise PackedRefsException(e)
     if not check_ref_format(name):
-        raise PackedRefsException("invalid ref name '%s'" % name)
+        raise PackedRefsException("invalid ref name %r" % name)
     return (sha, name)
 
 
@@ -671,10 +675,10 @@ def read_packed_refs(f):
     :return: Iterator over tuples with SHA1s and ref names.
     """
     for l in f:
-        if l[0] == "#":
+        if l.startswith(b'#'):
             # Comment
             continue
-        if l[0] == "^":
+        if l.startswith(b'^'):
             raise PackedRefsException(
               "found peeled ref in packed-refs without peeled")
         yield _split_ref_line(l)
@@ -690,10 +694,10 @@ def read_packed_refs_with_peeled(f):
     """
     last = None
     for l in f:
-        if l[0] == "#":
+        if l[0] == b'#':
             continue
-        l = l.rstrip("\r\n")
-        if l[0] == "^":
+        l = l.rstrip(b'\r\n')
+        if l.startswith(b'^'):
             if not last:
                 raise PackedRefsException("unexpected peeled ref line")
             try:
@@ -723,11 +727,11 @@ def write_packed_refs(f, packed_refs, peeled_refs=None):
     if peeled_refs is None:
         peeled_refs = {}
     else:
-        f.write('# pack-refs with: peeled\n')
-    for refname in sorted(packed_refs.iterkeys()):
-        f.write('%s %s\n' % (packed_refs[refname], refname))
+        f.write(b'# pack-refs with: peeled\n')
+    for refname in sorted(packed_refs.keys()):
+        f.write(git_line(packed_refs[refname], refname))
         if refname in peeled_refs:
-            f.write('^%s\n' % peeled_refs[refname])
+            f.write(b'^' + peeled_refs[refname] + b'\n')
 
 
 def read_info_refs(f):
@@ -743,16 +747,16 @@ def write_info_refs(refs, store):
     for name, sha in sorted(refs.items()):
         # get_refs() includes HEAD as a special case, but we don't want to
         # advertise it
-        if name == 'HEAD':
+        if name == b'HEAD':
             continue
         try:
             o = store[sha]
         except KeyError:
             continue
         peeled = store.peel_sha(sha)
-        yield '%s\t%s\n' % (o.id, name)
+        yield o.id + b'\t' + name + b'\n'
         if o.id != peeled.id:
-            yield '%s\t%s^{}\n' % (peeled.id, name)
+            yield peeled.id + b'\t' + name + b'^{}\n'
 
 
-is_local_branch = lambda x: x.startswith("refs/heads/")
+is_local_branch = lambda x: x.startswith(b'refs/heads/')

+ 19 - 5
dulwich/repo.py

@@ -793,17 +793,31 @@ class Repo(BaseRepo):
 
             if not bare:
                 # Checkout HEAD to target dir
-                target._build_tree()
+                target.reset_index()
 
         return target
 
-    def _build_tree(self):
-        from dulwich.index import build_index_from_tree
+    def reset_index(self, tree=None):
+        """Reset the index back to a specific tree.
+
+        :param tree: Tree SHA to reset to, None for current HEAD tree.
+        """
+        from dulwich.index import (
+            build_index_from_tree,
+            validate_path_element_default,
+            validate_path_element_ntfs,
+            )
+        if tree is None:
+            tree = self['HEAD'].tree
         config = self.get_config()
         honor_filemode = config.get_boolean('core', 'filemode', os.name != "nt")
+        if config.get_boolean('core', 'core.protectNTFS', os.name == "nt"):
+            validate_path_element = validate_path_element_ntfs
+        else:
+            validate_path_element = validate_path_element_default
         return build_index_from_tree(self.path, self.index_path(),
-                self.object_store, self['HEAD'].tree,
-                honor_filemode=honor_filemode)
+                self.object_store, tree, honor_filemode=honor_filemode,
+                validate_path_element=validate_path_element)
 
     def get_config(self):
         """Retrieve the config object.

+ 9 - 3
dulwich/tests/__init__.py

@@ -75,8 +75,9 @@ class TestCase(_TestCase):
 class BlackboxTestCase(TestCase):
     """Blackbox testing."""
 
-    bin_directory = os.path.abspath(os.path.join(os.path.dirname(__file__),
-        "..", "..", "bin"))
+    # TODO(jelmer): Include more possible binary paths.
+    bin_directories = [os.path.abspath(os.path.join(os.path.dirname(__file__),
+        "..", "..", "bin")), '/usr/bin', '/usr/local/bin']
 
     def bin_path(self, name):
         """Determine the full path of a binary.
@@ -84,7 +85,12 @@ class BlackboxTestCase(TestCase):
         :param name: Name of the script
         :return: Full path
         """
-        return os.path.join(self.bin_directory, name)
+        for d in self.bin_directories:
+            p = os.path.join(d, name)
+            if os.path.isfile(p):
+                return p
+        else:
+            raise SkipTest("Unable to find binary %s" % name)
 
     def run_command(self, name, args):
         """Run a Dulwich command.

+ 35 - 1
dulwich/tests/test_client.py

@@ -18,6 +18,9 @@
 
 from io import BytesIO
 import sys
+import shutil
+import tempfile
+
 try:
     from unittest import skipIf
 except ImportError:
@@ -53,7 +56,10 @@ from dulwich.objects import (
     Commit,
     Tree
     )
-from dulwich.repo import MemoryRepo
+from dulwich.repo import (
+    MemoryRepo,
+    Repo,
+    )
 from dulwich.tests.utils import (
     open_repo,
     skipIfPY3,
@@ -610,3 +616,31 @@ class LocalGitClientTests(TestCase):
             graph_walker=walker, pack_data=out.write)
         # Hardcoding is not ideal, but we'll fix that some other day..
         self.assertTrue(out.getvalue().startswith('PACK\x00\x00\x00\x02\x00\x00\x00\x07'))
+
+    def test_send_pack_without_changes(self):
+        local = open_repo('a.git')
+        target = open_repo('a.git')
+        self.send_and_verify("master", local, target)
+
+    def test_send_pack_with_changes(self):
+        local = open_repo('a.git')
+        target_path = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, target_path)
+        target = Repo.init_bare(target_path)
+        self.send_and_verify("master", local, target)
+
+    def send_and_verify(self, branch, local, target):
+        client = LocalGitClient()
+        ref_name = "refs/heads/" + branch
+        new_refs = client.send_pack(target.path,
+                                    lambda _: { ref_name: local.refs[ref_name] },
+                                    local.object_store.generate_pack_contents)
+
+        self.assertEqual(local.refs[ref_name], new_refs[ref_name])
+
+        for name, sha in new_refs.iteritems():
+            self.assertEqual(new_refs[name], target.refs[name])
+
+        obj_local = local.get_object(new_refs[ref_name])
+        obj_target = target.get_object(new_refs[ref_name])
+        self.assertEqual(obj_local, obj_target)

File diff suppressed because it is too large
+ 393 - 391
dulwich/tests/test_diff_tree.py


+ 22 - 27
dulwich/tests/test_file.py

@@ -28,12 +28,8 @@ from dulwich.tests import (
     SkipTest,
     TestCase,
     )
-from dulwich.tests.utils import (
-    skipIfPY3
-    )
 
 
-@skipIfPY3
 class FancyRenameTests(TestCase):
 
     def setUp(self):
@@ -41,7 +37,7 @@ class FancyRenameTests(TestCase):
         self._tempdir = tempfile.mkdtemp()
         self.foo = self.path('foo')
         self.bar = self.path('bar')
-        self.create(self.foo, 'foo contents')
+        self.create(self.foo, b'foo contents')
 
     def tearDown(self):
         shutil.rmtree(self._tempdir)
@@ -61,44 +57,43 @@ class FancyRenameTests(TestCase):
         self.assertFalse(os.path.exists(self.foo))
 
         new_f = open(self.bar, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
 
     def test_dest_exists(self):
-        self.create(self.bar, 'bar contents')
+        self.create(self.bar, b'bar contents')
         fancy_rename(self.foo, self.bar)
         self.assertFalse(os.path.exists(self.foo))
 
         new_f = open(self.bar, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
 
     def test_dest_opened(self):
         if sys.platform != "win32":
             raise SkipTest("platform allows overwriting open files")
-        self.create(self.bar, 'bar contents')
+        self.create(self.bar, b'bar contents')
         dest_f = open(self.bar, 'rb')
         self.assertRaises(OSError, fancy_rename, self.foo, self.bar)
         dest_f.close()
         self.assertTrue(os.path.exists(self.path('foo')))
 
         new_f = open(self.foo, 'rb')
-        self.assertEqual('foo contents', new_f.read())
+        self.assertEqual(b'foo contents', new_f.read())
         new_f.close()
 
         new_f = open(self.bar, 'rb')
-        self.assertEqual('bar contents', new_f.read())
+        self.assertEqual(b'bar contents', new_f.read())
         new_f.close()
 
 
-@skipIfPY3
 class GitFileTests(TestCase):
 
     def setUp(self):
         super(GitFileTests, self).setUp()
         self._tempdir = tempfile.mkdtemp()
         f = open(self.path('foo'), 'wb')
-        f.write('foo contents')
+        f.write(b'foo contents')
         f.close()
 
     def tearDown(self):
@@ -119,15 +114,15 @@ class GitFileTests(TestCase):
     def test_readonly(self):
         f = GitFile(self.path('foo'), 'rb')
         self.assertTrue(isinstance(f, io.IOBase))
-        self.assertEqual('foo contents', f.read())
-        self.assertEqual('', f.read())
+        self.assertEqual(b'foo contents', f.read())
+        self.assertEqual(b'', f.read())
         f.seek(4)
-        self.assertEqual('contents', f.read())
+        self.assertEqual(b'contents', f.read())
         f.close()
 
     def test_default_mode(self):
         f = GitFile(self.path('foo'))
-        self.assertEqual('foo contents', f.read())
+        self.assertEqual(b'foo contents', f.read())
         f.close()
 
     def test_write(self):
@@ -135,7 +130,7 @@ class GitFileTests(TestCase):
         foo_lock = '%s.lock' % foo
 
         orig_f = open(foo, 'rb')
-        self.assertEqual(orig_f.read(), 'foo contents')
+        self.assertEqual(orig_f.read(), b'foo contents')
         orig_f.close()
 
         self.assertFalse(os.path.exists(foo_lock))
@@ -144,20 +139,20 @@ class GitFileTests(TestCase):
         self.assertRaises(AttributeError, getattr, f, 'not_a_file_property')
 
         self.assertTrue(os.path.exists(foo_lock))
-        f.write('new stuff')
+        f.write(b'new stuff')
         f.seek(4)
-        f.write('contents')
+        f.write(b'contents')
         f.close()
         self.assertFalse(os.path.exists(foo_lock))
 
         new_f = open(foo, 'rb')
-        self.assertEqual('new contents', new_f.read())
+        self.assertEqual(b'new contents', new_f.read())
         new_f.close()
 
     def test_open_twice(self):
         foo = self.path('foo')
         f1 = GitFile(foo, 'wb')
-        f1.write('new')
+        f1.write(b'new')
         try:
             f2 = GitFile(foo, 'wb')
             self.fail()
@@ -165,12 +160,12 @@ class GitFileTests(TestCase):
             self.assertEqual(errno.EEXIST, e.errno)
         else:
             f2.close()
-        f1.write(' contents')
+        f1.write(b' contents')
         f1.close()
 
         # Ensure trying to open twice doesn't affect original.
         f = open(foo, 'rb')
-        self.assertEqual('new contents', f.read())
+        self.assertEqual(b'new contents', f.read())
         f.close()
 
     def test_abort(self):
@@ -178,17 +173,17 @@ class GitFileTests(TestCase):
         foo_lock = '%s.lock' % foo
 
         orig_f = open(foo, 'rb')
-        self.assertEqual(orig_f.read(), 'foo contents')
+        self.assertEqual(orig_f.read(), b'foo contents')
         orig_f.close()
 
         f = GitFile(foo, 'wb')
-        f.write('new contents')
+        f.write(b'new contents')
         f.abort()
         self.assertTrue(f.closed)
         self.assertFalse(os.path.exists(foo_lock))
 
         new_orig_f = open(foo, 'rb')
-        self.assertEqual(new_orig_f.read(), 'foo contents')
+        self.assertEqual(new_orig_f.read(), b'foo contents')
         new_orig_f.close()
 
     def test_abort_close(self):

+ 10 - 12
dulwich/tests/test_hooks.py

@@ -20,6 +20,7 @@
 import os
 import stat
 import shutil
+import sys
 import tempfile
 
 from dulwich import errors
@@ -31,10 +32,8 @@ from dulwich.hooks import (
 )
 
 from dulwich.tests import TestCase
-from dulwich.tests.utils import skipIfPY3
 
 
-@skipIfPY3
 class ShellHookTests(TestCase):
 
     def setUp(self):
@@ -42,11 +41,11 @@ class ShellHookTests(TestCase):
             self.skipTest('shell hook tests requires POSIX shell')
 
     def test_hook_pre_commit(self):
-        pre_commit_fail = """#!/bin/sh
+        pre_commit_fail = b"""#!/bin/sh
 exit 1
 """
 
-        pre_commit_success = """#!/bin/sh
+        pre_commit_success = b"""#!/bin/sh
 exit 0
 """
 
@@ -71,11 +70,11 @@ exit 0
 
     def test_hook_commit_msg(self):
 
-        commit_msg_fail = """#!/bin/sh
+        commit_msg_fail = b"""#!/bin/sh
 exit 1
 """
 
-        commit_msg_success = """#!/bin/sh
+        commit_msg_success = b"""#!/bin/sh
 exit 0
 """
 
@@ -90,22 +89,21 @@ exit 0
             f.write(commit_msg_fail)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
 
-        self.assertRaises(errors.HookError, hook.execute, 'failed commit')
+        self.assertRaises(errors.HookError, hook.execute, b'failed commit')
 
         with open(commit_msg, 'wb') as f:
             f.write(commit_msg_success)
         os.chmod(commit_msg, stat.S_IREAD | stat.S_IWRITE | stat.S_IEXEC)
 
-        hook.execute('empty commit')
+        hook.execute(b'empty commit')
 
     def test_hook_post_commit(self):
 
         (fd, path) = tempfile.mkstemp()
-        post_commit_msg = """#!/bin/sh
-rm %(file)s
-""" % {'file': path}
+        post_commit_msg = b"""#!/bin/sh
+rm """ + path.encode(sys.getfilesystemencoding()) + b"\n"
 
-        post_commit_msg_fail = """#!/bin/sh
+        post_commit_msg_fail = b"""#!/bin/sh
 exit 1
 """
 

+ 58 - 1
dulwich/tests/test_index.py

@@ -35,6 +35,8 @@ from dulwich.index import (
     index_entry_from_stat,
     read_index,
     read_index_dict,
+    validate_path_element_default,
+    validate_path_element_ntfs,
     write_cache_time,
     write_index,
     write_index_dict,
@@ -50,7 +52,6 @@ from dulwich.repo import Repo
 from dulwich.tests import TestCase
 from dulwich.tests.utils import skipIfPY3
 
-
 @skipIfPY3
 class IndexTestCase(TestCase):
 
@@ -281,6 +282,43 @@ class BuildIndexTests(TestCase):
         # Verify no files
         self.assertEqual(['.git'], os.listdir(repo.path))
 
+    def test_git_dir(self):
+        if os.name != 'posix':
+            self.skipTest("test depends on POSIX shell")
+
+        repo_dir = tempfile.mkdtemp()
+        repo = Repo.init(repo_dir)
+        self.addCleanup(shutil.rmtree, repo_dir)
+
+        # Populate repo
+        filea = Blob.from_string('file a')
+        filee = Blob.from_string('d')
+
+        tree = Tree()
+        tree['.git/a'] = (stat.S_IFREG | 0o644, filea.id)
+        tree['c/e'] = (stat.S_IFREG | 0o644, filee.id)
+
+        repo.object_store.add_objects([(o, None)
+            for o in [filea, filee, tree]])
+
+        build_index_from_tree(repo.path, repo.index_path(),
+                repo.object_store, tree.id)
+
+        # Verify index entries
+        index = repo.open_index()
+        self.assertEqual(len(index), 1)
+
+        # filea
+        apath = os.path.join(repo.path, '.git', 'a')
+        self.assertFalse(os.path.exists(apath))
+
+        # filee
+        epath = os.path.join(repo.path, 'c', 'e')
+        self.assertTrue(os.path.exists(epath))
+        self.assertReasonableIndexEntry(index['c/e'],
+            stat.S_IFREG | 0o644, 1, filee.id)
+        self.assertFileContents(epath, 'd')
+
     def test_nonempty(self):
         if os.name != 'posix':
             self.skipTest("test depends on POSIX shell")
@@ -377,3 +415,22 @@ class GetUnstagedChangesTests(TestCase):
         changes = get_unstaged_changes(repo.open_index(), repo_dir)
 
         self.assertEqual(list(changes), ['foo1'])
+
+
+class TestValidatePathElement(TestCase):
+
+    def test_default(self):
+        self.assertTrue(validate_path_element_default("bla"))
+        self.assertTrue(validate_path_element_default(".bla"))
+        self.assertFalse(validate_path_element_default(".git"))
+        self.assertFalse(validate_path_element_default(".giT"))
+        self.assertFalse(validate_path_element_default(".."))
+        self.assertTrue(validate_path_element_default("git~1"))
+
+    def test_ntfs(self):
+        self.assertTrue(validate_path_element_ntfs("bla"))
+        self.assertTrue(validate_path_element_ntfs(".bla"))
+        self.assertFalse(validate_path_element_ntfs(".git"))
+        self.assertFalse(validate_path_element_ntfs(".giT"))
+        self.assertFalse(validate_path_element_ntfs(".."))
+        self.assertFalse(validate_path_element_ntfs("git~1"))

+ 0 - 6
dulwich/tests/test_lru_cache.py

@@ -22,12 +22,7 @@ from dulwich import (
 from dulwich.tests import (
     TestCase,
     )
-from dulwich.tests.utils import (
-    skipIfPY3,
-    )
-
 
-@skipIfPY3
 class TestLRUCache(TestCase):
     """Test that LRU cache properly keeps track of entries."""
 
@@ -291,7 +286,6 @@ class TestLRUCache(TestCase):
         self.assertEqual([6, 7, 8, 9, 10, 11], sorted(cache.keys()))
 
 
-@skipIfPY3
 class TestLRUSizeCache(TestCase):
 
     def test_basic_init(self):

+ 60 - 5
dulwich/tests/test_missing_obj_finder.py

@@ -25,6 +25,7 @@ from dulwich.objects import (
 from dulwich.tests import TestCase
 from dulwich.tests.utils import (
     make_object,
+    make_tag,
     build_commit_graph,
     skipIfPY3,
     )
@@ -91,13 +92,12 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
         self.assertMissingMatch([self.cmt(1).id], [self.cmt(3).id],
             self.missing_1_3)
 
-    def test_bogus_haves_failure(self):
-        """Ensure non-existent SHA in haves are not tolerated"""
+    def test_bogus_haves(self):
+        """Ensure non-existent SHA in haves are tolerated"""
         bogus_sha = self.cmt(2).id[::-1]
         haves = [self.cmt(1).id, bogus_sha]
         wants = [self.cmt(3).id]
-        self.assertRaises(KeyError, self.store.find_missing_objects,
-            self.store, haves, wants)
+        self.assertMissingMatch(haves, wants, self.missing_1_3)
 
     def test_bogus_wants_failure(self):
         """Ensure non-existent SHA in wants are not tolerated"""
@@ -105,7 +105,7 @@ class MOFLinearRepoTest(MissingObjectFinderTest):
         haves = [self.cmt(1).id]
         wants = [self.cmt(3).id, bogus_sha]
         self.assertRaises(KeyError, self.store.find_missing_objects,
-            self.store, haves, wants)
+            haves, wants)
 
     def test_no_changes(self):
         self.assertMissingMatch([self.cmt(3).id], [self.cmt(3).id], [])
@@ -195,3 +195,58 @@ class MOFMergeForkRepoTest(MissingObjectFinderTest):
               self.cmt(7).id, self.cmt(6).id, self.cmt(4).id,
               self.cmt(7).tree, self.cmt(6).tree, self.cmt(4).tree,
               self.f1_4_id])
+
+
+class MOFTagsTest(MissingObjectFinderTest):
+    def setUp(self):
+        super(MOFTagsTest, self).setUp()
+        f1_1 = make_object(Blob, data='f1')
+        commit_spec = [[1]]
+        trees = {1: [('f1', f1_1)]}
+        self.commits = build_commit_graph(self.store, commit_spec, trees)
+
+        self._normal_tag = make_tag(self.cmt(1))
+        self.store.add_object(self._normal_tag)
+
+        self._tag_of_tag = make_tag(self._normal_tag)
+        self.store.add_object(self._tag_of_tag)
+
+        self._tag_of_tree = make_tag(self.store[self.cmt(1).tree])
+        self.store.add_object(self._tag_of_tree)
+
+        self._tag_of_blob = make_tag(f1_1)
+        self.store.add_object(self._tag_of_blob)
+
+        self._tag_of_tag_of_blob = make_tag(self._tag_of_blob)
+        self.store.add_object(self._tag_of_tag_of_blob)
+
+        self.f1_1_id = f1_1.id
+
+    def test_tagged_commit(self):
+        # The user already has the tagged commit, all they want is the tag,
+        # so send them only the tag object.
+        self.assertMissingMatch([self.cmt(1).id], [self._normal_tag.id],
+                                [self._normal_tag.id])
+
+    # The remaining cases are unusual, but do happen in the wild.
+    def test_tagged_tag(self):
+        # User already has tagged tag, send only tag of tag
+        self.assertMissingMatch([self._normal_tag.id], [self._tag_of_tag.id],
+                                [self._tag_of_tag.id])
+        # User needs both tags, but already has commit
+        self.assertMissingMatch([self.cmt(1).id], [self._tag_of_tag.id],
+                                [self._normal_tag.id, self._tag_of_tag.id])
+
+    def test_tagged_tree(self):
+        self.assertMissingMatch(
+            [], [self._tag_of_tree.id],
+            [self._tag_of_tree.id, self.cmt(1).tree, self.f1_1_id])
+
+    def test_tagged_blob(self):
+        self.assertMissingMatch([], [self._tag_of_blob.id],
+                                [self._tag_of_blob.id, self.f1_1_id])
+
+    def test_tagged_tagged_blob(self):
+        self.assertMissingMatch([], [self._tag_of_tag_of_blob.id],
+                                [self._tag_of_tag_of_blob.id,
+                                 self._tag_of_blob.id, self.f1_1_id])

+ 9 - 8
dulwich/tests/test_object_store.py

@@ -53,12 +53,13 @@ from dulwich.tests import (
     )
 from dulwich.tests.utils import (
     make_object,
+    make_tag,
     build_pack,
     skipIfPY3,
     )
 
 
-testobject = make_object(Blob, data="yummy data")
+testobject = make_object(Blob, data=b"yummy data")
 
 
 class ObjectStoreTests(object):
@@ -84,7 +85,7 @@ class ObjectStoreTests(object):
         self.store.add_objects([])
 
     def test_add_commit(self):
-        # TODO: Argh, no way to construct Git commit objects without 
+        # TODO: Argh, no way to construct Git commit objects without
         # access to a serialized form.
         self.store.add_objects([])
 
@@ -169,10 +170,7 @@ class ObjectStoreTests(object):
         self.assertEqual(expected, list(actual))
 
     def make_tag(self, name, obj):
-        tag = make_object(Tag, name=name, message='',
-                          tag_time=12345, tag_timezone=0,
-                          tagger='Test Tagger <test@example.com>',
-                          object=(object_class(obj.type_name), obj.id))
+        tag = make_tag(obj, name=name)
         self.store.add_object(tag)
         return tag
 
@@ -214,6 +212,11 @@ class MemoryObjectStoreTests(ObjectStoreTests, TestCase):
         else:
             commit()
 
+    def test_add_pack_emtpy(self):
+        o = MemoryObjectStore()
+        f, commit, abort = o.add_pack()
+        commit()
+
     def test_add_thin_pack(self):
         o = MemoryObjectStore()
         blob = make_object(Blob, data='yummy data')
@@ -396,8 +399,6 @@ class TreeLookupPathTests(TestCase):
     def test_lookup_not_tree(self):
         self.assertRaises(NotTreeError, tree_lookup_path, self.get_object, self.tree_id, 'ad/b/j')
 
-# TODO: MissingObjectFinderTests
-
 @skipIfPY3
 class ObjectStoreGraphWalkerTests(TestCase):
 

File diff suppressed because it is too large
+ 287 - 283
dulwich/tests/test_objects.py


+ 17 - 11
dulwich/tests/test_pack.py

@@ -191,6 +191,14 @@ class TestPackDeltas(TestCase):
         self._test_roundtrip(self.test_string_huge + self.test_string1,
                              self.test_string_huge + self.test_string2)
 
+    def test_dest_overflow(self):
+        self.assertRaises(
+            ValueError,
+            apply_delta, 'a'*0x10000, '\x80\x80\x04\x80\x80\x04\x80' + 'a'*0x10000)
+        self.assertRaises(
+            ValueError,
+            apply_delta, '', '\x00\x80\x02\xb0\x11\x11')
+
 
 @skipIfPY3
 class TestPackData(PackTests):
@@ -1041,22 +1049,20 @@ class DeltaChainIteratorTests(TestCase):
             self.assertEqual((sorted([b2.id, b3.id]),), (sorted(e.args[0]),))
 
 
-@skipIfPY3
 class DeltaEncodeSizeTests(TestCase):
 
     def test_basic(self):
-        self.assertEquals('\x00', _delta_encode_size(0))
-        self.assertEquals('\x01', _delta_encode_size(1))
-        self.assertEquals('\xfa\x01', _delta_encode_size(250))
-        self.assertEquals('\xe8\x07', _delta_encode_size(1000))
-        self.assertEquals('\xa0\x8d\x06', _delta_encode_size(100000))
+        self.assertEqual('\x00', _delta_encode_size(0))
+        self.assertEqual('\x01', _delta_encode_size(1))
+        self.assertEqual('\xfa\x01', _delta_encode_size(250))
+        self.assertEqual('\xe8\x07', _delta_encode_size(1000))
+        self.assertEqual('\xa0\x8d\x06', _delta_encode_size(100000))
 
 
-@skipIfPY3
 class EncodeCopyOperationTests(TestCase):
 
     def test_basic(self):
-        self.assertEquals('\x80', _encode_copy_operation(0, 0))
-        self.assertEquals('\x91\x01\x0a', _encode_copy_operation(1, 10))
-        self.assertEquals('\xb1\x64\xe8\x03', _encode_copy_operation(100, 1000))
-        self.assertEquals('\x93\xe8\x03\x01', _encode_copy_operation(1000, 1))
+        self.assertEqual('\x80', _encode_copy_operation(0, 0))
+        self.assertEqual('\x91\x01\x0a', _encode_copy_operation(1, 10))
+        self.assertEqual('\xb1\x64\xe8\x03', _encode_copy_operation(100, 1000))
+        self.assertEqual('\x93\xe8\x03\x01', _encode_copy_operation(1000, 1))

+ 217 - 197
dulwich/tests/test_refs.py

@@ -56,89 +56,106 @@ class CheckRefFormatTests(TestCase):
     """
 
     def test_valid(self):
-        self.assertTrue(check_ref_format('heads/foo'))
-        self.assertTrue(check_ref_format('foo/bar/baz'))
-        self.assertTrue(check_ref_format('refs///heads/foo'))
-        self.assertTrue(check_ref_format('foo./bar'))
-        self.assertTrue(check_ref_format('heads/foo@bar'))
-        self.assertTrue(check_ref_format('heads/fix.lock.error'))
+        self.assertTrue(check_ref_format(b'heads/foo'))
+        self.assertTrue(check_ref_format(b'foo/bar/baz'))
+        self.assertTrue(check_ref_format(b'refs///heads/foo'))
+        self.assertTrue(check_ref_format(b'foo./bar'))
+        self.assertTrue(check_ref_format(b'heads/foo@bar'))
+        self.assertTrue(check_ref_format(b'heads/fix.lock.error'))
 
     def test_invalid(self):
-        self.assertFalse(check_ref_format('foo'))
-        self.assertFalse(check_ref_format('heads/foo/'))
-        self.assertFalse(check_ref_format('./foo'))
-        self.assertFalse(check_ref_format('.refs/foo'))
-        self.assertFalse(check_ref_format('heads/foo..bar'))
-        self.assertFalse(check_ref_format('heads/foo?bar'))
-        self.assertFalse(check_ref_format('heads/foo.lock'))
-        self.assertFalse(check_ref_format('heads/v@{ation'))
-        self.assertFalse(check_ref_format('heads/foo\bar'))
-
-
-ONES = "1" * 40
-TWOS = "2" * 40
-THREES = "3" * 40
-FOURS = "4" * 40
-
-@skipIfPY3
+        self.assertFalse(check_ref_format(b'foo'))
+        self.assertFalse(check_ref_format(b'heads/foo/'))
+        self.assertFalse(check_ref_format(b'./foo'))
+        self.assertFalse(check_ref_format(b'.refs/foo'))
+        self.assertFalse(check_ref_format(b'heads/foo..bar'))
+        self.assertFalse(check_ref_format(b'heads/foo?bar'))
+        self.assertFalse(check_ref_format(b'heads/foo.lock'))
+        self.assertFalse(check_ref_format(b'heads/v@{ation'))
+        self.assertFalse(check_ref_format(b'heads/foo\bar'))
+
+
+ONES = b'1' * 40
+TWOS = b'2' * 40
+THREES = b'3' * 40
+FOURS = b'4' * 40
+
 class PackedRefsFileTests(TestCase):
 
     def test_split_ref_line_errors(self):
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          'singlefield')
+                          b'singlefield')
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          'badsha name')
+                          b'badsha name')
         self.assertRaises(errors.PackedRefsException, _split_ref_line,
-                          '%s bad/../refname' % ONES)
+                          ONES + b' bad/../refname')
 
     def test_read_without_peeled(self):
-        f = BytesIO('# comment\n%s ref/1\n%s ref/2' % (ONES, TWOS))
-        self.assertEqual([(ONES, 'ref/1'), (TWOS, 'ref/2')],
+        f = BytesIO(b'\n'.join([
+            b'# comment',
+            ONES + b' ref/1',
+            TWOS + b' ref/2']))
+        self.assertEqual([(ONES, b'ref/1'), (TWOS, b'ref/2')],
                          list(read_packed_refs(f)))
 
     def test_read_without_peeled_errors(self):
-        f = BytesIO('%s ref/1\n^%s' % (ONES, TWOS))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            b'^' + TWOS]))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
     def test_read_with_peeled(self):
-        f = BytesIO('%s ref/1\n%s ref/2\n^%s\n%s ref/4' % (
-          ONES, TWOS, THREES, FOURS))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            TWOS + b' ref/2',
+            b'^' + THREES,
+            FOURS + b' ref/4']))
         self.assertEqual([
-          (ONES, 'ref/1', None),
-          (TWOS, 'ref/2', THREES),
-          (FOURS, 'ref/4', None),
-          ], list(read_packed_refs_with_peeled(f)))
+            (ONES, b'ref/1', None),
+            (TWOS, b'ref/2', THREES),
+            (FOURS, b'ref/4', None),
+            ], list(read_packed_refs_with_peeled(f)))
 
     def test_read_with_peeled_errors(self):
-        f = BytesIO('^%s\n%s ref/1' % (TWOS, ONES))
+        f = BytesIO(b'\n'.join([
+            b'^' + TWOS,
+            ONES + b' ref/1']))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
-        f = BytesIO('%s ref/1\n^%s\n^%s' % (ONES, TWOS, THREES))
+        f = BytesIO(b'\n'.join([
+            ONES + b' ref/1',
+            b'^' + TWOS,
+            b'^' + THREES]))
         self.assertRaises(errors.PackedRefsException, list, read_packed_refs(f))
 
     def test_write_with_peeled(self):
         f = BytesIO()
-        write_packed_refs(f, {'ref/1': ONES, 'ref/2': TWOS},
-                          {'ref/1': THREES})
+        write_packed_refs(f, {b'ref/1': ONES, b'ref/2': TWOS},
+                          {b'ref/1': THREES})
         self.assertEqual(
-          "# pack-refs with: peeled\n%s ref/1\n^%s\n%s ref/2\n" % (
-          ONES, THREES, TWOS), f.getvalue())
+            b'\n'.join([b'# pack-refs with: peeled',
+                        ONES + b' ref/1',
+                        b'^' + THREES,
+                        TWOS + b' ref/2']) + b'\n',
+            f.getvalue())
 
     def test_write_without_peeled(self):
         f = BytesIO()
-        write_packed_refs(f, {'ref/1': ONES, 'ref/2': TWOS})
-        self.assertEqual("%s ref/1\n%s ref/2\n" % (ONES, TWOS), f.getvalue())
+        write_packed_refs(f, {b'ref/1': ONES, b'ref/2': TWOS})
+        self.assertEqual(b'\n'.join([ONES + b' ref/1',
+                                     TWOS + b' ref/2']) + b'\n',
+                         f.getvalue())
 
 
 # Dict of refs that we expect all RefsContainerTests subclasses to define.
 _TEST_REFS = {
-  'HEAD': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/master': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/heads/packed': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-  'refs/tags/refs-0.1': 'df6800012397fb85c56e7418dd4eb9405dee075c',
-  'refs/tags/refs-0.2': '3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
-  }
+    b'HEAD': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/master': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/heads/packed': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+    b'refs/tags/refs-0.1': b'df6800012397fb85c56e7418dd4eb9405dee075c',
+    b'refs/tags/refs-0.2': b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
+    }
 
 
 class RefsContainerTests(object):
@@ -147,100 +164,103 @@ class RefsContainerTests(object):
         actual_keys = set(self._refs.keys())
         self.assertEqual(set(self._refs.allkeys()), actual_keys)
         # ignore the symref loop if it exists
-        actual_keys.discard('refs/heads/loop')
-        self.assertEqual(set(_TEST_REFS.iterkeys()), actual_keys)
+        actual_keys.discard(b'refs/heads/loop')
+        self.assertEqual(set(_TEST_REFS.keys()), actual_keys)
 
-        actual_keys = self._refs.keys('refs/heads')
-        actual_keys.discard('loop')
+        actual_keys = self._refs.keys(b'refs/heads')
+        actual_keys.discard(b'loop')
         self.assertEqual(
-            ['40-char-ref-aaaaaaaaaaaaaaaaaa', 'master', 'packed'],
+            [b'40-char-ref-aaaaaaaaaaaaaaaaaa', b'master', b'packed'],
             sorted(actual_keys))
-        self.assertEqual(['refs-0.1', 'refs-0.2'],
-                         sorted(self._refs.keys('refs/tags')))
+        self.assertEqual([b'refs-0.1', b'refs-0.2'],
+                         sorted(self._refs.keys(b'refs/tags')))
 
     def test_as_dict(self):
         # refs/heads/loop does not show up even if it exists
         self.assertEqual(_TEST_REFS, self._refs.as_dict())
 
     def test_setitem(self):
-        self._refs['refs/some/ref'] = '42d06bd4b77fed026b154d16493e5deab78f02ec'
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/some/ref'])
-        self.assertRaises(errors.RefFormatError, self._refs.__setitem__,
-                          'notrefs/foo', '42d06bd4b77fed026b154d16493e5deab78f02ec')
+        self._refs[b'refs/some/ref'] = b'42d06bd4b77fed026b154d16493e5deab78f02ec'
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/some/ref'])
+        self.assertRaises(
+            errors.RefFormatError, self._refs.__setitem__,
+            b'notrefs/foo', b'42d06bd4b77fed026b154d16493e5deab78f02ec')
 
     def test_set_if_equals(self):
-        nines = '9' * 40
-        self.assertFalse(self._refs.set_if_equals('HEAD', 'c0ffee', nines))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['HEAD'])
+        nines = b'9' * 40
+        self.assertFalse(self._refs.set_if_equals(b'HEAD', b'c0ffee', nines))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'HEAD'])
 
         self.assertTrue(self._refs.set_if_equals(
-          'HEAD', '42d06bd4b77fed026b154d16493e5deab78f02ec', nines))
-        self.assertEqual(nines, self._refs['HEAD'])
+            b'HEAD', b'42d06bd4b77fed026b154d16493e5deab78f02ec', nines))
+        self.assertEqual(nines, self._refs[b'HEAD'])
 
-        self.assertTrue(self._refs.set_if_equals('refs/heads/master', None,
+        self.assertTrue(self._refs.set_if_equals(b'refs/heads/master', None,
                                                  nines))
-        self.assertEqual(nines, self._refs['refs/heads/master'])
+        self.assertEqual(nines, self._refs[b'refs/heads/master'])
 
     def test_add_if_new(self):
-        nines = '9' * 40
-        self.assertFalse(self._refs.add_if_new('refs/heads/master', nines))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/master'])
+        nines = b'9' * 40
+        self.assertFalse(self._refs.add_if_new(b'refs/heads/master', nines))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
 
-        self.assertTrue(self._refs.add_if_new('refs/some/ref', nines))
-        self.assertEqual(nines, self._refs['refs/some/ref'])
+        self.assertTrue(self._refs.add_if_new(b'refs/some/ref', nines))
+        self.assertEqual(nines, self._refs[b'refs/some/ref'])
 
     def test_set_symbolic_ref(self):
-        self._refs.set_symbolic_ref('refs/heads/symbolic', 'refs/heads/master')
-        self.assertEqual('ref: refs/heads/master',
-                         self._refs.read_loose_ref('refs/heads/symbolic'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/symbolic'])
+        self._refs.set_symbolic_ref(b'refs/heads/symbolic',
+                                    b'refs/heads/master')
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/symbolic'])
 
     def test_set_symbolic_ref_overwrite(self):
-        nines = '9' * 40
-        self.assertFalse('refs/heads/symbolic' in self._refs)
-        self._refs['refs/heads/symbolic'] = nines
-        self.assertEqual(nines, self._refs.read_loose_ref('refs/heads/symbolic'))
-        self._refs.set_symbolic_ref('refs/heads/symbolic', 'refs/heads/master')
-        self.assertEqual('ref: refs/heads/master',
-                         self._refs.read_loose_ref('refs/heads/symbolic'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/symbolic'])
+        nines = b'9' * 40
+        self.assertFalse(b'refs/heads/symbolic' in self._refs)
+        self._refs[b'refs/heads/symbolic'] = nines
+        self.assertEqual(nines,
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self._refs.set_symbolic_ref(b'refs/heads/symbolic',
+                                    b'refs/heads/master')
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'refs/heads/symbolic'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/symbolic'])
 
     def test_check_refname(self):
-        self._refs._check_refname('HEAD')
-        self._refs._check_refname('refs/stash')
-        self._refs._check_refname('refs/heads/foo')
+        self._refs._check_refname(b'HEAD')
+        self._refs._check_refname(b'refs/stash')
+        self._refs._check_refname(b'refs/heads/foo')
 
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
-                          'refs')
+                          b'refs')
         self.assertRaises(errors.RefFormatError, self._refs._check_refname,
-                          'notrefs/foo')
+                          b'notrefs/foo')
 
     def test_contains(self):
-        self.assertTrue('refs/heads/master' in self._refs)
-        self.assertFalse('refs/heads/bar' in self._refs)
+        self.assertTrue(b'refs/heads/master' in self._refs)
+        self.assertFalse(b'refs/heads/bar' in self._refs)
 
     def test_delitem(self):
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                          self._refs['refs/heads/master'])
-        del self._refs['refs/heads/master']
-        self.assertRaises(KeyError, lambda: self._refs['refs/heads/master'])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
+        del self._refs[b'refs/heads/master']
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/heads/master'])
 
     def test_remove_if_equals(self):
-        self.assertFalse(self._refs.remove_if_equals('HEAD', 'c0ffee'))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['HEAD'])
+        self.assertFalse(self._refs.remove_if_equals(b'HEAD', b'c0ffee'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'HEAD'])
         self.assertTrue(self._refs.remove_if_equals(
-          'refs/tags/refs-0.2', '3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
-        self.assertFalse('refs/tags/refs-0.2' in self._refs)
+            b'refs/tags/refs-0.2', b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8'))
+        self.assertFalse(b'refs/tags/refs-0.2' in self._refs)
 
 
 
-@skipIfPY3
 class DictRefsContainerTests(RefsContainerTests, TestCase):
 
     def setUp(self):
@@ -250,13 +270,12 @@ class DictRefsContainerTests(RefsContainerTests, TestCase):
     def test_invalid_refname(self):
         # FIXME: Move this test into RefsContainerTests, but requires
         # some way of injecting invalid refs.
-        self._refs._refs["refs/stash"] = "00" * 20
+        self._refs._refs[b'refs/stash'] = b'00' * 20
         expected_refs = dict(_TEST_REFS)
-        expected_refs["refs/stash"] = "00" * 20
+        expected_refs[b'refs/stash'] = b'00' * 20
         self.assertEqual(expected_refs, self._refs.as_dict())
 
 
-@skipIfPY3
 class DiskRefsContainerTests(RefsContainerTests, TestCase):
 
     def setUp(self):
@@ -270,39 +289,39 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
 
     def test_get_packed_refs(self):
         self.assertEqual({
-          'refs/heads/packed': '42d06bd4b77fed026b154d16493e5deab78f02ec',
-          'refs/tags/refs-0.1': 'df6800012397fb85c56e7418dd4eb9405dee075c',
-          }, self._refs.get_packed_refs())
+            b'refs/heads/packed': b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+            b'refs/tags/refs-0.1': b'df6800012397fb85c56e7418dd4eb9405dee075c',
+            }, self._refs.get_packed_refs())
 
     def test_get_peeled_not_packed(self):
         # not packed
-        self.assertEqual(None, self._refs.get_peeled('refs/tags/refs-0.2'))
-        self.assertEqual('3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
-                         self._refs['refs/tags/refs-0.2'])
+        self.assertEqual(None, self._refs.get_peeled(b'refs/tags/refs-0.2'))
+        self.assertEqual(b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8',
+                         self._refs[b'refs/tags/refs-0.2'])
 
         # packed, known not peelable
-        self.assertEqual(self._refs['refs/heads/packed'],
-                         self._refs.get_peeled('refs/heads/packed'))
+        self.assertEqual(self._refs[b'refs/heads/packed'],
+                         self._refs.get_peeled(b'refs/heads/packed'))
 
         # packed, peeled
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs.get_peeled('refs/tags/refs-0.1'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs.get_peeled(b'refs/tags/refs-0.1'))
 
     def test_setitem(self):
         RefsContainerTests.test_setitem(self)
         f = open(os.path.join(self._refs.path, 'refs', 'some', 'ref'), 'rb')
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                          f.read()[:40])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         f.read()[:40])
         f.close()
 
     def test_setitem_symbolic(self):
-        ones = '1' * 40
-        self._refs['HEAD'] = ones
-        self.assertEqual(ones, self._refs['HEAD'])
+        ones = b'1' * 40
+        self._refs[b'HEAD'] = ones
+        self.assertEqual(ones, self._refs[b'HEAD'])
 
         # ensure HEAD was not modified
         f = open(os.path.join(self._refs.path, 'HEAD'), 'rb')
-        self.assertEqual('ref: refs/heads/master', next(iter(f)).rstrip('\n'))
+        self.assertEqual(b'ref: refs/heads/master', next(iter(f)).rstrip(b'\n'))
         f.close()
 
         # ensure the symbolic link was written through
@@ -314,20 +333,22 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         RefsContainerTests.test_set_if_equals(self)
 
         # ensure symref was followed
-        self.assertEqual('9' * 40, self._refs['refs/heads/master'])
+        self.assertEqual(b'9' * 40, self._refs[b'refs/heads/master'])
 
         # ensure lockfile was deleted
         self.assertFalse(os.path.exists(
-          os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
+            os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
         self.assertFalse(os.path.exists(
-          os.path.join(self._refs.path, 'HEAD.lock')))
+            os.path.join(self._refs.path, 'HEAD.lock')))
 
     def test_add_if_new_packed(self):
         # don't overwrite packed ref
-        self.assertFalse(self._refs.add_if_new('refs/tags/refs-0.1', '9' * 40))
-        self.assertEqual('df6800012397fb85c56e7418dd4eb9405dee075c',
-                         self._refs['refs/tags/refs-0.1'])
+        self.assertFalse(self._refs.add_if_new(b'refs/tags/refs-0.1',
+                                               b'9' * 40))
+        self.assertEqual(b'df6800012397fb85c56e7418dd4eb9405dee075c',
+                         self._refs[b'refs/tags/refs-0.1'])
 
+    @skipIfPY3
     def test_add_if_new_symbolic(self):
         # Use an empty repo instead of the default.
         tear_down_repo(self._repo)
@@ -336,53 +357,53 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         self._repo = Repo.init(repo_dir)
         refs = self._repo.refs
 
-        nines = '9' * 40
-        self.assertEqual('ref: refs/heads/master', refs.read_ref('HEAD'))
-        self.assertFalse('refs/heads/master' in refs)
-        self.assertTrue(refs.add_if_new('HEAD', nines))
-        self.assertEqual('ref: refs/heads/master', refs.read_ref('HEAD'))
-        self.assertEqual(nines, refs['HEAD'])
-        self.assertEqual(nines, refs['refs/heads/master'])
-        self.assertFalse(refs.add_if_new('HEAD', '1' * 40))
-        self.assertEqual(nines, refs['HEAD'])
-        self.assertEqual(nines, refs['refs/heads/master'])
+        nines = b'9' * 40
+        self.assertEqual(b'ref: refs/heads/master', refs.read_ref(b'HEAD'))
+        self.assertFalse(b'refs/heads/master' in refs)
+        self.assertTrue(refs.add_if_new(b'HEAD', nines))
+        self.assertEqual(b'ref: refs/heads/master', refs.read_ref(b'HEAD'))
+        self.assertEqual(nines, refs[b'HEAD'])
+        self.assertEqual(nines, refs[b'refs/heads/master'])
+        self.assertFalse(refs.add_if_new(b'HEAD', b'1' * 40))
+        self.assertEqual(nines, refs[b'HEAD'])
+        self.assertEqual(nines, refs[b'refs/heads/master'])
 
     def test_follow(self):
-        self.assertEqual(
-          ('refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'),
-          self._refs._follow('HEAD'))
-        self.assertEqual(
-          ('refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'),
-          self._refs._follow('refs/heads/master'))
-        self.assertRaises(KeyError, self._refs._follow, 'refs/heads/loop')
+        self.assertEqual((b'refs/heads/master',
+                          b'42d06bd4b77fed026b154d16493e5deab78f02ec'),
+                         self._refs._follow(b'HEAD'))
+        self.assertEqual((b'refs/heads/master',
+                          b'42d06bd4b77fed026b154d16493e5deab78f02ec'),
+                         self._refs._follow(b'refs/heads/master'))
+        self.assertRaises(KeyError, self._refs._follow, b'refs/heads/loop')
 
     def test_delitem(self):
         RefsContainerTests.test_delitem(self)
         ref_file = os.path.join(self._refs.path, 'refs', 'heads', 'master')
         self.assertFalse(os.path.exists(ref_file))
-        self.assertFalse('refs/heads/master' in self._refs.get_packed_refs())
+        self.assertFalse(b'refs/heads/master' in self._refs.get_packed_refs())
 
     def test_delitem_symbolic(self):
-        self.assertEqual('ref: refs/heads/master',
-                          self._refs.read_loose_ref('HEAD'))
-        del self._refs['HEAD']
-        self.assertRaises(KeyError, lambda: self._refs['HEAD'])
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-                         self._refs['refs/heads/master'])
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'HEAD'))
+        del self._refs[b'HEAD']
+        self.assertRaises(KeyError, lambda: self._refs[b'HEAD'])
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs[b'refs/heads/master'])
         self.assertFalse(os.path.exists(os.path.join(self._refs.path, 'HEAD')))
 
     def test_remove_if_equals_symref(self):
         # HEAD is a symref, so shouldn't equal its dereferenced value
         self.assertFalse(self._refs.remove_if_equals(
-          'HEAD', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
+            b'HEAD', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
         self.assertTrue(self._refs.remove_if_equals(
-          'refs/heads/master', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
-        self.assertRaises(KeyError, lambda: self._refs['refs/heads/master'])
+            b'refs/heads/master', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/heads/master'])
 
         # HEAD is now a broken symref
-        self.assertRaises(KeyError, lambda: self._refs['HEAD'])
-        self.assertEqual('ref: refs/heads/master',
-                          self._refs.read_loose_ref('HEAD'))
+        self.assertRaises(KeyError, lambda: self._refs[b'HEAD'])
+        self.assertEqual(b'ref: refs/heads/master',
+                         self._refs.read_loose_ref(b'HEAD'))
 
         self.assertFalse(os.path.exists(
             os.path.join(self._refs.path, 'refs', 'heads', 'master.lock')))
@@ -395,48 +416,47 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         refs_data = f.read()
         f.close()
         f = GitFile(refs_file, 'wb')
-        f.write('\n'.join(l for l in refs_data.split('\n')
-                          if not l or l[0] not in '#^'))
+        f.write(b'\n'.join(l for l in refs_data.split(b'\n')
+                           if not l or l[0] not in b'#^'))
         f.close()
         self._repo = Repo(self._repo.path)
         refs = self._repo.refs
         self.assertTrue(refs.remove_if_equals(
-          'refs/heads/packed', '42d06bd4b77fed026b154d16493e5deab78f02ec'))
+            b'refs/heads/packed', b'42d06bd4b77fed026b154d16493e5deab78f02ec'))
 
     def test_remove_if_equals_packed(self):
         # test removing ref that is only packed
-        self.assertEqual('df6800012397fb85c56e7418dd4eb9405dee075c',
-                         self._refs['refs/tags/refs-0.1'])
+        self.assertEqual(b'df6800012397fb85c56e7418dd4eb9405dee075c',
+                         self._refs[b'refs/tags/refs-0.1'])
         self.assertTrue(
-          self._refs.remove_if_equals('refs/tags/refs-0.1',
-          'df6800012397fb85c56e7418dd4eb9405dee075c'))
-        self.assertRaises(KeyError, lambda: self._refs['refs/tags/refs-0.1'])
+            self._refs.remove_if_equals(
+                b'refs/tags/refs-0.1',
+                b'df6800012397fb85c56e7418dd4eb9405dee075c'))
+        self.assertRaises(KeyError, lambda: self._refs[b'refs/tags/refs-0.1'])
 
     def test_read_ref(self):
-        self.assertEqual('ref: refs/heads/master', self._refs.read_ref("HEAD"))
-        self.assertEqual('42d06bd4b77fed026b154d16493e5deab78f02ec',
-            self._refs.read_ref("refs/heads/packed"))
-        self.assertEqual(None,
-            self._refs.read_ref("nonexistant"))
+        self.assertEqual(b'ref: refs/heads/master', self._refs.read_ref(b'HEAD'))
+        self.assertEqual(b'42d06bd4b77fed026b154d16493e5deab78f02ec',
+                         self._refs.read_ref(b'refs/heads/packed'))
+        self.assertEqual(None, self._refs.read_ref(b'nonexistant'))
 
 
 _TEST_REFS_SERIALIZED = (
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/master\n'
-'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/packed\n'
-'df6800012397fb85c56e7418dd4eb9405dee075c\trefs/tags/refs-0.1\n'
-'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8\trefs/tags/refs-0.2\n')
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/40-char-ref-aaaaaaaaaaaaaaaaaa\n'
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/master\n'
+    b'42d06bd4b77fed026b154d16493e5deab78f02ec\trefs/heads/packed\n'
+    b'df6800012397fb85c56e7418dd4eb9405dee075c\trefs/tags/refs-0.1\n'
+    b'3ec9c43c84ff242e3ef4a9fc5bc111fd780a76a8\trefs/tags/refs-0.2\n')
 
 
-@skipIfPY3
 class InfoRefsContainerTests(TestCase):
 
     def test_invalid_refname(self):
-        text = _TEST_REFS_SERIALIZED + '00' * 20 + '\trefs/stash\n'
+        text = _TEST_REFS_SERIALIZED + b'00' * 20 + b'\trefs/stash\n'
         refs = InfoRefsContainer(BytesIO(text))
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
-        expected_refs["refs/stash"] = "00" * 20
+        del expected_refs[b'HEAD']
+        expected_refs[b'refs/stash'] = b'00' * 20
         self.assertEqual(expected_refs, refs.as_dict())
 
     def test_keys(self):
@@ -444,34 +464,34 @@ class InfoRefsContainerTests(TestCase):
         actual_keys = set(refs.keys())
         self.assertEqual(set(refs.allkeys()), actual_keys)
         # ignore the symref loop if it exists
-        actual_keys.discard('refs/heads/loop')
+        actual_keys.discard(b'refs/heads/loop')
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
-        self.assertEqual(set(expected_refs.iterkeys()), actual_keys)
+        del expected_refs[b'HEAD']
+        self.assertEqual(set(expected_refs.keys()), actual_keys)
 
-        actual_keys = refs.keys('refs/heads')
-        actual_keys.discard('loop')
+        actual_keys = refs.keys(b'refs/heads')
+        actual_keys.discard(b'loop')
         self.assertEqual(
-            ['40-char-ref-aaaaaaaaaaaaaaaaaa', 'master', 'packed'],
+            [b'40-char-ref-aaaaaaaaaaaaaaaaaa', b'master', b'packed'],
             sorted(actual_keys))
-        self.assertEqual(['refs-0.1', 'refs-0.2'],
-                         sorted(refs.keys('refs/tags')))
+        self.assertEqual([b'refs-0.1', b'refs-0.2'],
+                         sorted(refs.keys(b'refs/tags')))
 
     def test_as_dict(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         # refs/heads/loop does not show up even if it exists
         expected_refs = dict(_TEST_REFS)
-        del expected_refs['HEAD']
+        del expected_refs[b'HEAD']
         self.assertEqual(expected_refs, refs.as_dict())
 
     def test_contains(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
-        self.assertTrue('refs/heads/master' in refs)
-        self.assertFalse('refs/heads/bar' in refs)
+        self.assertTrue(b'refs/heads/master' in refs)
+        self.assertFalse(b'refs/heads/bar' in refs)
 
     def test_get_peeled(self):
         refs = InfoRefsContainer(BytesIO(_TEST_REFS_SERIALIZED))
         # refs/heads/loop does not show up even if it exists
         self.assertEqual(
-            _TEST_REFS['refs/heads/master'],
-            refs.get_peeled('refs/heads/master'))
+            _TEST_REFS[b'refs/heads/master'],
+            refs.get_peeled(b'refs/heads/master'))

+ 2 - 4
dulwich/tests/test_server.py

@@ -59,6 +59,7 @@ from dulwich.tests import TestCase
 from dulwich.tests.utils import (
     make_commit,
     make_object,
+    make_tag,
     skipIfPY3,
     )
 from dulwich.protocol import (
@@ -280,10 +281,7 @@ class FindShallowTests(TestCase):
 
     def test_tag(self):
         c1, c2 = self.make_linear_commits(2)
-        tag = make_object(Tag, name='tag', message='',
-                          tagger='Tagger <test@example.com>',
-                          tag_time=12345, tag_timezone=0,
-                          object=(Commit, c2.id))
+        tag = make_tag(c2, name='tag')
         self._store.add_object(tag)
 
         self.assertEqual((set([c1.id]), set([c2.id])),

+ 89 - 89
dulwich/tests/test_walk.py

@@ -162,111 +162,111 @@ class WalkerTest(TestCase):
         self.assertWalkYields([c3], [c3.id], max_entries=1, reverse=True)
 
     def test_changes_one_parent(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_a2 = make_object(Blob, data='a2')
-        blob_b2 = make_object(Blob, data='b2')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_b2 = make_object(Blob, data=b'b2')
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob_a1)],
-                    2: [('a', blob_a2), ('b', blob_b2)]})
-        e1 = TestWalkEntry(c1, [TreeChange.add(('a', F, blob_a1.id))])
-        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
-                                           ('a', F, blob_a2.id)),
-                                TreeChange.add(('b', F, blob_b2.id))])
+            2, trees={1: [(b'a', blob_a1)],
+                      2: [(b'a', blob_a2), (b'b', blob_b2)]})
+        e1 = TestWalkEntry(c1, [TreeChange.add((b'a', F, blob_a1.id))])
+        e2 = TestWalkEntry(c2, [TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
+                                           (b'a', F, blob_a2.id)),
+                                TreeChange.add((b'b', F, blob_b2.id))])
         self.assertWalkYields([e2, e1], [c2.id])
 
     def test_changes_multiple_parents(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_b2 = make_object(Blob, data='b2')
-        blob_a3 = make_object(Blob, data='a3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_b2 = make_object(Blob, data=b'b2')
+        blob_a3 = make_object(Blob, data=b'a3')
         c1, c2, c3 = self.make_commits(
-          [[1], [2], [3, 1, 2]],
-          trees={1: [('a', blob_a1)], 2: [('b', blob_b2)],
-                 3: [('a', blob_a3), ('b', blob_b2)]})
+            [[1], [2], [3, 1, 2]],
+            trees={1: [(b'a', blob_a1)], 2: [(b'b', blob_b2)],
+                   3: [(b'a', blob_a3), (b'b', blob_b2)]})
         # a is a modify/add conflict and b is not conflicted.
         changes = [[
-          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id), ('a', F, blob_a3.id)),
-          TreeChange.add(('a', F, blob_a3.id)),
-          ]]
+            TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id), (b'a', F, blob_a3.id)),
+            TreeChange.add((b'a', F, blob_a3.id)),
+        ]]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
                               exclude=[c1.id, c2.id])
 
     def test_path_matches(self):
-        walker = Walker(None, [], paths=['foo', 'bar', 'baz/quux'])
-        self.assertTrue(walker._path_matches('foo'))
-        self.assertTrue(walker._path_matches('foo/a'))
-        self.assertTrue(walker._path_matches('foo/a/b'))
-        self.assertTrue(walker._path_matches('bar'))
-        self.assertTrue(walker._path_matches('baz/quux'))
-        self.assertTrue(walker._path_matches('baz/quux/a'))
+        walker = Walker(None, [], paths=[b'foo', b'bar', b'baz/quux'])
+        self.assertTrue(walker._path_matches(b'foo'))
+        self.assertTrue(walker._path_matches(b'foo/a'))
+        self.assertTrue(walker._path_matches(b'foo/a/b'))
+        self.assertTrue(walker._path_matches(b'bar'))
+        self.assertTrue(walker._path_matches(b'baz/quux'))
+        self.assertTrue(walker._path_matches(b'baz/quux/a'))
 
         self.assertFalse(walker._path_matches(None))
-        self.assertFalse(walker._path_matches('oops'))
-        self.assertFalse(walker._path_matches('fool'))
-        self.assertFalse(walker._path_matches('baz'))
-        self.assertFalse(walker._path_matches('baz/quu'))
+        self.assertFalse(walker._path_matches(b'oops'))
+        self.assertFalse(walker._path_matches(b'fool'))
+        self.assertFalse(walker._path_matches(b'baz'))
+        self.assertFalse(walker._path_matches(b'baz/quu'))
 
     def test_paths(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_b2 = make_object(Blob, data='b2')
-        blob_a3 = make_object(Blob, data='a3')
-        blob_b3 = make_object(Blob, data='b3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_b2 = make_object(Blob, data=b'b2')
+        blob_a3 = make_object(Blob, data=b'a3')
+        blob_b3 = make_object(Blob, data=b'b3')
         c1, c2, c3 = self.make_linear_commits(
-          3, trees={1: [('a', blob_a1)],
-                    2: [('a', blob_a1), ('x/b', blob_b2)],
-                    3: [('a', blob_a3), ('x/b', blob_b3)]})
+            3, trees={1: [(b'a', blob_a1)],
+                      2: [(b'a', blob_a1), (b'x/b', blob_b2)],
+                      3: [(b'a', blob_a3), (b'x/b', blob_b3)]})
 
         self.assertWalkYields([c3, c2, c1], [c3.id])
-        self.assertWalkYields([c3, c1], [c3.id], paths=['a'])
-        self.assertWalkYields([c3, c2], [c3.id], paths=['x/b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=[b'a'])
+        self.assertWalkYields([c3, c2], [c3.id], paths=[b'x/b'])
 
         # All changes are included, not just for requested paths.
         changes = [
-          TreeChange(CHANGE_MODIFY, ('a', F, blob_a1.id),
-                     ('a', F, blob_a3.id)),
-          TreeChange(CHANGE_MODIFY, ('x/b', F, blob_b2.id),
-                     ('x/b', F, blob_b3.id)),
-          ]
+            TreeChange(CHANGE_MODIFY, (b'a', F, blob_a1.id),
+                       (b'a', F, blob_a3.id)),
+            TreeChange(CHANGE_MODIFY, (b'x/b', F, blob_b2.id),
+                       (b'x/b', F, blob_b3.id)),
+        ]
         self.assertWalkYields([TestWalkEntry(c3, changes)], [c3.id],
-                              max_entries=1, paths=['a'])
+                              max_entries=1, paths=[b'a'])
 
     def test_paths_subtree(self):
-        blob_a = make_object(Blob, data='a')
-        blob_b = make_object(Blob, data='b')
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
         c1, c2, c3 = self.make_linear_commits(
-          3, trees={1: [('x/a', blob_a)],
-                    2: [('b', blob_b), ('x/a', blob_a)],
-                    3: [('b', blob_b), ('x/a', blob_a), ('x/b', blob_b)]})
-        self.assertWalkYields([c2], [c3.id], paths=['b'])
-        self.assertWalkYields([c3, c1], [c3.id], paths=['x'])
+            3, trees={1: [(b'x/a', blob_a)],
+                      2: [(b'b', blob_b), (b'x/a', blob_a)],
+                      3: [(b'b', blob_b), (b'x/a', blob_a), (b'x/b', blob_b)]})
+        self.assertWalkYields([c2], [c3.id], paths=[b'b'])
+        self.assertWalkYields([c3, c1], [c3.id], paths=[b'x'])
 
     def test_paths_max_entries(self):
-        blob_a = make_object(Blob, data='a')
-        blob_b = make_object(Blob, data='b')
+        blob_a = make_object(Blob, data=b'a')
+        blob_b = make_object(Blob, data=b'b')
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob_a)],
-                    2: [('a', blob_a), ('b', blob_b)]})
-        self.assertWalkYields([c2], [c2.id], paths=['b'], max_entries=1)
-        self.assertWalkYields([c1], [c1.id], paths=['a'], max_entries=1)
+            2, trees={1: [(b'a', blob_a)],
+                      2: [(b'a', blob_a), (b'b', blob_b)]})
+        self.assertWalkYields([c2], [c2.id], paths=[b'b'], max_entries=1)
+        self.assertWalkYields([c1], [c1.id], paths=[b'a'], max_entries=1)
 
     def test_paths_merge(self):
-        blob_a1 = make_object(Blob, data='a1')
-        blob_a2 = make_object(Blob, data='a2')
-        blob_a3 = make_object(Blob, data='a3')
+        blob_a1 = make_object(Blob, data=b'a1')
+        blob_a2 = make_object(Blob, data=b'a2')
+        blob_a3 = make_object(Blob, data=b'a3')
         x1, y2, m3, m4 = self.make_commits(
-          [[1], [2], [3, 1, 2], [4, 1, 2]],
-          trees={1: [('a', blob_a1)],
-                 2: [('a', blob_a2)],
-                 3: [('a', blob_a3)],
-                 4: [('a', blob_a1)]})  # Non-conflicting
-        self.assertWalkYields([m3, y2, x1], [m3.id], paths=['a'])
-        self.assertWalkYields([y2, x1], [m4.id], paths=['a'])
+            [[1], [2], [3, 1, 2], [4, 1, 2]],
+            trees={1: [(b'a', blob_a1)],
+                   2: [(b'a', blob_a2)],
+                   3: [(b'a', blob_a3)],
+                   4: [(b'a', blob_a1)]})  # Non-conflicting
+        self.assertWalkYields([m3, y2, x1], [m3.id], paths=[b'a'])
+        self.assertWalkYields([y2, x1], [m4.id], paths=[b'a'])
 
     def test_changes_with_renames(self):
-        blob = make_object(Blob, data='blob')
+        blob = make_object(Blob, data=b'blob')
         c1, c2 = self.make_linear_commits(
-          2, trees={1: [('a', blob)], 2: [('b', blob)]})
-        entry_a = ('a', F, blob.id)
-        entry_b = ('b', F, blob.id)
+            2, trees={1: [(b'a', blob)], 2: [(b'b', blob)]})
+        entry_a = (b'a', F, blob.id)
+        entry_b = (b'b', F, blob.id)
         changes_without_renames = [TreeChange.delete(entry_a),
                                    TreeChange.add(entry_b)]
         changes_with_renames = [TreeChange(CHANGE_RENAME, entry_a, entry_b)]
@@ -278,37 +278,37 @@ class WalkerTest(TestCase):
           rename_detector=detector)
 
     def test_follow_rename(self):
-        blob = make_object(Blob, data='blob')
-        names = ['a', 'a', 'b', 'b', 'c', 'c']
+        blob = make_object(Blob, data=b'blob')
+        names = [b'a', b'a', b'b', b'b', b'c', b'c']
 
         trees = dict((i + 1, [(n, blob, F)]) for i, n in enumerate(names))
         c1, c2, c3, c4, c5, c6 = self.make_linear_commits(6, trees=trees)
-        self.assertWalkYields([c5], [c6.id], paths=['c'])
+        self.assertWalkYields([c5], [c6.id], paths=[b'c'])
 
         e = lambda n: (n, F, blob.id)
         self.assertWalkYields(
-          [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('c'))]),
-           TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e('a'), e('b'))]),
-           TestWalkEntry(c1, [TreeChange.add(e('a'))])],
-          [c6.id], paths=['c'], follow=True)
+            [TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'c'))]),
+             TestWalkEntry(c3, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'b'))]),
+             TestWalkEntry(c1, [TreeChange.add(e(b'a'))])],
+            [c6.id], paths=[b'c'], follow=True)
 
     def test_follow_rename_remove_path(self):
-        blob = make_object(Blob, data='blob')
+        blob = make_object(Blob, data=b'blob')
         _, _, _, c4, c5, c6 = self.make_linear_commits(
-          6, trees={1: [('a', blob), ('c', blob)],
-                    2: [],
-                    3: [],
-                    4: [('b', blob)],
-                    5: [('a', blob)],
-                    6: [('c', blob)]})
+            6, trees={1: [(b'a', blob), (b'c', blob)],
+                      2: [],
+                      3: [],
+                      4: [(b'b', blob)],
+                      5: [(b'a', blob)],
+                      6: [(b'c', blob)]})
 
         e = lambda n: (n, F, blob.id)
         # Once the path changes to b, we aren't interested in a or c anymore.
         self.assertWalkYields(
-          [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e('a'), e('c'))]),
-           TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e('b'), e('a'))]),
-           TestWalkEntry(c4, [TreeChange.add(e('b'))])],
-          [c6.id], paths=['c'], follow=True)
+            [TestWalkEntry(c6, [TreeChange(CHANGE_RENAME, e(b'a'), e(b'c'))]),
+             TestWalkEntry(c5, [TreeChange(CHANGE_RENAME, e(b'b'), e(b'a'))]),
+             TestWalkEntry(c4, [TreeChange.add(e(b'b'))])],
+            [c6.id], paths=[b'c'], follow=True)
 
     def test_since(self):
         c1, c2, c3 = self.make_linear_commits(3)

+ 2 - 6
dulwich/tests/test_web.py

@@ -61,6 +61,7 @@ from dulwich.web import (
 
 from dulwich.tests.utils import (
     make_object,
+    make_tag,
     skipIfPY3,
     )
 
@@ -229,12 +230,7 @@ class DumbHandlersTestCase(WebTestCase):
         blob2 = make_object(Blob, data='2')
         blob3 = make_object(Blob, data='3')
 
-        tag1 = make_object(Tag, name='tag-tag',
-                           tagger='Test <test@example.com>',
-                           tag_time=12345,
-                           tag_timezone=0,
-                           message='message',
-                           object=(Blob, blob2.id))
+        tag1 = make_tag(blob2, name='tag-tag')
 
         objects = [blob1, blob2, blob3, tag1]
         refs = {

+ 28 - 4
dulwich/tests/utils.py

@@ -36,6 +36,8 @@ from dulwich.index import (
 from dulwich.objects import (
     FixedSha,
     Commit,
+    Tag,
+    object_class,
     )
 from dulwich.pack import (
     OFS_DELTA,
@@ -100,6 +102,7 @@ def make_object(cls, **attrs):
         __dict__ instead of __slots__.
         """
         pass
+    TestObject.__name__ = 'TestObject_' + cls.__name__
 
     obj = TestObject()
     for name, value in attrs.items():
@@ -119,19 +122,40 @@ def make_commit(**attrs):
     :return: A newly initialized Commit object.
     """
     default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple()))
-    all_attrs = {'author': 'Test Author <test@nodomain.com>',
+    all_attrs = {'author': b'Test Author <test@nodomain.com>',
                  'author_time': default_time,
                  'author_timezone': 0,
-                 'committer': 'Test Committer <test@nodomain.com>',
+                 'committer': b'Test Committer <test@nodomain.com>',
                  'commit_time': default_time,
                  'commit_timezone': 0,
-                 'message': 'Test message.',
+                 'message': b'Test message.',
                  'parents': [],
-                 'tree': '0' * 40}
+                 'tree': b'0' * 40}
     all_attrs.update(attrs)
     return make_object(Commit, **all_attrs)
 
 
+def make_tag(target, **attrs):
+    """Make a Tag object with a default set of values.
+
+    :param target: object to be tagged (Commit, Blob, Tree, etc)
+    :param attrs: dict of attributes to overwrite from the default values.
+    :return: A newly initialized Tag object.
+    """
+    target_id = target.id
+    target_type = object_class(target.type_name)
+    default_time = int(time.mktime(datetime.datetime(2010, 1, 1).timetuple()))
+    all_attrs = {'tagger': 'Test Author <test@nodomain.com>',
+                 'tag_time': default_time,
+                 'tag_timezone': 0,
+                 'message': 'Test message.',
+                 'object': (target_type, target_id),
+                 'name': 'Test Tag',
+                 }
+    all_attrs.update(attrs)
+    return make_object(Tag, **all_attrs)
+
+
 def functest_builder(method, func):
     """Generate a test method that tests the given function."""
 

+ 1 - 1
dulwich/walk.py

@@ -251,7 +251,7 @@ class Walker(object):
             if changed_path == followed_path:
                 return True
             if (changed_path.startswith(followed_path) and
-                changed_path[len(followed_path)] == '/'):
+                    changed_path[len(followed_path)] == b'/'[0]):
                 return True
         return False
 

+ 2 - 1
setup.py

@@ -8,7 +8,7 @@ except ImportError:
     from distutils.core import setup, Extension
 from distutils.core import Distribution
 
-dulwich_version_string = '0.9.8'
+dulwich_version_string = '0.10.0'
 
 include_dirs = []
 # Windows MSVC support
@@ -76,6 +76,7 @@ setup(name='dulwich',
       in the particular Monty Python sketch.
       """,
       packages=['dulwich', 'dulwich.tests', 'dulwich.tests.compat', 'dulwich.contrib'],
+      package_data={'': ['../docs/tutorial/*.txt']},
       scripts=['bin/dulwich', 'bin/dul-receive-pack', 'bin/dul-upload-pack'],
       classifiers=[
           'Development Status :: 4 - Beta',

Some files were not shown because too many files changed in this diff