浏览代码

Avoid encoding or decoding checkout paths.

* index.build_index_from_tree will not decode tree paths, but will write to the
  file system using the bytes paths.
* Repo.stage will accept either bytes or string paths. Only if the path is a
  string will the path be encoded with sys.getfilesystemencoding(); otherwise
  the bytes will be used directly to write to the git tree.
Gary van der Merwe 10 年之前
父节点
当前提交
5f3ad83bd0
共有 4 个文件被更改,包括 20 次插入18 次删除
  1. 9 9
      dulwich/index.py
  2. 9 4
      dulwich/repo.py
  3. 1 2
      dulwich/tests/test_index.py
  4. 1 3
      dulwich/tests/test_repository.py

+ 9 - 9
dulwich/index.py

@@ -482,12 +482,13 @@ def build_index_from_tree(prefix, index_path, object_store, tree_id,
     """
 
     index = Index(index_path)
+    if not isinstance(prefix, bytes):
+        prefix = prefix.encode(sys.getfilesystemencoding())
 
     for entry in object_store.iter_tree_contents(tree_id):
         if not validate_path(entry.path, validate_path_element):
             continue
-        full_path = os.path.join(prefix,
-            entry.path.decode(sys.getfilesystemencoding()))
+        full_path = os.path.join(prefix, entry.path)
 
         if not os.path.exists(os.path.dirname(full_path)):
             os.makedirs(os.path.dirname(full_path))
@@ -510,17 +511,13 @@ def blob_from_path_and_stat(path, st):
     :param st: A stat object
     :return: A `Blob` object
     """
+    assert isinstance(path, bytes)
     blob = Blob()
     if not stat.S_ISLNK(st.st_mode):
         with open(path, 'rb') as f:
             blob.data = f.read()
     else:
-        if platform.python_implementation() == 'PyPy':
-            # os.readlink on pypy seems to require bytes
-            # TODO: GaryvdM: test on other pypy configurations,
-            # e.g. windows, pypy3.
-            path = path.encode(sys.getfilesystemencoding())
-        blob.data = os.readlink(path).encode(sys.getfilesystemencoding())
+        blob.data = os.readlink(path)
     return blob
 
 
@@ -532,8 +529,11 @@ def get_unstaged_changes(index, path):
     :return: iterator over paths with unstaged changes
     """
     # For each entry in the index check the sha1 & ensure not staged
+    if not isinstance(path, bytes):
+        path = path.encode(sys.getfilesystemencoding())
+
     for name, entry in index.iteritems():
-        fp = os.path.join(path, name.decode(sys.getfilesystemencoding()))
+        fp = os.path.join(path, name)
         blob = blob_from_path_and_stat(fp, os.lstat(fp))
         if blob.id != entry.sha:
             yield name

+ 9 - 4
dulwich/repo.py

@@ -730,11 +730,14 @@ class Repo(BaseRepo):
         # missing index file, which is treated as empty.
         return not self.bare
 
-    def stage(self, paths, fsencoding=sys.getfilesystemencoding()):
+    def stage(self, paths):
         """Stage a set of paths.
 
         :param paths: List of paths, relative to the repository path
         """
+
+        root_path_bytes = self.path.encode(sys.getfilesystemencoding())
+
         if not isinstance(paths, list):
             paths = [paths]
         from dulwich.index import (
@@ -743,19 +746,21 @@ class Repo(BaseRepo):
             )
         index = self.open_index()
         for path in paths:
-            full_path = os.path.join(self.path, path)
+            if not isinstance(path, bytes):
+                path = path.encode(sys.getfilesystemencoding())
+            full_path = os.path.join(root_path_bytes, path)
             try:
                 st = os.lstat(full_path)
             except OSError:
                 # File no longer exists
                 try:
-                    del index[path.encode(fsencoding)]
+                    del index[path]
                 except KeyError:
                     pass  # already removed
             else:
                 blob = blob_from_path_and_stat(full_path, st)
                 self.object_store.add_object(blob)
-                index[path.encode(fsencoding)] = index_entry_from_stat(st, blob.id, 0)
+                index[path] = index_entry_from_stat(st, blob.id, 0)
         index.write()
 
     def clone(self, target_path, mkdir=True, bare=False,

+ 1 - 2
dulwich/tests/test_index.py

@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 # test_index.py -- Tests for the git index
 # encoding: utf-8
 # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
@@ -53,7 +54,6 @@ from dulwich.objects import (
     )
 from dulwich.repo import Repo
 from dulwich.tests import (
-    expectedFailure,
     TestCase,
     skipIf,
     )
@@ -395,7 +395,6 @@ class BuildIndexTests(TestCase):
                 filee.id)
             self.assertFileContents(epath, 'd', symlink=True)
 
-    @expectedFailure
     def test_no_decode_encode(self):
         repo_dir = tempfile.mkdtemp()
         repo_dir_bytes = repo_dir.encode(sys.getfilesystemencoding())

+ 1 - 3
dulwich/tests/test_repository.py

@@ -40,7 +40,6 @@ from dulwich.repo import (
     MemoryRepo,
     )
 from dulwich.tests import (
-    expectedFailure,
     TestCase,
     skipIf,
     )
@@ -54,7 +53,7 @@ missing_sha = b'b91fa4d900e17e99b433218e988c4eb4a3e9a097'
 
 
 def mkdtemp_unicode():
-    suffix = u'déłwíçh'
+    suffix = u'délwíçh'
     return tempfile.mkdtemp(suffix=suffix)
 
 
@@ -746,7 +745,6 @@ class BuildRepoRootTests(TestCase):
         r.stage(['a'])
         r.stage(['a'])  # double-stage a deleted path
 
-    @expectedFailure
     def test_commit_no_encode_decode(self):
         r = self._repo
         repo_path_bytes = r.path.encode(sys.getfilesystemencoding())