Browse Source

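Fix handling of stored encoding in dulwich.porcelain.get_object_by_path on Python 3.

The encoding recorded on a commit is stored as bytes, so it has to be decoded to str before it can be passed as the codec name to str.encode() / bytes.decode() on Python 3.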

Jelmer Vernooij 5 years ago
parent
commit
fa3ad53bc0
3 changed files with 40 additions and 8 deletions
  1. NEWS (+4, -0)
  2. dulwich/porcelain.py (+15, -7)
  3. dulwich/tests/test_porcelain.py (+21, -1)

+ 4 - 0
NEWS

@@ -3,6 +3,10 @@
  * Properly handle files that are just executable for the
    current user. (Jelmer Vernooij, #734)
 
+ * Fix handling of stored encoding in
+   ``dulwich.porcelain.get_object_by_path`` on Python 3.
+   (Jelmer Vernooij)
+
 0.19.14	2019-11-30
 
  * Strip superfluous <> around email. (monnerat)

+ 15 - 7
dulwich/porcelain.py

@@ -523,9 +523,19 @@ rm = remove
 
 
 def commit_decode(commit, contents, default_encoding=DEFAULT_ENCODING):
-    if commit.encoding is not None:
-        return contents.decode(commit.encoding, "replace")
-    return contents.decode(default_encoding, "replace")
+    if commit.encoding:
+        encoding = commit.encoding.decode('ascii')
+    else:
+        encoding = default_encoding
+    return contents.decode(encoding, "replace")
+
+
+def commit_encode(commit, contents, default_encoding=DEFAULT_ENCODING):
+    if commit.encoding:
+        encoding = commit.encoding.decode('ascii')
+    else:
+        encoding = default_encoding
+    return contents.encode(encoding)
 
 
 def print_commit(commit, decode, outstream=sys.stdout):
@@ -604,9 +614,7 @@ def show_commit(repo, commit, decode, outstream=sys.stdout):
         diffstream,
         repo.object_store, base_tree, commit.tree)
     diffstream.seek(0)
-    outstream.write(
-        diffstream.getvalue().decode(
-                commit.encoding or DEFAULT_ENCODING, 'replace'))
+    outstream.write(commit_decode(commit, diffstream.getvalue()))
 
 
 def show_tree(repo, tree, decode, outstream=sys.stdout):
@@ -1565,7 +1573,7 @@ def get_object_by_path(repo, path, committish=None):
         commit = parse_commit(r, committish)
         base_tree = commit.tree
         if not isinstance(path, bytes):
-            path = path.encode(commit.encoding or DEFAULT_ENCODING)
+            path = commit_encode(commit, path)
         (mode, sha) = tree_lookup_path(
             r.object_store.__getitem__,
             base_tree, path)

+ 21 - 1
dulwich/tests/test_porcelain.py

@@ -1770,7 +1770,7 @@ class HelperTests(PorcelainTestCase):
             os.chdir(cwd)
 
 
-class GetObjectBypathTests(PorcelainTestCase):
+class GetObjectByPathTests(PorcelainTestCase):
 
     def test_simple(self):
         fullpath = os.path.join(self.repo.path, 'foo')
@@ -1784,6 +1784,26 @@ class GetObjectBypathTests(PorcelainTestCase):
         self.assertEqual(
             b"BAR",
             porcelain.get_object_by_path(self.repo, 'foo').data)
+        self.assertEqual(
+            b"BAR",
+            porcelain.get_object_by_path(self.repo, b'foo').data)
+
+    def test_encoding(self):
+        fullpath = os.path.join(self.repo.path, 'foo')
+        with open(fullpath, 'w') as f:
+            f.write("BAR")
+        porcelain.add(repo=self.repo.path, paths=[fullpath])
+        porcelain.commit(
+                self.repo.path, message=b"Some message",
+                author=b"Joe <joe@example.com>",
+                committer=b"Bob <bob@example.com>",
+                encoding=b"utf-8")
+        self.assertEqual(
+            b"BAR",
+            porcelain.get_object_by_path(self.repo, 'foo').data)
+        self.assertEqual(
+            b"BAR",
+            porcelain.get_object_by_path(self.repo, b'foo').data)
 
     def test_missing(self):
         self.assertRaises(