6 éve · 9c0ae13b08
--- a/dulwich/config.py
+++ b/dulwich/config.py
@@ -129,7 +129,7 @@ class Config(object):
 
				     def get_boolean(self, section, name, default=None):
			
 
				         """Retrieve a configuration setting as boolean.
			
 
				 
			
 
				-        :param section: Tuple with section name and optional subsection namee
			
 
				+        :param section: Tuple with section name and optional subsection name
			
 
				         :param name: Name of the setting, including section and possible
			
 
				             subsection.
			
 
				         :return: Contents of the setting
			
--- a/dulwich/index.py
+++ b/dulwich/index.py
@@ -604,7 +604,7 @@ def read_submodule_head(path):
 
				         return None
			
 
				 
			
 
				 
			
 
				-def get_unstaged_changes(index, root_path):
			
 
				+def get_unstaged_changes(index, root_path, filter_blob_callback=None):
			
 
				     """Walk through an index and check for differences against working tree.
			
 
				 
			
 
				     :param index: index to check
			
@@ -618,7 +618,12 @@ def get_unstaged_changes(index, root_path):
 
				     for tree_path, entry in index.iteritems():
			
 
				         full_path = _tree_to_fs_path(root_path, tree_path)
			
 
				         try:
			
 
				-            blob = blob_from_path_and_stat(full_path, os.lstat(full_path))
			
 
				+            blob = blob_from_path_and_stat(
			
 
				+                full_path, os.lstat(full_path)
			
 
				+            )
			
 
				+
			
 
				+            if filter_blob_callback is not None:
			
 
				+                blob = filter_blob_callback(blob, tree_path)
			
 
				         except OSError as e:
			
 
				             if e.errno != errno.ENOENT:
			
 
				                 raise
			
--- a/dulwich/line_ending.py
+++ b/dulwich/line_ending.py
@@ -126,6 +126,9 @@ Sources:
 
				 - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/
			
 
				 """
			
 
				 
			
 
				+from dulwich.objects import Blob
			
 
				+from dulwich.patch import is_binary
			
 
				+
			
 
				 CRLF = b"\r\n"
			
 
				 LF = b"\n"
			
 
				 
			
@@ -150,6 +153,24 @@ def convert_lf_to_crlf(text_hunk):
 
				     return intermediary.replace(LF, CRLF)
			
 
				 
			
 
				 
			
 
				+def get_checkout_filter(core_eol, core_autocrlf, git_attributes):
			
 
				+    """ Returns the correct checkout filter based on the passed arguments
			
 
				+    """
			
 
				+    # TODO this function should process the git_attributes for the path and if
			
 
				+    # the text attribute is not defined, fall back on the
			
 
				+    # get_checkout_filter_autocrlf function with the autocrlf value
			
 
				+    return get_checkout_filter_autocrlf(core_autocrlf)
			
 
				+
			
 
				+
			
 
				+def get_checkin_filter(core_eol, core_autocrlf, git_attributes):
			
 
				+    """ Returns the correct checkin filter based on the passed arguments
			
 
				+    """
			
 
				+    # TODO this function should process the git_attributes for the path and if
			
 
				+    # the text attribute is not defined, fall back on the
			
 
				+    # get_checkin_filter_autocrlf function with the autocrlf value
			
 
				+    return get_checkin_filter_autocrlf(core_autocrlf)
			
 
				+
			
 
				+
			
 
				 def get_checkout_filter_autocrlf(core_autocrlf):
			
 
				     """ Returns the correct checkout filter base on autocrlf value
			
 
				 
			
@@ -179,3 +200,75 @@ def get_checkin_filter_autocrlf(core_autocrlf):
 
				 
			
 
				     # Checking filter should never be `convert_lf_to_crlf`
			
 
				     return None
			
 
				+
			
 
				+
			
 
				+class BlobNormalizer(object):
			
 
				+    """ An object to store computation result of which filter to apply based
			
 
				+    on configuration, gitattributes, path and operation (checkin or checkout)
			
 
				+    """
			
 
				+
			
 
				+    def __init__(self, config_stack, gitattributes):
			
 
				+        self.config_stack = config_stack
			
 
				+        self.gitattributes = gitattributes
			
 
				+
			
 
				+        # Compute which filters we need based on parameters
			
 
				+        try:
			
 
				+            core_eol = config_stack.get("core", "eol")
			
 
				+        except KeyError:
			
 
				+            core_eol = "native"
			
 
				+
			
 
				+        try:
			
 
				+            core_autocrlf = config_stack.get("core", "autocrlf").lower()
			
 
				+        except KeyError:
			
 
				+            core_autocrlf = False
			
 
				+
			
 
				+        self.fallback_read_filter = get_checkout_filter(
			
 
				+            core_eol, core_autocrlf, self.gitattributes
			
 
				+        )
			
 
				+        self.fallback_write_filter = get_checkin_filter(
			
 
				+            core_eol, core_autocrlf, self.gitattributes
			
 
				+        )
			
 
				+
			
 
				+    def checkin_normalize(self, blob, tree_path):
			
 
				+        """ Normalize a blob during a checkin operation
			
 
				+        """
			
 
				+        if self.fallback_write_filter is not None:
			
 
				+            return normalize_blob(
			
 
				+                blob, self.fallback_write_filter, binary_detection=False
			
 
				+            )
			
 
				+
			
 
				+        return blob
			
 
				+
			
 
				+    def checkout_normalize(self, blob, tree_path):
			
 
				+        """ Normalize a blob during a checkout operation
			
 
				+        """
			
 
				+        if self.fallback_read_filter is not None:
			
 
				+            return normalize_blob(
			
 
				+                blob, self.fallback_read_filter, binary_detection=False
			
 
				+            )
			
 
				+
			
 
				+        return blob
			
 
				+
			
 
				+
			
 
				+def normalize_blob(blob, conversion, binary_detection):
			
 
				+    """ Takes a blob as input and returns either the original blob if
			
 
				+    binary_detection is True and the blob content looks like binary, else
			
 
				+    returns a new blob with converted data
			
 
				+    """
			
 
				+    # Read the original blob
			
 
				+    data = blob.data
			
 
				+
			
 
				+    # If we need to detect if a file is binary and the file is detected as
			
 
				+    # binary, do not apply the conversion function and return the original
			
 
				+    # blob unchanged
			
 
				+    if binary_detection is True:
			
 
				+        if is_binary(data):
			
 
				+            return blob
			
 
				+
			
 
				+    # Now apply the conversion
			
 
				+    converted_data = conversion(data)
			
 
				+
			
 
				+    new_blob = Blob()
			
 
				+    new_blob.data = converted_data
			
 
				+
			
 
				+    return new_blob
			
--- a/dulwich/porcelain.py
+++ b/dulwich/porcelain.py
@@ -882,7 +882,11 @@ def status(repo=".", ignored=False):
 
				         tracked_changes = get_tree_changes(r)
			
 
				         # 2. Get status of unstaged
			
 
				         index = r.open_index()
			
 
				-        unstaged_changes = list(get_unstaged_changes(index, r.path))
			
 
				+        normalizer = r.get_blob_normalizer()
			
 
				+        filter_callback = normalizer.checkin_normalize
			
 
				+        unstaged_changes = list(
			
 
				+            get_unstaged_changes(index, r.path, filter_callback)
			
 
				+        )
			
 
				         ignore_manager = IgnoreFilterManager.from_repo(r)
			
 
				         untracked_paths = get_untracked_paths(r.path, r.path, index)
			
 
				         if ignored:
			
--- a/dulwich/repo.py
+++ b/dulwich/repo.py
@@ -72,6 +72,8 @@ from dulwich.hooks import (
 
				     CommitMsgShellHook,
			
 
				     )
			
 
				 
			
 
				+from dulwich.line_ending import BlobNormalizer
			
 
				+
			
 
				 from dulwich.refs import (  # noqa: F401
			
 
				     ANNOTATED_TAG_SUFFIX,
			
 
				     check_ref_format,
			
@@ -1020,6 +1022,7 @@ class Repo(BaseRepo):
 
				             _fs_to_tree_path,
			
 
				             )
			
 
				         index = self.open_index()
			
 
				+        blob_normalizer = self.get_blob_normalizer()
			
 
				         for fs_path in fs_paths:
			
 
				             if not isinstance(fs_path, bytes):
			
 
				                 fs_path = fs_path.encode(sys.getfilesystemencoding())
			
@@ -1040,6 +1043,7 @@ class Repo(BaseRepo):
 
				             else:
			
 
				                 if not stat.S_ISDIR(st.st_mode):
			
 
				                     blob = blob_from_path_and_stat(full_path, st)
			
 
				+                    blob = blob_normalizer.checkin_normalize(blob, fs_path)
			
 
				                     self.object_store.add_object(blob)
			
 
				                     index[tree_path] = index_entry_from_stat(st, blob.id, 0)
			
 
				                 else:
			
@@ -1261,6 +1265,13 @@ class Repo(BaseRepo):
 
				     def __exit__(self, exc_type, exc_val, exc_tb):
			
 
				         self.close()
			
 
				 
			
 
				+    def get_blob_normalizer(self):
			
 
				+        """ Return a BlobNormalizer object
			
 
				+        """
			
 
				+        # TODO Parse the git attributes files
			
 
				+        git_attributes = {}
			
 
				+        return BlobNormalizer(self.get_config_stack(), git_attributes)
			
 
				+
			
 
				 
			
 
				 class MemoryRepo(BaseRepo):
			
 
				     """Repo that stores refs, objects, and named files in memory.
			
--- a/dulwich/tests/test_line_ending.py
+++ b/dulwich/tests/test_line_ending.py
@@ -23,11 +23,13 @@
 
				 """Tests for the line ending conversion."""
			
 
				 
			
 
				 from dulwich.line_ending import (
			
 
				+    normalize_blob,
			
 
				     convert_crlf_to_lf,
			
 
				     convert_lf_to_crlf,
			
 
				     get_checkin_filter_autocrlf,
			
 
				     get_checkout_filter_autocrlf,
			
 
				 )
			
 
				+from dulwich.objects import Blob
			
 
				 from dulwich.tests import TestCase
			
 
				 
			
 
				 
			
@@ -92,3 +94,113 @@ class GetLineEndingAutocrlfFilters(TestCase):
 
				         checkout_filter = get_checkout_filter_autocrlf(b"input")
			
 
				 
			
 
				         self.assertEqual(checkout_filter, None)
			
 
				+
			
 
				+
			
 
				+class NormalizeBlobTestCase(TestCase):
			
 
				+    def test_normalize_to_lf_no_op(self):
			
 
				+        base_content = b"line1\nline2"
			
 
				+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_crlf_to_lf, binary_detection=False
			
 
				+        )
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+    def test_normalize_to_lf(self):
			
 
				+        base_content = b"line1\r\nline2"
			
 
				+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_crlf_to_lf, binary_detection=False
			
 
				+        )
			
 
				+
			
 
				+        normalized_content = b"line1\nline2"
			
 
				+        normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
			
 
				+
			
 
				+    def test_normalize_to_lf_binary(self):
			
 
				+        base_content = b"line1\r\nline2\0"
			
 
				+        base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_crlf_to_lf, binary_detection=True
			
 
				+        )
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+    def test_normalize_to_crlf_no_op(self):
			
 
				+        base_content = b"line1\r\nline2"
			
 
				+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_lf_to_crlf, binary_detection=False
			
 
				+        )
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+    def test_normalize_to_crlf(self):
			
 
				+        base_content = b"line1\nline2"
			
 
				+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_lf_to_crlf, binary_detection=False
			
 
				+        )
			
 
				+
			
 
				+        normalized_content = b"line1\r\nline2"
			
 
				+        normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
			
 
				+
			
 
				+    def test_normalize_to_crlf_binary(self):
			
 
				+        base_content = b"line1\r\nline2\0"
			
 
				+        base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
			
 
				+
			
 
				+        base_blob = Blob()
			
 
				+        base_blob.set_raw_string(base_content)
			
 
				+
			
 
				+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
			
 
				+
			
 
				+        filtered_blob = normalize_blob(
			
 
				+            base_blob, convert_lf_to_crlf, binary_detection=True
			
 
				+        )
			
 
				+
			
 
				+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
			
 
				+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
			
--- a/dulwich/tests/test_porcelain.py
+++ b/dulwich/tests/test_porcelain.py
@@ -341,6 +341,26 @@ class AddTests(PorcelainTestCase):
 
				             paths=["../foo"])
			
 
				         self.assertEqual([], list(self.repo.open_index()))
			
 
				 
			
 
				+    def test_add_file_clrf_conversion(self):
			
 
				+        # Set the right configuration to the repo
			
 
				+        c = self.repo.get_config()
			
 
				+        c.set("core", "autocrlf", "input")
			
 
				+        c.write_to_path()
			
 
				+
			
 
				+        # Add a file with CRLF line-ending
			
 
				+        fullpath = os.path.join(self.repo.path, 'foo')
			
 
				+        with open(fullpath, 'wb') as f:
			
 
				+            f.write(b"line1\r\nline2")
			
 
				+        porcelain.add(self.repo.path, paths=[fullpath])
			
 
				+
			
 
				+        # The line-endings should have been converted to LF
			
 
				+        index = self.repo.open_index()
			
 
				+        self.assertIn(b"foo", index)
			
 
				+
			
 
				+        entry = index[b"foo"]
			
 
				+        blob = self.repo[entry.sha]
			
 
				+        self.assertEqual(blob.data, b"line1\nline2")
			
 
				+
			
 
				 
			
 
				 class RemoveTests(PorcelainTestCase):
			
 
				 
			
@@ -908,6 +928,57 @@ class StatusTests(PorcelainTestCase):
 
				         self.assertListEqual(results.unstaged, [b'blye'])
			
 
				         self.assertListEqual(results.untracked, ['blyat'])
			
 
				 
			
 
				+    def test_status_crlf_mismatch(self):
			
 
				+        # First make a commit as if the file has been added on a Linux system
			
 
				+        # or with core.autocrlf=True
			
 
				+        file_path = os.path.join(self.repo.path, 'crlf')
			
 
				+        with open(file_path, 'wb') as f:
			
 
				+            f.write(b'line1\nline2')
			
 
				+        porcelain.add(repo=self.repo.path, paths=[file_path])
			
 
				+        porcelain.commit(repo=self.repo.path, message=b'test status',
			
 
				+                         author=b'author <email>',
			
 
				+                         committer=b'committer <email>')
			
 
				+
			
 
				+        # Then update the file as if it was created by CGit on a Windows
			
 
				+        # system with core.autocrlf=true
			
 
				+        with open(file_path, 'wb') as f:
			
 
				+            f.write(b'line1\r\nline2')
			
 
				+
			
 
				+        results = porcelain.status(self.repo)
			
 
				+        self.assertDictEqual(
			
 
				+            {'add': [], 'delete': [], 'modify': []},
			
 
				+            results.staged)
			
 
				+        self.assertListEqual(results.unstaged, [b'crlf'])
			
 
				+        self.assertListEqual(results.untracked, [])
			
 
				+
			
 
				+    def test_status_crlf_convert(self):
			
 
				+        # First make a commit as if the file has been added on a Linux system
			
 
				+        # or with core.autocrlf=True
			
 
				+        file_path = os.path.join(self.repo.path, 'crlf')
			
 
				+        with open(file_path, 'wb') as f:
			
 
				+            f.write(b'line1\nline2')
			
 
				+        porcelain.add(repo=self.repo.path, paths=[file_path])
			
 
				+        porcelain.commit(repo=self.repo.path, message=b'test status',
			
 
				+                         author=b'author <email>',
			
 
				+                         committer=b'committer <email>')
			
 
				+
			
 
				+        # Then update the file as if it was created by CGit on a Windows
			
 
				+        # system with core.autocrlf=true
			
 
				+        with open(file_path, 'wb') as f:
			
 
				+            f.write(b'line1\r\nline2')
			
 
				+
			
 
				+        # TODO: It should be set automatically by looking at the configuration
			
 
				+        c = self.repo.get_config()
			
 
				+        c.set("core", "autocrlf", True)
			
 
				+        c.write_to_path()
			
 
				+
			
 
				+        results = porcelain.status(self.repo)
			
 
				+        self.assertDictEqual(
			
 
				+            {'add': [], 'delete': [], 'modify': []},
			
 
				+            results.staged)
			
 
				+        self.assertListEqual(results.unstaged, [])
			
 
				+        self.assertListEqual(results.untracked, [])
			
 
				+
			
 
				     def test_get_tree_changes_add(self):
			
 
				         """Unit test for get_tree_changes add."""