Преглед изворни кода

Add a normalize_blob function

It takes a blob as input, checks whether the blob content is binary and, if
not, applies the conversion function and finally returns a new blob.
Boris Feld пре 6 година
родитељ
комит
bacb019963
2 измењених фајлова са 104 додато и 0 уклоњено
  1. 26 0
      dulwich/line_ending.py
  2. 78 0
      dulwich/tests/test_line_ending.py

+ 26 - 0
dulwich/line_ending.py

@@ -126,6 +126,8 @@ Sources:
 - https://adaptivepatchwork.com/2012/03/01/mind-the-end-of-your-line/
 """
 
+from dulwich.objects import Blob
+
 CRLF = b"\r\n"
 LF = b"\n"
 
@@ -179,3 +181,27 @@ def get_checkin_filter_autocrlf(core_autocrlf):
 
     # Checking filter should never be `convert_lf_to_crlf`
     return None
+
+
+def normalize_blob(blob, conversion, binary_detection):
+    """Return ``blob`` unchanged when ``binary_detection`` is True and
+    ``is_binary`` reports its data as binary; otherwise return a new Blob
+    whose data is ``conversion(blob.data)``.
+    """
+    # Read the raw content of the original blob
+    data = blob.data
+
+    # Binary detection only runs when binary_detection is exactly True; data
+    # detected as binary is not converted and the original blob is returned.
+    # NOTE(review): is_binary is not imported in the hunks shown - confirm.
+    if binary_detection is True:
+        if is_binary(data):
+            return blob
+
+    # Convert, then wrap the result in a new Blob; the input is not modified
+    converted_data = conversion(data)
+
+    new_blob = Blob()
+    new_blob.data = converted_data
+
+    return new_blob

+ 78 - 0
dulwich/tests/test_line_ending.py

@@ -23,11 +23,13 @@
 """Tests for the line ending conversion."""
 
 from dulwich.line_ending import (
+    normalize_blob,
     convert_crlf_to_lf,
     convert_lf_to_crlf,
     get_checkin_filter_autocrlf,
     get_checkout_filter_autocrlf,
 )
+from dulwich.objects import Blob
 from dulwich.tests import TestCase
 
 
@@ -92,3 +94,79 @@ class GetLineEndingAutocrlfFilters(TestCase):
         checkout_filter = get_checkout_filter_autocrlf(b"input")
 
         self.assertEqual(checkout_filter, None)
+
+
+class NormalizeBlobTestCase(TestCase):  # binary_detection=False cases only
+    def test_normalize_to_lf_no_op(self):
+        base_content = b"line1\nline2"
+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+        # Sanity-check the fixture before running the conversion
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_crlf_to_lf, binary_detection=False
+        )
+        # LF-only content has no CRLF to convert: same chunks, same sha
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+    def test_normalize_to_lf(self):
+        base_content = b"line1\r\nline2"
+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+        # Sanity-check the fixture before running the conversion
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_crlf_to_lf, binary_detection=False
+        )
+
+        normalized_content = b"line1\nline2"
+        normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+        # CRLF collapsed to LF: content and sha match the LF-only fixture
+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
+
+    def test_normalize_to_crlf_no_op(self):
+        base_content = b"line1\r\nline2"
+        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+        # Sanity-check the fixture before running the conversion
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_lf_to_crlf, binary_detection=False
+        )
+        # Existing CRLF is expected to be left alone: same chunks, same sha
+        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
+
+    def test_normalize_to_crlf(self):
+        base_content = b"line1\nline2"
+        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
+
+        base_blob = Blob()
+        base_blob.set_raw_string(base_content)
+        # Sanity-check the fixture before running the conversion
+        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
+        self.assertEqual(base_blob.sha().hexdigest(), base_sha)
+
+        filtered_blob = normalize_blob(
+            base_blob, convert_lf_to_crlf, binary_detection=False
+        )
+
+        normalized_content = b"line1\r\nline2"
+        normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
+        # LF expanded to CRLF: content and sha match the CRLF fixture
+        self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
+        self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)