123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456 |
- # test_line_ending.py -- Tests for the line ending functions
- # Copyright (C) 2018-2019 Boris Feld <boris.feld@comet.ml>
- #
- # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
- # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
- # General Public License as published by the Free Software Foundation; version 2.0
- # or (at your option) any later version. You can redistribute it and/or
- # modify it under the terms of either of these two licenses.
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- # You should have received a copy of the licenses; if not, see
- # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
- # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
- # License, Version 2.0.
- #
- """Tests for the line ending conversion."""
- from dulwich.line_ending import (
- BlobNormalizer,
- LineEndingFilter,
- convert_crlf_to_lf,
- convert_lf_to_crlf,
- get_clean_filter_autocrlf,
- get_smudge_filter_autocrlf,
- normalize_blob,
- )
- from dulwich.objects import Blob
- from . import TestCase
class LineEndingConversion(TestCase):
    """Exercise ``convert_crlf_to_lf`` and ``convert_lf_to_crlf`` on raw bytes."""

    def test_convert_crlf_to_lf_no_op(self) -> None:
        """Bytes without any line ending come back unchanged."""
        converted = convert_crlf_to_lf(b"foobar")
        self.assertEqual(converted, b"foobar")

    def test_convert_crlf_to_lf(self) -> None:
        """A CRLF sequence is rewritten to a single LF."""
        converted = convert_crlf_to_lf(b"line1\r\nline2")
        self.assertEqual(converted, b"line1\nline2")

    def test_convert_crlf_to_lf_mixed(self) -> None:
        """CRLF becomes LF while an already bare LF is kept as is."""
        converted = convert_crlf_to_lf(b"line1\r\n\nline2")
        self.assertEqual(converted, b"line1\n\nline2")

    def test_convert_lf_to_crlf_no_op(self) -> None:
        """Bytes without any line ending come back unchanged."""
        converted = convert_lf_to_crlf(b"foobar")
        self.assertEqual(converted, b"foobar")

    def test_convert_lf_to_crlf(self) -> None:
        """A bare LF is rewritten to CRLF."""
        converted = convert_lf_to_crlf(b"line1\nline2")
        self.assertEqual(converted, b"line1\r\nline2")

    def test_convert_lf_to_crlf_mixed(self) -> None:
        """A bare LF becomes CRLF while an existing CRLF is not doubled."""
        converted = convert_lf_to_crlf(b"line1\r\n\nline2")
        self.assertEqual(converted, b"line1\r\n\r\nline2")
class GetLineEndingAutocrlfFilters(TestCase):
    """Check which conversion function each autocrlf value selects.

    The clean and smudge lookups are queried with ``b"false"``, ``b"true"``
    and ``b"input"``. Results are compared by identity: the expected values
    are either ``None`` or one specific module-level function.
    """

    def test_get_clean_filter_autocrlf_default(self) -> None:
        """``b"false"`` selects no clean filter."""
        self.assertIsNone(get_clean_filter_autocrlf(b"false"))

    def test_get_clean_filter_autocrlf_true(self) -> None:
        """``b"true"`` selects ``convert_crlf_to_lf`` as the clean filter."""
        self.assertIs(get_clean_filter_autocrlf(b"true"), convert_crlf_to_lf)

    def test_get_clean_filter_autocrlf_input(self) -> None:
        """``b"input"`` selects ``convert_crlf_to_lf`` as the clean filter."""
        self.assertIs(get_clean_filter_autocrlf(b"input"), convert_crlf_to_lf)

    def test_get_smudge_filter_autocrlf_default(self) -> None:
        """``b"false"`` selects no smudge filter."""
        self.assertIsNone(get_smudge_filter_autocrlf(b"false"))

    def test_get_smudge_filter_autocrlf_true(self) -> None:
        """``b"true"`` selects ``convert_lf_to_crlf`` as the smudge filter."""
        self.assertIs(get_smudge_filter_autocrlf(b"true"), convert_lf_to_crlf)

    def test_get_smudge_filter_autocrlf_input(self) -> None:
        """``b"input"`` selects no smudge filter."""
        self.assertIsNone(get_smudge_filter_autocrlf(b"input"))
class NormalizeBlobTestCase(TestCase):
    """Run ``normalize_blob`` over LF, CRLF and NUL-containing blob contents.

    Every test builds a blob, confirms its raw chunks and SHA, normalizes it
    with one of the conversion functions and then checks the chunks and SHA
    of the blob that comes back.
    """

    def _assert_blob(self, blob: Blob, content: bytes, hex_sha: str) -> None:
        """Assert that *blob* holds exactly *content* and hashes to *hex_sha*."""
        self.assertEqual(blob.as_raw_chunks(), [content])
        self.assertEqual(blob.sha().hexdigest(), hex_sha)

    def _new_blob(self, content: bytes, hex_sha: str) -> Blob:
        """Create a blob from *content* and verify it before normalization."""
        blob = Blob()
        blob.set_raw_string(content)
        self._assert_blob(blob, content, hex_sha)
        return blob

    def test_normalize_to_lf_no_op(self) -> None:
        """LF-only content is unchanged by the CRLF-to-LF conversion."""
        content = b"line1\nline2"
        sha = "f8be7bb828880727816015d21abcbc37d033f233"
        source = self._new_blob(content, sha)

        result = normalize_blob(source, convert_crlf_to_lf, binary_detection=False)

        self._assert_blob(result, content, sha)

    def test_normalize_to_lf(self) -> None:
        """CRLF content is rewritten to LF, which changes the blob SHA."""
        source = self._new_blob(
            b"line1\r\nline2", "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
        )

        result = normalize_blob(source, convert_crlf_to_lf, binary_detection=False)

        self._assert_blob(
            result, b"line1\nline2", "f8be7bb828880727816015d21abcbc37d033f233"
        )

    def test_normalize_to_lf_binary(self) -> None:
        """With binary detection on, NUL-containing content is not converted."""
        content = b"line1\r\nline2\0"
        sha = "b44504193b765f7cd79673812de8afb55b372ab2"
        source = self._new_blob(content, sha)

        result = normalize_blob(source, convert_crlf_to_lf, binary_detection=True)

        self._assert_blob(result, content, sha)

    def test_normalize_to_crlf_no_op(self) -> None:
        """CRLF-only content is unchanged by the LF-to-CRLF conversion."""
        content = b"line1\r\nline2"
        sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
        source = self._new_blob(content, sha)

        result = normalize_blob(source, convert_lf_to_crlf, binary_detection=False)

        self._assert_blob(result, content, sha)

    def test_normalize_to_crlf(self) -> None:
        """LF content is rewritten to CRLF, which changes the blob SHA."""
        source = self._new_blob(
            b"line1\nline2", "f8be7bb828880727816015d21abcbc37d033f233"
        )

        result = normalize_blob(source, convert_lf_to_crlf, binary_detection=False)

        self._assert_blob(
            result, b"line1\r\nline2", "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
        )

    def test_normalize_to_crlf_binary(self) -> None:
        """With binary detection on, NUL-containing content is not converted."""
        content = b"line1\r\nline2\0"
        sha = "b44504193b765f7cd79673812de8afb55b372ab2"
        source = self._new_blob(content, sha)

        result = normalize_blob(source, convert_lf_to_crlf, binary_detection=True)

        self._assert_blob(result, content, sha)
class LineEndingFilterTests(TestCase):
    """Test the ``clean`` and ``smudge`` methods of ``LineEndingFilter``.

    The filter under test is bound to ``line_filter`` rather than ``filter``
    so the builtin of that name is not shadowed inside the tests.
    """

    def test_clean_no_conversion(self) -> None:
        """Clean returns the data unchanged when no conversion is configured."""
        line_filter = LineEndingFilter()
        data = b"test\r\ndata"
        self.assertEqual(line_filter.clean(data), data)

    def test_clean_with_conversion(self) -> None:
        """Clean applies the configured CRLF-to-LF conversion."""
        line_filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
        data = b"test\r\ndata"
        self.assertEqual(line_filter.clean(data), b"test\ndata")

    def test_clean_binary_detection(self) -> None:
        """Clean skips conversion for binary data when detection is enabled."""
        line_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf, binary_detection=True
        )
        # Binary data with null byte
        data = b"test\r\n\x00data"
        # The CRLF must survive: the payload should not be converted.
        self.assertEqual(line_filter.clean(data), data)

    def test_smudge_no_conversion(self) -> None:
        """Smudge returns the data unchanged when no conversion is configured."""
        line_filter = LineEndingFilter()
        data = b"test\ndata"
        self.assertEqual(line_filter.smudge(data), data)

    def test_smudge_with_conversion(self) -> None:
        """Smudge applies the configured LF-to-CRLF conversion."""
        line_filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
        data = b"test\ndata"
        self.assertEqual(line_filter.smudge(data), b"test\r\ndata")

    def test_smudge_binary_detection(self) -> None:
        """Smudge skips conversion for binary data when detection is enabled."""
        line_filter = LineEndingFilter(
            smudge_conversion=convert_lf_to_crlf, binary_detection=True
        )
        # Binary data with null byte
        data = b"test\n\x00data"
        # The bare LF must survive: the payload should not be converted.
        self.assertEqual(line_filter.smudge(data), data)
class BlobNormalizerTests(TestCase):
    """Exercise ``BlobNormalizer`` check-in and check-out normalization."""

    def setUp(self) -> None:
        """Give every test a fresh config and an empty gitattributes mapping."""
        super().setUp()
        from dulwich.config import ConfigDict

        self.config = ConfigDict()
        self.gitattributes = {}

    def _normalizer(self, autocrlf: bytes) -> BlobNormalizer:
        """Build a normalizer from the current fixtures and *autocrlf* value."""
        return BlobNormalizer(self.config, self.gitattributes, autocrlf=autocrlf)

    @staticmethod
    def _blob(data: bytes) -> Blob:
        """Return a new blob whose ``data`` attribute is set to *data*."""
        blob = Blob()
        blob.data = data
        return blob

    def test_autocrlf_true_checkin(self) -> None:
        """With autocrlf=true, check-in turns CRLF into LF."""
        normalizer = self._normalizer(b"true")
        crlf_blob = self._blob(b"line1\r\nline2\r\n")

        checked_in = normalizer.checkin_normalize(crlf_blob, b"test.txt")

        self.assertEqual(checked_in.data, b"line1\nline2\n")

    def test_autocrlf_true_checkout(self) -> None:
        """With autocrlf=true, check-out turns LF into CRLF."""
        normalizer = self._normalizer(b"true")
        lf_blob = self._blob(b"line1\nline2\n")

        checked_out = normalizer.checkout_normalize(lf_blob, b"test.txt")

        self.assertEqual(checked_out.data, b"line1\r\nline2\r\n")

    def test_autocrlf_input_checkin(self) -> None:
        """With autocrlf=input, check-in turns CRLF into LF."""
        normalizer = self._normalizer(b"input")
        crlf_blob = self._blob(b"line1\r\nline2\r\n")

        checked_in = normalizer.checkin_normalize(crlf_blob, b"test.txt")

        self.assertEqual(checked_in.data, b"line1\nline2\n")

    def test_autocrlf_input_checkout(self) -> None:
        """With autocrlf=input, check-out hands back the very same blob."""
        normalizer = self._normalizer(b"input")
        lf_blob = self._blob(b"line1\nline2\n")

        checked_out = normalizer.checkout_normalize(lf_blob, b"test.txt")

        # Identity, not equality: input mode must not produce a new blob.
        self.assertIs(checked_out, lf_blob)

    def test_autocrlf_false(self) -> None:
        """With autocrlf=false, neither direction produces a new blob."""
        normalizer = self._normalizer(b"false")
        mixed_blob = self._blob(b"line1\r\nline2\nline3")

        checked_in = normalizer.checkin_normalize(mixed_blob, b"test.txt")
        self.assertIs(checked_in, mixed_blob)

        checked_out = normalizer.checkout_normalize(mixed_blob, b"test.txt")
        self.assertIs(checked_out, mixed_blob)

    def test_gitattributes_text_attr(self) -> None:
        """A ``text`` attribute triggers conversion even when autocrlf=false."""
        # The attribute has to be registered before the normalizer is built.
        self.gitattributes[b"*.txt"] = {b"text": True}
        normalizer = self._normalizer(b"false")
        crlf_blob = self._blob(b"line1\r\nline2\r\n")

        checked_in = normalizer.checkin_normalize(crlf_blob, b"test.txt")

        # Only "a different blob came back" is asserted here; the exact
        # bytes are left unchecked because, with text=true and no eol
        # setting, the outcome follows platform defaults.
        self.assertIsNot(checked_in, crlf_blob)

    def test_gitattributes_binary_attr(self) -> None:
        """A false ``text`` attribute blocks conversion despite autocrlf=true."""
        self.gitattributes[b"*.bin"] = {b"text": False}
        normalizer = self._normalizer(b"true")
        crlf_blob = self._blob(b"line1\r\nline2\r\n")

        checked_in = normalizer.checkin_normalize(crlf_blob, b"test.bin")

        self.assertIs(checked_in, crlf_blob)

    def test_binary_file_detection(self) -> None:
        """Binary content comes back as the same blob in both directions."""
        normalizer = self._normalizer(b"true")
        binary_blob = self._blob(b"line1\r\n\x00\xffbinary\r\ndata")

        checked_in = normalizer.checkin_normalize(binary_blob, b"binary.dat")
        self.assertIs(checked_in, binary_blob)

        checked_out = normalizer.checkout_normalize(binary_blob, b"binary.dat")
        self.assertIs(checked_out, binary_blob)
class LineEndingIntegrationTests(TestCase):
    """Integration tests for line ending conversion with the filter system.

    Filters are registered on a ``FilterRegistry`` and reached through a
    ``FilterBlobNormalizer`` that picks the driver from the ``filter``
    attribute of the matching gitattributes pattern.
    """

    def setUp(self) -> None:
        """Create a fresh config and a filter registry bound to it."""
        super().setUp()
        from dulwich.config import ConfigDict
        from dulwich.filters import FilterRegistry

        self.config = ConfigDict()
        self.registry = FilterRegistry(self.config)

    def test_filter_registry_with_line_endings(self) -> None:
        """Test that line ending filters work through the registry."""
        from dulwich.attrs import GitAttributes, Pattern
        from dulwich.filters import FilterBlobNormalizer

        # Register a custom text filter that does line ending conversion.
        # Named ``text_filter`` so the builtin ``filter`` is not shadowed.
        text_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf,
            smudge_conversion=convert_lf_to_crlf,
            binary_detection=True,
        )
        self.registry.register_driver("text", text_filter)

        # Route *.txt paths to the "text" driver registered above.
        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
        gitattributes = GitAttributes(patterns)

        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)

        # Test round trip
        blob = Blob()
        blob.data = b"Hello\r\nWorld\r\n"

        # Checkin should convert CRLF to LF
        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
        self.assertEqual(checked_in.data, b"Hello\nWorld\n")

        # Checkout should convert LF to CRLF
        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")

    def test_mixed_filters(self) -> None:
        """Test multiple filters can coexist (line endings and LFS)."""
        from dulwich.attrs import GitAttributes, Pattern
        from dulwich.filters import FilterBlobNormalizer

        # This would be a more complex test requiring LFS setup
        # For now, just verify the structure works
        text_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf,
            smudge_conversion=convert_lf_to_crlf,
        )
        self.registry.register_driver("text", text_filter)

        class MockLFSFilter:
            """Stand-in LFS driver that ignores its input and returns markers."""

            def clean(self, data: bytes) -> bytes:
                """Return a fixed marker instead of a real LFS pointer."""
                return b"LFS pointer"

            def smudge(self, data: bytes) -> bytes:
                """Return a fixed marker instead of real LFS content."""
                return b"LFS content"

        self.registry.register_driver("lfs", MockLFSFilter())

        # Different files use different filters
        patterns = [
            (Pattern(b"*.txt"), {b"filter": b"text"}),
            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
        ]
        gitattributes = GitAttributes(patterns)

        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)

        # Text file gets line ending conversion
        text_blob = Blob()
        text_blob.data = b"text\r\nfile"
        result = normalizer.checkin_normalize(text_blob, b"test.txt")
        self.assertEqual(result.data, b"text\nfile")

        # Binary file gets LFS conversion
        bin_blob = Blob()
        bin_blob.data = b"binary content"
        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
        self.assertEqual(result.data, b"LFS pointer")
|