# test_line_ending.py -- Tests for the line ending functions
# Copyright (C) 2018-2019 Boris Feld
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Tests for the line ending conversion."""

from dulwich.line_ending import (
    BlobNormalizer,
    LineEndingFilter,
    convert_crlf_to_lf,
    convert_lf_to_crlf,
    get_clean_filter_autocrlf,
    get_smudge_filter_autocrlf,
    normalize_blob,
)
from dulwich.objects import Blob

from . import TestCase


class LineEndingConversion(TestCase):
    """Test the line ending conversion functions in various cases."""

    def test_convert_crlf_to_lf_no_op(self) -> None:
        """Input without any line ending is returned unchanged."""
        self.assertEqual(convert_crlf_to_lf(b"foobar"), b"foobar")

    def test_convert_crlf_to_lf(self) -> None:
        """A CRLF pair is replaced by a single LF."""
        self.assertEqual(convert_crlf_to_lf(b"line1\r\nline2"), b"line1\nline2")

    def test_convert_crlf_to_lf_mixed(self) -> None:
        """With mixed endings, CRLF becomes LF and an existing LF is kept."""
        self.assertEqual(convert_crlf_to_lf(b"line1\r\n\nline2"), b"line1\n\nline2")

    def test_convert_lf_to_crlf_no_op(self) -> None:
        """Input without any line ending is returned unchanged."""
        self.assertEqual(convert_lf_to_crlf(b"foobar"), b"foobar")

    def test_convert_lf_to_crlf(self) -> None:
        """A bare LF is replaced by CRLF."""
        self.assertEqual(convert_lf_to_crlf(b"line1\nline2"), b"line1\r\nline2")

    def test_convert_lf_to_crlf_mixed(self) -> None:
        """With mixed endings, a bare LF becomes CRLF and CRLF is not doubled."""
        self.assertEqual(
            convert_lf_to_crlf(b"line1\r\n\nline2"), b"line1\r\n\r\nline2"
        )


class GetLineEndingAutocrlfFilters(TestCase):
    """Test which clean/smudge filter is selected for each autocrlf value."""

    def test_get_clean_filter_autocrlf_default(self) -> None:
        """autocrlf=false yields no clean filter."""
        clean_filter = get_clean_filter_autocrlf(b"false")

        self.assertIsNone(clean_filter)

    def test_get_clean_filter_autocrlf_true(self) -> None:
        """autocrlf=true cleans with CRLF -> LF conversion."""
        clean_filter = get_clean_filter_autocrlf(b"true")

        self.assertEqual(clean_filter, convert_crlf_to_lf)

    def test_get_clean_filter_autocrlf_input(self) -> None:
        """autocrlf=input cleans with CRLF -> LF conversion."""
        clean_filter = get_clean_filter_autocrlf(b"input")

        self.assertEqual(clean_filter, convert_crlf_to_lf)

    def test_get_smudge_filter_autocrlf_default(self) -> None:
        """autocrlf=false yields no smudge filter."""
        smudge_filter = get_smudge_filter_autocrlf(b"false")

        self.assertIsNone(smudge_filter)

    def test_get_smudge_filter_autocrlf_true(self) -> None:
        """autocrlf=true smudges with LF -> CRLF conversion."""
        smudge_filter = get_smudge_filter_autocrlf(b"true")

        self.assertEqual(smudge_filter, convert_lf_to_crlf)

    def test_get_smudge_filter_autocrlf_input(self) -> None:
        """autocrlf=input yields no smudge filter."""
        smudge_filter = get_smudge_filter_autocrlf(b"input")

        self.assertIsNone(smudge_filter)


class NormalizeBlobTestCase(TestCase):
    """Test normalize_blob with both conversion functions."""

    # Fixture contents paired with the blob SHA-1 the tests expect for them.
    LF_CONTENT = b"line1\nline2"
    LF_SHA = "f8be7bb828880727816015d21abcbc37d033f233"
    CRLF_CONTENT = b"line1\r\nline2"
    CRLF_SHA = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
    BINARY_CONTENT = b"line1\r\nline2\0"
    BINARY_SHA = "b44504193b765f7cd79673812de8afb55b372ab2"

    def _assert_blob(self, blob: Blob, content: bytes, sha: str) -> None:
        """Assert that ``blob`` holds exactly ``content`` and hashes to ``sha``."""
        self.assertEqual(blob.as_raw_chunks(), [content])
        self.assertEqual(blob.sha().hexdigest(), sha)

    def _make_blob(self, content: bytes, sha: str) -> Blob:
        """Build a blob from ``content`` and sanity-check it before use."""
        blob = Blob()
        blob.set_raw_string(content)
        self._assert_blob(blob, content, sha)
        return blob

    def test_normalize_to_lf_no_op(self) -> None:
        """LF-only content is unchanged by CRLF -> LF normalization."""
        base_blob = self._make_blob(self.LF_CONTENT, self.LF_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_crlf_to_lf, binary_detection=False
        )

        self._assert_blob(filtered_blob, self.LF_CONTENT, self.LF_SHA)

    def test_normalize_to_lf(self) -> None:
        """CRLF content is converted to LF and the SHA changes accordingly."""
        base_blob = self._make_blob(self.CRLF_CONTENT, self.CRLF_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_crlf_to_lf, binary_detection=False
        )

        self._assert_blob(filtered_blob, self.LF_CONTENT, self.LF_SHA)

    def test_normalize_to_lf_binary(self) -> None:
        """Content with a NUL byte is left alone when binary detection is on."""
        base_blob = self._make_blob(self.BINARY_CONTENT, self.BINARY_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_crlf_to_lf, binary_detection=True
        )

        self._assert_blob(filtered_blob, self.BINARY_CONTENT, self.BINARY_SHA)

    def test_normalize_to_crlf_no_op(self) -> None:
        """CRLF content is unchanged by LF -> CRLF normalization."""
        base_blob = self._make_blob(self.CRLF_CONTENT, self.CRLF_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_lf_to_crlf, binary_detection=False
        )

        self._assert_blob(filtered_blob, self.CRLF_CONTENT, self.CRLF_SHA)

    def test_normalize_to_crlf(self) -> None:
        """LF content is converted to CRLF and the SHA changes accordingly."""
        base_blob = self._make_blob(self.LF_CONTENT, self.LF_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_lf_to_crlf, binary_detection=False
        )

        self._assert_blob(filtered_blob, self.CRLF_CONTENT, self.CRLF_SHA)

    def test_normalize_to_crlf_binary(self) -> None:
        """Content with a NUL byte is left alone when binary detection is on."""
        base_blob = self._make_blob(self.BINARY_CONTENT, self.BINARY_SHA)

        filtered_blob = normalize_blob(
            base_blob, convert_lf_to_crlf, binary_detection=True
        )

        self._assert_blob(filtered_blob, self.BINARY_CONTENT, self.BINARY_SHA)


class LineEndingFilterTests(TestCase):
    """Test the LineEndingFilter class."""

    def test_clean_no_conversion(self) -> None:
        """Test clean with no conversion function."""
        line_filter = LineEndingFilter()
        data = b"test\r\ndata"
        self.assertEqual(line_filter.clean(data), data)

    def test_clean_with_conversion(self) -> None:
        """Test clean with CRLF to LF conversion."""
        line_filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
        data = b"test\r\ndata"
        self.assertEqual(line_filter.clean(data), b"test\ndata")

    def test_clean_binary_detection(self) -> None:
        """Test clean skips binary files."""
        line_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf, binary_detection=True
        )
        # Binary data with null byte
        data = b"test\r\n\x00data"
        self.assertEqual(line_filter.clean(data), data)  # Should not convert

    def test_smudge_no_conversion(self) -> None:
        """Test smudge with no conversion function."""
        line_filter = LineEndingFilter()
        data = b"test\ndata"
        self.assertEqual(line_filter.smudge(data), data)

    def test_smudge_with_conversion(self) -> None:
        """Test smudge with LF to CRLF conversion."""
        line_filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
        data = b"test\ndata"
        self.assertEqual(line_filter.smudge(data), b"test\r\ndata")

    def test_smudge_binary_detection(self) -> None:
        """Test smudge skips binary files."""
        line_filter = LineEndingFilter(
            smudge_conversion=convert_lf_to_crlf, binary_detection=True
        )
        # Binary data with null byte
        data = b"test\n\x00data"
        self.assertEqual(line_filter.smudge(data), data)  # Should not convert


class BlobNormalizerTests(TestCase):
    """Test the BlobNormalizer class integration with filters."""

    def setUp(self) -> None:
        """Create an empty config and an empty gitattributes mapping."""
        super().setUp()
        from dulwich.config import ConfigDict

        self.config = ConfigDict()
        self.gitattributes = {}

    def test_autocrlf_true_checkin(self) -> None:
        """Test checkin with autocrlf=true."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")

        # Create blob with CRLF
        blob = Blob()
        blob.data = b"line1\r\nline2\r\n"

        # Should convert to LF on checkin
        result = normalizer.checkin_normalize(blob, b"test.txt")
        self.assertEqual(result.data, b"line1\nline2\n")

    def test_autocrlf_true_checkout(self) -> None:
        """Test checkout with autocrlf=true."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")

        # Create blob with LF
        blob = Blob()
        blob.data = b"line1\nline2\n"

        # Should convert to CRLF on checkout
        result = normalizer.checkout_normalize(blob, b"test.txt")
        self.assertEqual(result.data, b"line1\r\nline2\r\n")

    def test_autocrlf_input_checkin(self) -> None:
        """Test checkin with autocrlf=input."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")

        # Create blob with CRLF
        blob = Blob()
        blob.data = b"line1\r\nline2\r\n"

        # Should convert to LF on checkin
        result = normalizer.checkin_normalize(blob, b"test.txt")
        self.assertEqual(result.data, b"line1\nline2\n")

    def test_autocrlf_input_checkout(self) -> None:
        """Test checkout with autocrlf=input."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")

        # Create blob with LF
        blob = Blob()
        blob.data = b"line1\nline2\n"

        # Should NOT convert on checkout with input mode
        result = normalizer.checkout_normalize(blob, b"test.txt")
        self.assertIs(result, blob)  # Same object, no conversion

    def test_autocrlf_false(self) -> None:
        """Test with autocrlf=false (no conversion)."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")

        # Create blob with mixed line endings
        blob = Blob()
        blob.data = b"line1\r\nline2\nline3"

        # Should not convert on either operation
        result = normalizer.checkin_normalize(blob, b"test.txt")
        self.assertIs(result, blob)

        result = normalizer.checkout_normalize(blob, b"test.txt")
        self.assertIs(result, blob)

    def test_gitattributes_text_attr(self) -> None:
        """Test gitattributes text attribute overrides autocrlf."""
        # Set gitattributes to force text conversion
        self.gitattributes[b"*.txt"] = {b"text": True}

        # Even with autocrlf=false, should convert based on gitattributes
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")

        blob = Blob()
        blob.data = b"line1\r\nline2\r\n"

        # Should still convert because of gitattributes
        result = normalizer.checkin_normalize(blob, b"test.txt")
        # Note: with just text=true and no eol setting, it follows platform defaults
        # For checkin, it should always normalize to LF
        self.assertIsNot(result, blob)

    def test_gitattributes_binary_attr(self) -> None:
        """Test gitattributes -text attribute prevents conversion."""
        # Set gitattributes to force binary (no conversion)
        self.gitattributes[b"*.bin"] = {b"text": False}

        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")

        blob = Blob()
        blob.data = b"line1\r\nline2\r\n"

        # Should not convert despite autocrlf=true
        result = normalizer.checkin_normalize(blob, b"test.bin")
        self.assertIs(result, blob)

    def test_binary_file_detection(self) -> None:
        """Test that binary files are not converted."""
        normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")

        # Create blob with binary content
        blob = Blob()
        blob.data = b"line1\r\n\x00\xffbinary\r\ndata"

        # Should not convert binary files
        result = normalizer.checkin_normalize(blob, b"binary.dat")
        self.assertIs(result, blob)

        result = normalizer.checkout_normalize(blob, b"binary.dat")
        self.assertIs(result, blob)


class LineEndingIntegrationTests(TestCase):
    """Integration tests for line ending conversion with the filter system."""

    def setUp(self) -> None:
        """Create an empty config and a filter registry built on it."""
        super().setUp()
        from dulwich.config import ConfigDict
        from dulwich.filters import FilterRegistry

        self.config = ConfigDict()
        self.registry = FilterRegistry(self.config)

    def test_filter_registry_with_line_endings(self) -> None:
        """Test that line ending filters work through the registry."""
        # Register a custom text filter that does line ending conversion
        line_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf,
            smudge_conversion=convert_lf_to_crlf,
            binary_detection=True,
        )
        self.registry.register_driver("text", line_filter)

        # Route *.txt paths to the "text" driver through gitattributes
        from dulwich.attrs import GitAttributes, Pattern

        patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
        gitattributes = GitAttributes(patterns)

        # Create normalizer
        from dulwich.filters import FilterBlobNormalizer

        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)

        # Test round trip
        blob = Blob()
        blob.data = b"Hello\r\nWorld\r\n"

        # Checkin should convert CRLF to LF
        checked_in = normalizer.checkin_normalize(blob, b"test.txt")
        self.assertEqual(checked_in.data, b"Hello\nWorld\n")

        # Checkout should convert LF to CRLF
        checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
        self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")

    def test_mixed_filters(self) -> None:
        """Test multiple filters can coexist (line endings and LFS)."""
        # This would be a more complex test requiring LFS setup
        # For now, just verify the structure works
        text_filter = LineEndingFilter(
            clean_conversion=convert_crlf_to_lf,
            smudge_conversion=convert_lf_to_crlf,
        )
        self.registry.register_driver("text", text_filter)

        # Mock LFS filter
        class MockLFSFilter:
            """Stand-in driver that returns fixed markers instead of real LFS data."""

            def clean(self, data):
                return b"LFS pointer"

            def smudge(self, data):
                return b"LFS content"

        self.registry.register_driver("lfs", MockLFSFilter())

        # Different files use different filters
        from dulwich.attrs import GitAttributes, Pattern

        patterns = [
            (Pattern(b"*.txt"), {b"filter": b"text"}),
            (Pattern(b"*.bin"), {b"filter": b"lfs"}),
        ]
        gitattributes = GitAttributes(patterns)

        from dulwich.filters import FilterBlobNormalizer

        normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)

        # Text file gets line ending conversion
        text_blob = Blob()
        text_blob.data = b"text\r\nfile"
        result = normalizer.checkin_normalize(text_blob, b"test.txt")
        self.assertEqual(result.data, b"text\nfile")

        # Binary file gets LFS conversion
        bin_blob = Blob()
        bin_blob.data = b"binary content"
        result = normalizer.checkin_normalize(bin_blob, b"test.bin")
        self.assertEqual(result.data, b"LFS pointer")