test_line_ending.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. # test_line_ending.py -- Tests for the line ending functions
  2. # Copyright (C) 2018-2019 Boris Feld <boris.feld@comet.ml>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for the line ending conversion."""
  22. from dulwich.line_ending import (
  23. BlobNormalizer,
  24. LineEndingFilter,
  25. convert_crlf_to_lf,
  26. convert_lf_to_crlf,
  27. get_clean_filter_autocrlf,
  28. get_smudge_filter_autocrlf,
  29. normalize_blob,
  30. )
  31. from dulwich.objects import Blob
  32. from . import TestCase
  33. class LineEndingConversion(TestCase):
  34. """Test the line ending conversion functions in various cases."""
  35. def test_convert_crlf_to_lf_no_op(self) -> None:
  36. self.assertEqual(convert_crlf_to_lf(b"foobar"), b"foobar")
  37. def test_convert_crlf_to_lf(self) -> None:
  38. self.assertEqual(convert_crlf_to_lf(b"line1\r\nline2"), b"line1\nline2")
  39. def test_convert_crlf_to_lf_mixed(self) -> None:
  40. self.assertEqual(convert_crlf_to_lf(b"line1\r\n\nline2"), b"line1\n\nline2")
  41. def test_convert_lf_to_crlf_no_op(self) -> None:
  42. self.assertEqual(convert_lf_to_crlf(b"foobar"), b"foobar")
  43. def test_convert_lf_to_crlf(self) -> None:
  44. self.assertEqual(convert_lf_to_crlf(b"line1\nline2"), b"line1\r\nline2")
  45. def test_convert_lf_to_crlf_mixed(self) -> None:
  46. self.assertEqual(convert_lf_to_crlf(b"line1\r\n\nline2"), b"line1\r\n\r\nline2")
  47. class GetLineEndingAutocrlfFilters(TestCase):
  48. def test_get_clean_filter_autocrlf_default(self) -> None:
  49. clean_filter = get_clean_filter_autocrlf(b"false")
  50. self.assertEqual(clean_filter, None)
  51. def test_get_clean_filter_autocrlf_true(self) -> None:
  52. clean_filter = get_clean_filter_autocrlf(b"true")
  53. self.assertEqual(clean_filter, convert_crlf_to_lf)
  54. def test_get_clean_filter_autocrlf_input(self) -> None:
  55. clean_filter = get_clean_filter_autocrlf(b"input")
  56. self.assertEqual(clean_filter, convert_crlf_to_lf)
  57. def test_get_smudge_filter_autocrlf_default(self) -> None:
  58. smudge_filter = get_smudge_filter_autocrlf(b"false")
  59. self.assertEqual(smudge_filter, None)
  60. def test_get_smudge_filter_autocrlf_true(self) -> None:
  61. smudge_filter = get_smudge_filter_autocrlf(b"true")
  62. self.assertEqual(smudge_filter, convert_lf_to_crlf)
  63. def test_get_smudge_filter_autocrlf_input(self) -> None:
  64. smudge_filter = get_smudge_filter_autocrlf(b"input")
  65. self.assertEqual(smudge_filter, None)
  66. class NormalizeBlobTestCase(TestCase):
  67. def test_normalize_to_lf_no_op(self) -> None:
  68. base_content = b"line1\nline2"
  69. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  70. base_blob = Blob()
  71. base_blob.set_raw_string(base_content)
  72. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  73. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  74. filtered_blob = normalize_blob(
  75. base_blob, convert_crlf_to_lf, binary_detection=False
  76. )
  77. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  78. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  79. def test_normalize_to_lf(self) -> None:
  80. base_content = b"line1\r\nline2"
  81. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  82. base_blob = Blob()
  83. base_blob.set_raw_string(base_content)
  84. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  85. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  86. filtered_blob = normalize_blob(
  87. base_blob, convert_crlf_to_lf, binary_detection=False
  88. )
  89. normalized_content = b"line1\nline2"
  90. normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  91. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  92. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  93. def test_normalize_to_lf_binary(self) -> None:
  94. base_content = b"line1\r\nline2\0"
  95. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  96. base_blob = Blob()
  97. base_blob.set_raw_string(base_content)
  98. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  99. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  100. filtered_blob = normalize_blob(
  101. base_blob, convert_crlf_to_lf, binary_detection=True
  102. )
  103. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  104. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  105. def test_normalize_to_crlf_no_op(self) -> None:
  106. base_content = b"line1\r\nline2"
  107. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  108. base_blob = Blob()
  109. base_blob.set_raw_string(base_content)
  110. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  111. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  112. filtered_blob = normalize_blob(
  113. base_blob, convert_lf_to_crlf, binary_detection=False
  114. )
  115. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  116. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  117. def test_normalize_to_crlf(self) -> None:
  118. base_content = b"line1\nline2"
  119. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  120. base_blob = Blob()
  121. base_blob.set_raw_string(base_content)
  122. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  123. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  124. filtered_blob = normalize_blob(
  125. base_blob, convert_lf_to_crlf, binary_detection=False
  126. )
  127. normalized_content = b"line1\r\nline2"
  128. normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  129. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  130. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  131. def test_normalize_to_crlf_binary(self) -> None:
  132. base_content = b"line1\r\nline2\0"
  133. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  134. base_blob = Blob()
  135. base_blob.set_raw_string(base_content)
  136. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  137. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  138. filtered_blob = normalize_blob(
  139. base_blob, convert_lf_to_crlf, binary_detection=True
  140. )
  141. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  142. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  143. class LineEndingFilterTests(TestCase):
  144. """Test the LineEndingFilter class."""
  145. def test_clean_no_conversion(self) -> None:
  146. """Test clean with no conversion function."""
  147. filter = LineEndingFilter()
  148. data = b"test\r\ndata"
  149. self.assertEqual(filter.clean(data), data)
  150. def test_clean_with_conversion(self) -> None:
  151. """Test clean with CRLF to LF conversion."""
  152. filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
  153. data = b"test\r\ndata"
  154. self.assertEqual(filter.clean(data), b"test\ndata")
  155. def test_clean_binary_detection(self) -> None:
  156. """Test clean skips binary files."""
  157. filter = LineEndingFilter(
  158. clean_conversion=convert_crlf_to_lf, binary_detection=True
  159. )
  160. # Binary data with null byte
  161. data = b"test\r\n\x00data"
  162. self.assertEqual(filter.clean(data), data) # Should not convert
  163. def test_smudge_no_conversion(self) -> None:
  164. """Test smudge with no conversion function."""
  165. filter = LineEndingFilter()
  166. data = b"test\ndata"
  167. self.assertEqual(filter.smudge(data), data)
  168. def test_smudge_with_conversion(self) -> None:
  169. """Test smudge with LF to CRLF conversion."""
  170. filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
  171. data = b"test\ndata"
  172. self.assertEqual(filter.smudge(data), b"test\r\ndata")
  173. def test_smudge_binary_detection(self) -> None:
  174. """Test smudge skips binary files."""
  175. filter = LineEndingFilter(
  176. smudge_conversion=convert_lf_to_crlf, binary_detection=True
  177. )
  178. # Binary data with null byte
  179. data = b"test\n\x00data"
  180. self.assertEqual(filter.smudge(data), data) # Should not convert
  181. class BlobNormalizerTests(TestCase):
  182. """Test the BlobNormalizer class integration with filters."""
  183. def setUp(self) -> None:
  184. super().setUp()
  185. from dulwich.config import ConfigDict
  186. self.config = ConfigDict()
  187. self.gitattributes = {}
  188. def test_autocrlf_true_checkin(self) -> None:
  189. """Test checkin with autocrlf=true."""
  190. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  191. # Create blob with CRLF
  192. blob = Blob()
  193. blob.data = b"line1\r\nline2\r\n"
  194. # Should convert to LF on checkin
  195. result = normalizer.checkin_normalize(blob, b"test.txt")
  196. self.assertEqual(result.data, b"line1\nline2\n")
  197. def test_autocrlf_true_checkout(self) -> None:
  198. """Test checkout with autocrlf=true."""
  199. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  200. # Create blob with LF
  201. blob = Blob()
  202. blob.data = b"line1\nline2\n"
  203. # Should convert to CRLF on checkout
  204. result = normalizer.checkout_normalize(blob, b"test.txt")
  205. self.assertEqual(result.data, b"line1\r\nline2\r\n")
  206. def test_autocrlf_input_checkin(self) -> None:
  207. """Test checkin with autocrlf=input."""
  208. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  209. # Create blob with CRLF
  210. blob = Blob()
  211. blob.data = b"line1\r\nline2\r\n"
  212. # Should convert to LF on checkin
  213. result = normalizer.checkin_normalize(blob, b"test.txt")
  214. self.assertEqual(result.data, b"line1\nline2\n")
  215. def test_autocrlf_input_checkout(self) -> None:
  216. """Test checkout with autocrlf=input."""
  217. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  218. # Create blob with LF
  219. blob = Blob()
  220. blob.data = b"line1\nline2\n"
  221. # Should NOT convert on checkout with input mode
  222. result = normalizer.checkout_normalize(blob, b"test.txt")
  223. self.assertIs(result, blob) # Same object, no conversion
  224. def test_autocrlf_false(self) -> None:
  225. """Test with autocrlf=false (no conversion)."""
  226. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  227. # Create blob with mixed line endings
  228. blob = Blob()
  229. blob.data = b"line1\r\nline2\nline3"
  230. # Should not convert on either operation
  231. result = normalizer.checkin_normalize(blob, b"test.txt")
  232. self.assertIs(result, blob)
  233. result = normalizer.checkout_normalize(blob, b"test.txt")
  234. self.assertIs(result, blob)
  235. def test_gitattributes_text_attr(self) -> None:
  236. """Test gitattributes text attribute overrides autocrlf."""
  237. # Set gitattributes to force text conversion
  238. self.gitattributes[b"*.txt"] = {b"text": True}
  239. # Even with autocrlf=false, should convert based on gitattributes
  240. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  241. blob = Blob()
  242. blob.data = b"line1\r\nline2\r\n"
  243. # Should still convert because of gitattributes
  244. result = normalizer.checkin_normalize(blob, b"test.txt")
  245. # Note: with just text=true and no eol setting, it follows platform defaults
  246. # For checkin, it should always normalize to LF
  247. self.assertIsNot(result, blob)
  248. def test_gitattributes_binary_attr(self) -> None:
  249. """Test gitattributes -text attribute prevents conversion."""
  250. # Set gitattributes to force binary (no conversion)
  251. self.gitattributes[b"*.bin"] = {b"text": False}
  252. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  253. blob = Blob()
  254. blob.data = b"line1\r\nline2\r\n"
  255. # Should not convert despite autocrlf=true
  256. result = normalizer.checkin_normalize(blob, b"test.bin")
  257. self.assertIs(result, blob)
  258. def test_binary_file_detection(self) -> None:
  259. """Test that binary files are not converted."""
  260. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  261. # Create blob with binary content
  262. blob = Blob()
  263. blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
  264. # Should not convert binary files
  265. result = normalizer.checkin_normalize(blob, b"binary.dat")
  266. self.assertIs(result, blob)
  267. result = normalizer.checkout_normalize(blob, b"binary.dat")
  268. self.assertIs(result, blob)
  269. class LineEndingIntegrationTests(TestCase):
  270. """Integration tests for line ending conversion with the filter system."""
  271. def setUp(self) -> None:
  272. super().setUp()
  273. from dulwich.config import ConfigDict
  274. from dulwich.filters import FilterRegistry
  275. self.config = ConfigDict()
  276. self.registry = FilterRegistry(self.config)
  277. def test_filter_registry_with_line_endings(self) -> None:
  278. """Test that line ending filters work through the registry."""
  279. # Register a custom text filter that does line ending conversion
  280. filter = LineEndingFilter(
  281. clean_conversion=convert_crlf_to_lf,
  282. smudge_conversion=convert_lf_to_crlf,
  283. binary_detection=True,
  284. )
  285. self.registry.register_driver("text", filter)
  286. # Set up gitattributes
  287. # Create GitAttributes
  288. from dulwich.attrs import GitAttributes, Pattern
  289. patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
  290. gitattributes = GitAttributes(patterns)
  291. # Create normalizer
  292. from dulwich.filters import FilterBlobNormalizer
  293. normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
  294. # Test round trip
  295. blob = Blob()
  296. blob.data = b"Hello\r\nWorld\r\n"
  297. # Checkin should convert CRLF to LF
  298. checked_in = normalizer.checkin_normalize(blob, b"test.txt")
  299. self.assertEqual(checked_in.data, b"Hello\nWorld\n")
  300. # Checkout should convert LF to CRLF
  301. checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
  302. self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
  303. def test_mixed_filters(self) -> None:
  304. """Test multiple filters can coexist (line endings and LFS)."""
  305. # This would be a more complex test requiring LFS setup
  306. # For now, just verify the structure works
  307. text_filter = LineEndingFilter(
  308. clean_conversion=convert_crlf_to_lf,
  309. smudge_conversion=convert_lf_to_crlf,
  310. )
  311. self.registry.register_driver("text", text_filter)
  312. # Mock LFS filter
  313. class MockLFSFilter:
  314. def clean(self, data):
  315. return b"LFS pointer"
  316. def smudge(self, data):
  317. return b"LFS content"
  318. self.registry.register_driver("lfs", MockLFSFilter())
  319. # Different files use different filters
  320. from dulwich.attrs import GitAttributes, Pattern
  321. patterns = [
  322. (Pattern(b"*.txt"), {b"filter": b"text"}),
  323. (Pattern(b"*.bin"), {b"filter": b"lfs"}),
  324. ]
  325. gitattributes = GitAttributes(patterns)
  326. from dulwich.filters import FilterBlobNormalizer
  327. normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
  328. # Text file gets line ending conversion
  329. text_blob = Blob()
  330. text_blob.data = b"text\r\nfile"
  331. result = normalizer.checkin_normalize(text_blob, b"test.txt")
  332. self.assertEqual(result.data, b"text\nfile")
  333. # Binary file gets LFS conversion
  334. bin_blob = Blob()
  335. bin_blob.data = b"binary content"
  336. result = normalizer.checkin_normalize(bin_blob, b"test.bin")
  337. self.assertEqual(result.data, b"LFS pointer")