test_line_ending.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. # test_line_ending.py -- Tests for the line ending functions
  2. # Copyright (C) 2018-2019 Boris Feld <boris.feld@comet.ml>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for the line ending conversion."""
  22. from dulwich.line_ending import (
  23. BlobNormalizer,
  24. LineEndingFilter,
  25. TreeBlobNormalizer,
  26. convert_crlf_to_lf,
  27. convert_lf_to_crlf,
  28. get_clean_filter_autocrlf,
  29. get_smudge_filter_autocrlf,
  30. normalize_blob,
  31. )
  32. from dulwich.objects import Blob
  33. from . import TestCase
  34. class LineEndingConversion(TestCase):
  35. """Test the line ending conversion functions in various cases."""
  36. def test_convert_crlf_to_lf_no_op(self) -> None:
  37. self.assertEqual(convert_crlf_to_lf(b"foobar"), b"foobar")
  38. def test_convert_crlf_to_lf(self) -> None:
  39. self.assertEqual(convert_crlf_to_lf(b"line1\r\nline2"), b"line1\nline2")
  40. def test_convert_crlf_to_lf_mixed(self) -> None:
  41. self.assertEqual(convert_crlf_to_lf(b"line1\r\n\nline2"), b"line1\n\nline2")
  42. def test_convert_lf_to_crlf_no_op(self) -> None:
  43. self.assertEqual(convert_lf_to_crlf(b"foobar"), b"foobar")
  44. def test_convert_lf_to_crlf(self) -> None:
  45. self.assertEqual(convert_lf_to_crlf(b"line1\nline2"), b"line1\r\nline2")
  46. def test_convert_lf_to_crlf_mixed(self) -> None:
  47. self.assertEqual(convert_lf_to_crlf(b"line1\r\n\nline2"), b"line1\r\n\r\nline2")
  48. class GetLineEndingAutocrlfFilters(TestCase):
  49. def test_get_clean_filter_autocrlf_default(self) -> None:
  50. clean_filter = get_clean_filter_autocrlf(b"false")
  51. self.assertEqual(clean_filter, None)
  52. def test_get_clean_filter_autocrlf_true(self) -> None:
  53. clean_filter = get_clean_filter_autocrlf(b"true")
  54. self.assertEqual(clean_filter, convert_crlf_to_lf)
  55. def test_get_clean_filter_autocrlf_input(self) -> None:
  56. clean_filter = get_clean_filter_autocrlf(b"input")
  57. self.assertEqual(clean_filter, convert_crlf_to_lf)
  58. def test_get_smudge_filter_autocrlf_default(self) -> None:
  59. smudge_filter = get_smudge_filter_autocrlf(b"false")
  60. self.assertEqual(smudge_filter, None)
  61. def test_get_smudge_filter_autocrlf_true(self) -> None:
  62. smudge_filter = get_smudge_filter_autocrlf(b"true")
  63. self.assertEqual(smudge_filter, convert_lf_to_crlf)
  64. def test_get_smudge_filter_autocrlf_input(self) -> None:
  65. smudge_filter = get_smudge_filter_autocrlf(b"input")
  66. self.assertEqual(smudge_filter, None)
  67. class NormalizeBlobTestCase(TestCase):
  68. def test_normalize_to_lf_no_op(self) -> None:
  69. base_content = b"line1\nline2"
  70. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  71. base_blob = Blob()
  72. base_blob.set_raw_string(base_content)
  73. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  74. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  75. filtered_blob = normalize_blob(
  76. base_blob, convert_crlf_to_lf, binary_detection=False
  77. )
  78. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  79. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  80. def test_normalize_to_lf(self) -> None:
  81. base_content = b"line1\r\nline2"
  82. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  83. base_blob = Blob()
  84. base_blob.set_raw_string(base_content)
  85. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  86. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  87. filtered_blob = normalize_blob(
  88. base_blob, convert_crlf_to_lf, binary_detection=False
  89. )
  90. normalized_content = b"line1\nline2"
  91. normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  92. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  93. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  94. def test_normalize_to_lf_binary(self) -> None:
  95. base_content = b"line1\r\nline2\0"
  96. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  97. base_blob = Blob()
  98. base_blob.set_raw_string(base_content)
  99. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  100. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  101. filtered_blob = normalize_blob(
  102. base_blob, convert_crlf_to_lf, binary_detection=True
  103. )
  104. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  105. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  106. def test_normalize_to_crlf_no_op(self) -> None:
  107. base_content = b"line1\r\nline2"
  108. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  109. base_blob = Blob()
  110. base_blob.set_raw_string(base_content)
  111. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  112. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  113. filtered_blob = normalize_blob(
  114. base_blob, convert_lf_to_crlf, binary_detection=False
  115. )
  116. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  117. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  118. def test_normalize_to_crlf(self) -> None:
  119. base_content = b"line1\nline2"
  120. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  121. base_blob = Blob()
  122. base_blob.set_raw_string(base_content)
  123. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  124. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  125. filtered_blob = normalize_blob(
  126. base_blob, convert_lf_to_crlf, binary_detection=False
  127. )
  128. normalized_content = b"line1\r\nline2"
  129. normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  130. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  131. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  132. def test_normalize_to_crlf_binary(self) -> None:
  133. base_content = b"line1\r\nline2\0"
  134. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  135. base_blob = Blob()
  136. base_blob.set_raw_string(base_content)
  137. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  138. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  139. filtered_blob = normalize_blob(
  140. base_blob, convert_lf_to_crlf, binary_detection=True
  141. )
  142. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  143. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  144. class LineEndingFilterTests(TestCase):
  145. """Test the LineEndingFilter class."""
  146. def test_clean_no_conversion(self) -> None:
  147. """Test clean with no conversion function."""
  148. filter = LineEndingFilter()
  149. data = b"test\r\ndata"
  150. self.assertEqual(filter.clean(data), data)
  151. def test_clean_with_conversion(self) -> None:
  152. """Test clean with CRLF to LF conversion."""
  153. filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
  154. data = b"test\r\ndata"
  155. self.assertEqual(filter.clean(data), b"test\ndata")
  156. def test_clean_binary_detection(self) -> None:
  157. """Test clean skips binary files."""
  158. filter = LineEndingFilter(
  159. clean_conversion=convert_crlf_to_lf, binary_detection=True
  160. )
  161. # Binary data with null byte
  162. data = b"test\r\n\x00data"
  163. self.assertEqual(filter.clean(data), data) # Should not convert
  164. def test_smudge_no_conversion(self) -> None:
  165. """Test smudge with no conversion function."""
  166. filter = LineEndingFilter()
  167. data = b"test\ndata"
  168. self.assertEqual(filter.smudge(data), data)
  169. def test_smudge_with_conversion(self) -> None:
  170. """Test smudge with LF to CRLF conversion."""
  171. filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
  172. data = b"test\ndata"
  173. self.assertEqual(filter.smudge(data), b"test\r\ndata")
  174. def test_smudge_binary_detection(self) -> None:
  175. """Test smudge skips binary files."""
  176. filter = LineEndingFilter(
  177. smudge_conversion=convert_lf_to_crlf, binary_detection=True
  178. )
  179. # Binary data with null byte
  180. data = b"test\n\x00data"
  181. self.assertEqual(filter.smudge(data), data) # Should not convert
  182. class BlobNormalizerTests(TestCase):
  183. """Test the BlobNormalizer class integration with filters."""
  184. def setUp(self) -> None:
  185. super().setUp()
  186. from dulwich.config import ConfigDict
  187. self.config = ConfigDict()
  188. self.gitattributes = {}
  189. def test_autocrlf_true_checkin(self) -> None:
  190. """Test checkin with autocrlf=true."""
  191. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  192. # Create blob with CRLF
  193. blob = Blob()
  194. blob.data = b"line1\r\nline2\r\n"
  195. # Should convert to LF on checkin
  196. result = normalizer.checkin_normalize(blob, b"test.txt")
  197. self.assertEqual(result.data, b"line1\nline2\n")
  198. def test_autocrlf_true_checkout(self) -> None:
  199. """Test checkout with autocrlf=true."""
  200. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  201. # Create blob with LF
  202. blob = Blob()
  203. blob.data = b"line1\nline2\n"
  204. # Should convert to CRLF on checkout
  205. result = normalizer.checkout_normalize(blob, b"test.txt")
  206. self.assertEqual(result.data, b"line1\r\nline2\r\n")
  207. def test_autocrlf_input_checkin(self) -> None:
  208. """Test checkin with autocrlf=input."""
  209. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  210. # Create blob with CRLF
  211. blob = Blob()
  212. blob.data = b"line1\r\nline2\r\n"
  213. # Should convert to LF on checkin
  214. result = normalizer.checkin_normalize(blob, b"test.txt")
  215. self.assertEqual(result.data, b"line1\nline2\n")
  216. def test_autocrlf_input_checkout(self) -> None:
  217. """Test checkout with autocrlf=input."""
  218. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  219. # Create blob with LF
  220. blob = Blob()
  221. blob.data = b"line1\nline2\n"
  222. # Should NOT convert on checkout with input mode
  223. result = normalizer.checkout_normalize(blob, b"test.txt")
  224. self.assertIs(result, blob) # Same object, no conversion
  225. def test_autocrlf_false(self) -> None:
  226. """Test with autocrlf=false (no conversion)."""
  227. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  228. # Create blob with mixed line endings
  229. blob = Blob()
  230. blob.data = b"line1\r\nline2\nline3"
  231. # Should not convert on either operation
  232. result = normalizer.checkin_normalize(blob, b"test.txt")
  233. self.assertIs(result, blob)
  234. result = normalizer.checkout_normalize(blob, b"test.txt")
  235. self.assertIs(result, blob)
  236. def test_gitattributes_text_attr(self) -> None:
  237. """Test gitattributes text attribute overrides autocrlf."""
  238. # Set gitattributes to force text conversion
  239. self.gitattributes[b"*.txt"] = {b"text": True}
  240. # Even with autocrlf=false, should convert based on gitattributes
  241. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  242. blob = Blob()
  243. blob.data = b"line1\r\nline2\r\n"
  244. # Should still convert because of gitattributes
  245. result = normalizer.checkin_normalize(blob, b"test.txt")
  246. # Note: with just text=true and no eol setting, it follows platform defaults
  247. # For checkin, it should always normalize to LF
  248. self.assertIsNot(result, blob)
  249. def test_gitattributes_binary_attr(self) -> None:
  250. """Test gitattributes -text attribute prevents conversion."""
  251. # Set gitattributes to force binary (no conversion)
  252. self.gitattributes[b"*.bin"] = {b"text": False}
  253. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  254. blob = Blob()
  255. blob.data = b"line1\r\nline2\r\n"
  256. # Should not convert despite autocrlf=true
  257. result = normalizer.checkin_normalize(blob, b"test.bin")
  258. self.assertIs(result, blob)
  259. def test_binary_file_detection(self) -> None:
  260. """Test that binary files are not converted."""
  261. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  262. # Create blob with binary content
  263. blob = Blob()
  264. blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
  265. # Should not convert binary files
  266. result = normalizer.checkin_normalize(blob, b"binary.dat")
  267. self.assertIs(result, blob)
  268. result = normalizer.checkout_normalize(blob, b"binary.dat")
  269. self.assertIs(result, blob)
  270. class TreeBlobNormalizerTests(TestCase):
  271. """Test the TreeBlobNormalizer class for existing file handling."""
  272. def setUp(self) -> None:
  273. super().setUp()
  274. from dulwich.config import ConfigDict
  275. from dulwich.object_store import MemoryObjectStore
  276. self.config = ConfigDict()
  277. self.gitattributes = {}
  278. self.object_store = MemoryObjectStore()
  279. def test_autocrlf_input_existing_files(self) -> None:
  280. """Test that autocrlf=input normalizes existing files with CRLF."""
  281. # Create a tree with an existing file
  282. from dulwich.objects import Tree
  283. tree = Tree()
  284. tree[b"existing.txt"] = (0o100644, b"a" * 40) # dummy sha
  285. self.object_store.add_object(tree)
  286. # Create normalizer with autocrlf=input
  287. normalizer = TreeBlobNormalizer(
  288. self.config,
  289. self.gitattributes,
  290. self.object_store,
  291. tree.id,
  292. autocrlf=b"input",
  293. )
  294. # Create blob with CRLF line endings
  295. blob = Blob()
  296. blob.data = b"line1\r\nline2\r\n"
  297. # Should convert CRLF to LF on checkin even for existing files
  298. result = normalizer.checkin_normalize(blob, b"existing.txt")
  299. self.assertEqual(result.data, b"line1\nline2\n")
  300. def test_autocrlf_false_existing_files(self) -> None:
  301. """Test that autocrlf=false does not normalize existing files."""
  302. # Create a tree with an existing file
  303. from dulwich.objects import Tree
  304. tree = Tree()
  305. tree[b"existing.txt"] = (0o100644, b"a" * 40) # dummy sha
  306. self.object_store.add_object(tree)
  307. # Create normalizer with autocrlf=false
  308. normalizer = TreeBlobNormalizer(
  309. self.config,
  310. self.gitattributes,
  311. self.object_store,
  312. tree.id,
  313. autocrlf=b"false",
  314. )
  315. # Create blob with CRLF line endings
  316. blob = Blob()
  317. blob.data = b"line1\r\nline2\r\n"
  318. # Should NOT convert for existing files when autocrlf=false
  319. result = normalizer.checkin_normalize(blob, b"existing.txt")
  320. self.assertIs(result, blob)
  321. def test_autocrlf_input_new_files(self) -> None:
  322. """Test that autocrlf=input normalizes new files."""
  323. # Create empty tree (no existing files)
  324. from dulwich.objects import Tree
  325. tree = Tree()
  326. self.object_store.add_object(tree)
  327. # Create normalizer with autocrlf=input
  328. normalizer = TreeBlobNormalizer(
  329. self.config,
  330. self.gitattributes,
  331. self.object_store,
  332. tree.id,
  333. autocrlf=b"input",
  334. )
  335. # Create blob with CRLF line endings
  336. blob = Blob()
  337. blob.data = b"line1\r\nline2\r\n"
  338. # Should convert CRLF to LF for new files
  339. result = normalizer.checkin_normalize(blob, b"new.txt")
  340. self.assertEqual(result.data, b"line1\nline2\n")
  341. class LineEndingIntegrationTests(TestCase):
  342. """Integration tests for line ending conversion with the filter system."""
  343. def setUp(self) -> None:
  344. super().setUp()
  345. from dulwich.config import ConfigDict
  346. from dulwich.filters import FilterRegistry
  347. self.config = ConfigDict()
  348. self.registry = FilterRegistry(self.config)
  349. def test_filter_registry_with_line_endings(self) -> None:
  350. """Test that line ending filters work through the registry."""
  351. # Register a custom text filter that does line ending conversion
  352. filter = LineEndingFilter(
  353. clean_conversion=convert_crlf_to_lf,
  354. smudge_conversion=convert_lf_to_crlf,
  355. binary_detection=True,
  356. )
  357. self.registry.register_driver("text", filter)
  358. # Set up gitattributes
  359. # Create GitAttributes
  360. from dulwich.attrs import GitAttributes, Pattern
  361. patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
  362. gitattributes = GitAttributes(patterns)
  363. # Create normalizer
  364. from dulwich.filters import FilterBlobNormalizer
  365. normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
  366. # Test round trip
  367. blob = Blob()
  368. blob.data = b"Hello\r\nWorld\r\n"
  369. # Checkin should convert CRLF to LF
  370. checked_in = normalizer.checkin_normalize(blob, b"test.txt")
  371. self.assertEqual(checked_in.data, b"Hello\nWorld\n")
  372. # Checkout should convert LF to CRLF
  373. checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
  374. self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
  375. def test_mixed_filters(self) -> None:
  376. """Test multiple filters can coexist (line endings and LFS)."""
  377. # This would be a more complex test requiring LFS setup
  378. # For now, just verify the structure works
  379. text_filter = LineEndingFilter(
  380. clean_conversion=convert_crlf_to_lf,
  381. smudge_conversion=convert_lf_to_crlf,
  382. )
  383. self.registry.register_driver("text", text_filter)
  384. # Mock LFS filter
  385. class MockLFSFilter:
  386. def clean(self, data):
  387. return b"LFS pointer"
  388. def smudge(self, data):
  389. return b"LFS content"
  390. def cleanup(self):
  391. pass
  392. def reuse(self, config, filter_name):
  393. return False
  394. self.registry.register_driver("lfs", MockLFSFilter())
  395. # Different files use different filters
  396. from dulwich.attrs import GitAttributes, Pattern
  397. patterns = [
  398. (Pattern(b"*.txt"), {b"filter": b"text"}),
  399. (Pattern(b"*.bin"), {b"filter": b"lfs"}),
  400. ]
  401. gitattributes = GitAttributes(patterns)
  402. from dulwich.filters import FilterBlobNormalizer
  403. normalizer = FilterBlobNormalizer(self.config, gitattributes, self.registry)
  404. # Text file gets line ending conversion
  405. text_blob = Blob()
  406. text_blob.data = b"text\r\nfile"
  407. result = normalizer.checkin_normalize(text_blob, b"test.txt")
  408. self.assertEqual(result.data, b"text\nfile")
  409. # Binary file gets LFS conversion
  410. bin_blob = Blob()
  411. bin_blob.data = b"binary content"
  412. result = normalizer.checkin_normalize(bin_blob, b"test.bin")
  413. self.assertEqual(result.data, b"LFS pointer")