2
0

test_line_ending.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690
  1. # test_line_ending.py -- Tests for the line ending functions
  2. # Copyright (C) 2018-2019 Boris Feld <boris.feld@comet.ml>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for the line ending conversion."""
  22. from dulwich.line_ending import (
  23. BlobNormalizer,
  24. LineEndingFilter,
  25. TreeBlobNormalizer,
  26. check_safecrlf,
  27. convert_crlf_to_lf,
  28. convert_lf_to_crlf,
  29. get_clean_filter_autocrlf,
  30. get_smudge_filter_autocrlf,
  31. normalize_blob,
  32. )
  33. from dulwich.objects import Blob
  34. from . import TestCase
  35. class LineEndingConversion(TestCase):
  36. """Test the line ending conversion functions in various cases."""
  37. def test_convert_crlf_to_lf_no_op(self) -> None:
  38. self.assertEqual(convert_crlf_to_lf(b"foobar"), b"foobar")
  39. def test_convert_crlf_to_lf(self) -> None:
  40. self.assertEqual(convert_crlf_to_lf(b"line1\r\nline2"), b"line1\nline2")
  41. def test_convert_crlf_to_lf_mixed(self) -> None:
  42. self.assertEqual(convert_crlf_to_lf(b"line1\r\n\nline2"), b"line1\n\nline2")
  43. def test_convert_lf_to_crlf_no_op(self) -> None:
  44. self.assertEqual(convert_lf_to_crlf(b"foobar"), b"foobar")
  45. def test_convert_lf_to_crlf(self) -> None:
  46. self.assertEqual(convert_lf_to_crlf(b"line1\nline2"), b"line1\r\nline2")
  47. def test_convert_lf_to_crlf_mixed(self) -> None:
  48. self.assertEqual(convert_lf_to_crlf(b"line1\r\n\nline2"), b"line1\r\n\r\nline2")
  49. class GetLineEndingAutocrlfFilters(TestCase):
  50. def test_get_clean_filter_autocrlf_default(self) -> None:
  51. clean_filter = get_clean_filter_autocrlf(b"false")
  52. self.assertEqual(clean_filter, None)
  53. def test_get_clean_filter_autocrlf_true(self) -> None:
  54. clean_filter = get_clean_filter_autocrlf(b"true")
  55. self.assertEqual(clean_filter, convert_crlf_to_lf)
  56. def test_get_clean_filter_autocrlf_input(self) -> None:
  57. clean_filter = get_clean_filter_autocrlf(b"input")
  58. self.assertEqual(clean_filter, convert_crlf_to_lf)
  59. def test_get_smudge_filter_autocrlf_default(self) -> None:
  60. smudge_filter = get_smudge_filter_autocrlf(b"false")
  61. self.assertEqual(smudge_filter, None)
  62. def test_get_smudge_filter_autocrlf_true(self) -> None:
  63. smudge_filter = get_smudge_filter_autocrlf(b"true")
  64. self.assertEqual(smudge_filter, convert_lf_to_crlf)
  65. def test_get_smudge_filter_autocrlf_input(self) -> None:
  66. smudge_filter = get_smudge_filter_autocrlf(b"input")
  67. self.assertEqual(smudge_filter, None)
  68. class NormalizeBlobTestCase(TestCase):
  69. def test_normalize_to_lf_no_op(self) -> None:
  70. base_content = b"line1\nline2"
  71. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  72. base_blob = Blob()
  73. base_blob.set_raw_string(base_content)
  74. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  75. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  76. filtered_blob = normalize_blob(
  77. base_blob, convert_crlf_to_lf, binary_detection=False
  78. )
  79. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  80. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  81. def test_normalize_to_lf(self) -> None:
  82. base_content = b"line1\r\nline2"
  83. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  84. base_blob = Blob()
  85. base_blob.set_raw_string(base_content)
  86. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  87. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  88. filtered_blob = normalize_blob(
  89. base_blob, convert_crlf_to_lf, binary_detection=False
  90. )
  91. normalized_content = b"line1\nline2"
  92. normalized_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  93. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  94. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  95. def test_normalize_to_lf_binary(self) -> None:
  96. base_content = b"line1\r\nline2\0"
  97. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  98. base_blob = Blob()
  99. base_blob.set_raw_string(base_content)
  100. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  101. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  102. filtered_blob = normalize_blob(
  103. base_blob, convert_crlf_to_lf, binary_detection=True
  104. )
  105. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  106. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  107. def test_normalize_to_crlf_no_op(self) -> None:
  108. base_content = b"line1\r\nline2"
  109. base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  110. base_blob = Blob()
  111. base_blob.set_raw_string(base_content)
  112. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  113. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  114. filtered_blob = normalize_blob(
  115. base_blob, convert_lf_to_crlf, binary_detection=False
  116. )
  117. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  118. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  119. def test_normalize_to_crlf(self) -> None:
  120. base_content = b"line1\nline2"
  121. base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
  122. base_blob = Blob()
  123. base_blob.set_raw_string(base_content)
  124. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  125. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  126. filtered_blob = normalize_blob(
  127. base_blob, convert_lf_to_crlf, binary_detection=False
  128. )
  129. normalized_content = b"line1\r\nline2"
  130. normalized_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
  131. self.assertEqual(filtered_blob.as_raw_chunks(), [normalized_content])
  132. self.assertEqual(filtered_blob.sha().hexdigest(), normalized_sha)
  133. def test_normalize_to_crlf_binary(self) -> None:
  134. base_content = b"line1\r\nline2\0"
  135. base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
  136. base_blob = Blob()
  137. base_blob.set_raw_string(base_content)
  138. self.assertEqual(base_blob.as_raw_chunks(), [base_content])
  139. self.assertEqual(base_blob.sha().hexdigest(), base_sha)
  140. filtered_blob = normalize_blob(
  141. base_blob, convert_lf_to_crlf, binary_detection=True
  142. )
  143. self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
  144. self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
  145. class LineEndingFilterTests(TestCase):
  146. """Test the LineEndingFilter class."""
  147. def test_clean_no_conversion(self) -> None:
  148. """Test clean with no conversion function."""
  149. filter = LineEndingFilter()
  150. data = b"test\r\ndata"
  151. self.assertEqual(filter.clean(data), data)
  152. def test_clean_with_conversion(self) -> None:
  153. """Test clean with CRLF to LF conversion."""
  154. filter = LineEndingFilter(clean_conversion=convert_crlf_to_lf)
  155. data = b"test\r\ndata"
  156. self.assertEqual(filter.clean(data), b"test\ndata")
  157. def test_clean_binary_detection(self) -> None:
  158. """Test clean skips binary files."""
  159. filter = LineEndingFilter(
  160. clean_conversion=convert_crlf_to_lf, binary_detection=True
  161. )
  162. # Binary data with null byte
  163. data = b"test\r\n\x00data"
  164. self.assertEqual(filter.clean(data), data) # Should not convert
  165. def test_smudge_no_conversion(self) -> None:
  166. """Test smudge with no conversion function."""
  167. filter = LineEndingFilter()
  168. data = b"test\ndata"
  169. self.assertEqual(filter.smudge(data), data)
  170. def test_smudge_with_conversion(self) -> None:
  171. """Test smudge with LF to CRLF conversion."""
  172. filter = LineEndingFilter(smudge_conversion=convert_lf_to_crlf)
  173. data = b"test\ndata"
  174. self.assertEqual(filter.smudge(data), b"test\r\ndata")
  175. def test_smudge_binary_detection(self) -> None:
  176. """Test smudge skips binary files."""
  177. filter = LineEndingFilter(
  178. smudge_conversion=convert_lf_to_crlf, binary_detection=True
  179. )
  180. # Binary data with null byte
  181. data = b"test\n\x00data"
  182. self.assertEqual(filter.smudge(data), data) # Should not convert
  183. class BlobNormalizerTests(TestCase):
  184. """Test the BlobNormalizer class integration with filters."""
  185. def setUp(self) -> None:
  186. super().setUp()
  187. from dulwich.config import ConfigDict
  188. self.config = ConfigDict()
  189. self.gitattributes = {}
  190. def test_autocrlf_true_checkin(self) -> None:
  191. """Test checkin with autocrlf=true."""
  192. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  193. # Create blob with CRLF
  194. blob = Blob()
  195. blob.data = b"line1\r\nline2\r\n"
  196. # Should convert to LF on checkin
  197. result = normalizer.checkin_normalize(blob, b"test.txt")
  198. self.assertEqual(result.data, b"line1\nline2\n")
  199. def test_autocrlf_true_checkout(self) -> None:
  200. """Test checkout with autocrlf=true."""
  201. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  202. # Create blob with LF
  203. blob = Blob()
  204. blob.data = b"line1\nline2\n"
  205. # Should convert to CRLF on checkout
  206. result = normalizer.checkout_normalize(blob, b"test.txt")
  207. self.assertEqual(result.data, b"line1\r\nline2\r\n")
  208. def test_autocrlf_input_checkin(self) -> None:
  209. """Test checkin with autocrlf=input."""
  210. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  211. # Create blob with CRLF
  212. blob = Blob()
  213. blob.data = b"line1\r\nline2\r\n"
  214. # Should convert to LF on checkin
  215. result = normalizer.checkin_normalize(blob, b"test.txt")
  216. self.assertEqual(result.data, b"line1\nline2\n")
  217. def test_autocrlf_input_checkout(self) -> None:
  218. """Test checkout with autocrlf=input."""
  219. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"input")
  220. # Create blob with LF
  221. blob = Blob()
  222. blob.data = b"line1\nline2\n"
  223. # Should NOT convert on checkout with input mode
  224. result = normalizer.checkout_normalize(blob, b"test.txt")
  225. self.assertIs(result, blob) # Same object, no conversion
  226. def test_autocrlf_false(self) -> None:
  227. """Test with autocrlf=false (no conversion)."""
  228. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  229. # Create blob with mixed line endings
  230. blob = Blob()
  231. blob.data = b"line1\r\nline2\nline3"
  232. # Should not convert on either operation
  233. result = normalizer.checkin_normalize(blob, b"test.txt")
  234. self.assertIs(result, blob)
  235. result = normalizer.checkout_normalize(blob, b"test.txt")
  236. self.assertIs(result, blob)
  237. def test_gitattributes_text_attr(self) -> None:
  238. """Test gitattributes text attribute overrides autocrlf."""
  239. # Set gitattributes to force text conversion
  240. self.gitattributes[b"*.txt"] = {b"text": True}
  241. # Even with autocrlf=false, should convert based on gitattributes
  242. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"false")
  243. blob = Blob()
  244. blob.data = b"line1\r\nline2\r\n"
  245. # Should still convert because of gitattributes
  246. result = normalizer.checkin_normalize(blob, b"test.txt")
  247. # Note: with just text=true and no eol setting, it follows platform defaults
  248. # For checkin, it should always normalize to LF
  249. self.assertIsNot(result, blob)
  250. def test_gitattributes_binary_attr(self) -> None:
  251. """Test gitattributes -text attribute prevents conversion."""
  252. # Set gitattributes to force binary (no conversion)
  253. self.gitattributes[b"*.bin"] = {b"text": False}
  254. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  255. blob = Blob()
  256. blob.data = b"line1\r\nline2\r\n"
  257. # Should not convert despite autocrlf=true
  258. result = normalizer.checkin_normalize(blob, b"test.bin")
  259. self.assertIs(result, blob)
  260. def test_binary_file_detection(self) -> None:
  261. """Test that binary files are not converted."""
  262. normalizer = BlobNormalizer(self.config, self.gitattributes, autocrlf=b"true")
  263. # Create blob with binary content
  264. blob = Blob()
  265. blob.data = b"line1\r\n\x00\xffbinary\r\ndata"
  266. # Should not convert binary files
  267. result = normalizer.checkin_normalize(blob, b"binary.dat")
  268. self.assertIs(result, blob)
  269. result = normalizer.checkout_normalize(blob, b"binary.dat")
  270. self.assertIs(result, blob)
  271. class TreeBlobNormalizerTests(TestCase):
  272. """Test the TreeBlobNormalizer class for existing file handling."""
  273. def setUp(self) -> None:
  274. super().setUp()
  275. from dulwich.config import ConfigDict
  276. from dulwich.object_store import MemoryObjectStore
  277. self.config = ConfigDict()
  278. self.gitattributes = {}
  279. self.object_store = MemoryObjectStore()
  280. def test_autocrlf_input_existing_files(self) -> None:
  281. """Test that autocrlf=input normalizes existing files with CRLF."""
  282. # Create a tree with an existing file
  283. from dulwich.objects import Tree
  284. tree = Tree()
  285. tree[b"existing.txt"] = (0o100644, b"a" * 40) # dummy sha
  286. self.object_store.add_object(tree)
  287. # Create normalizer with autocrlf=input
  288. normalizer = TreeBlobNormalizer(
  289. self.config,
  290. self.gitattributes,
  291. self.object_store,
  292. tree.id,
  293. autocrlf=b"input",
  294. )
  295. # Create blob with CRLF line endings
  296. blob = Blob()
  297. blob.data = b"line1\r\nline2\r\n"
  298. # Should convert CRLF to LF on checkin even for existing files
  299. result = normalizer.checkin_normalize(blob, b"existing.txt")
  300. self.assertEqual(result.data, b"line1\nline2\n")
  301. def test_autocrlf_false_existing_files(self) -> None:
  302. """Test that autocrlf=false does not normalize existing files."""
  303. # Create a tree with an existing file
  304. from dulwich.objects import Tree
  305. tree = Tree()
  306. tree[b"existing.txt"] = (0o100644, b"a" * 40) # dummy sha
  307. self.object_store.add_object(tree)
  308. # Create normalizer with autocrlf=false
  309. normalizer = TreeBlobNormalizer(
  310. self.config,
  311. self.gitattributes,
  312. self.object_store,
  313. tree.id,
  314. autocrlf=b"false",
  315. )
  316. # Create blob with CRLF line endings
  317. blob = Blob()
  318. blob.data = b"line1\r\nline2\r\n"
  319. # Should NOT convert for existing files when autocrlf=false
  320. result = normalizer.checkin_normalize(blob, b"existing.txt")
  321. self.assertIs(result, blob)
  322. def test_autocrlf_input_new_files(self) -> None:
  323. """Test that autocrlf=input normalizes new files."""
  324. # Create empty tree (no existing files)
  325. from dulwich.objects import Tree
  326. tree = Tree()
  327. self.object_store.add_object(tree)
  328. # Create normalizer with autocrlf=input
  329. normalizer = TreeBlobNormalizer(
  330. self.config,
  331. self.gitattributes,
  332. self.object_store,
  333. tree.id,
  334. autocrlf=b"input",
  335. )
  336. # Create blob with CRLF line endings
  337. blob = Blob()
  338. blob.data = b"line1\r\nline2\r\n"
  339. # Should convert CRLF to LF for new files
  340. result = normalizer.checkin_normalize(blob, b"new.txt")
  341. self.assertEqual(result.data, b"line1\nline2\n")
  342. class LineEndingIntegrationTests(TestCase):
  343. """Integration tests for line ending conversion with the filter system."""
  344. def setUp(self) -> None:
  345. super().setUp()
  346. from dulwich.config import ConfigDict
  347. from dulwich.filters import FilterRegistry
  348. self.config = ConfigDict()
  349. self.registry = FilterRegistry(self.config)
  350. def test_filter_registry_with_line_endings(self) -> None:
  351. """Test that line ending filters work through the registry."""
  352. # Register a custom text filter that does line ending conversion
  353. filter = LineEndingFilter(
  354. clean_conversion=convert_crlf_to_lf,
  355. smudge_conversion=convert_lf_to_crlf,
  356. binary_detection=True,
  357. )
  358. self.registry.register_driver("text", filter)
  359. # Set up gitattributes
  360. # Create GitAttributes
  361. from dulwich.attrs import GitAttributes, Pattern
  362. patterns = [(Pattern(b"*.txt"), {b"filter": b"text"})]
  363. gitattributes = GitAttributes(patterns)
  364. # Create normalizer
  365. from dulwich.filters import FilterBlobNormalizer, FilterContext
  366. filter_context = FilterContext(self.registry)
  367. normalizer = FilterBlobNormalizer(
  368. self.config, gitattributes, filter_context=filter_context
  369. )
  370. # Test round trip
  371. blob = Blob()
  372. blob.data = b"Hello\r\nWorld\r\n"
  373. # Checkin should convert CRLF to LF
  374. checked_in = normalizer.checkin_normalize(blob, b"test.txt")
  375. self.assertEqual(checked_in.data, b"Hello\nWorld\n")
  376. # Checkout should convert LF to CRLF
  377. checked_out = normalizer.checkout_normalize(checked_in, b"test.txt")
  378. self.assertEqual(checked_out.data, b"Hello\r\nWorld\r\n")
  379. def test_mixed_filters(self) -> None:
  380. """Test multiple filters can coexist (line endings and LFS)."""
  381. # This would be a more complex test requiring LFS setup
  382. # For now, just verify the structure works
  383. text_filter = LineEndingFilter(
  384. clean_conversion=convert_crlf_to_lf,
  385. smudge_conversion=convert_lf_to_crlf,
  386. )
  387. self.registry.register_driver("text", text_filter)
  388. # Mock LFS filter
  389. class MockLFSFilter:
  390. def clean(self, data):
  391. return b"LFS pointer"
  392. def smudge(self, data):
  393. return b"LFS content"
  394. def cleanup(self):
  395. pass
  396. def reuse(self, config, filter_name):
  397. return False
  398. self.registry.register_driver("lfs", MockLFSFilter())
  399. # Different files use different filters
  400. from dulwich.attrs import GitAttributes, Pattern
  401. patterns = [
  402. (Pattern(b"*.txt"), {b"filter": b"text"}),
  403. (Pattern(b"*.bin"), {b"filter": b"lfs"}),
  404. ]
  405. gitattributes = GitAttributes(patterns)
  406. from dulwich.filters import FilterBlobNormalizer, FilterContext
  407. filter_context = FilterContext(self.registry)
  408. normalizer = FilterBlobNormalizer(
  409. self.config, gitattributes, filter_context=filter_context
  410. )
  411. # Text file gets line ending conversion
  412. text_blob = Blob()
  413. text_blob.data = b"text\r\nfile"
  414. result = normalizer.checkin_normalize(text_blob, b"test.txt")
  415. self.assertEqual(result.data, b"text\nfile")
  416. # Binary file gets LFS conversion
  417. bin_blob = Blob()
  418. bin_blob.data = b"binary content"
  419. result = normalizer.checkin_normalize(bin_blob, b"test.bin")
  420. self.assertEqual(result.data, b"LFS pointer")
  421. class LineEndingFilterFromConfigTests(TestCase):
  422. """Test LineEndingFilter.from_config classmethod."""
  423. def test_from_config_none(self) -> None:
  424. """Test from_config with no config."""
  425. # No config, not for text attr - no conversion
  426. filter = LineEndingFilter.from_config(None, for_text_attr=False)
  427. self.assertIsNone(filter.clean_conversion)
  428. self.assertIsNone(filter.smudge_conversion)
  429. self.assertEqual(filter.safecrlf, b"false")
  430. # No config, for text attr - normalize on checkin
  431. filter = LineEndingFilter.from_config(None, for_text_attr=True)
  432. self.assertIsNotNone(filter.clean_conversion)
  433. self.assertIsNone(filter.smudge_conversion)
  434. self.assertEqual(filter.safecrlf, b"false")
  435. def test_from_config_autocrlf_true(self) -> None:
  436. """Test from_config with autocrlf=true."""
  437. from dulwich.config import ConfigDict
  438. config = ConfigDict()
  439. config.set(b"core", b"autocrlf", b"true")
  440. filter = LineEndingFilter.from_config(config, for_text_attr=False)
  441. self.assertIsNotNone(filter.clean_conversion)
  442. self.assertIsNotNone(filter.smudge_conversion)
  443. self.assertEqual(filter.safecrlf, b"false")
  444. def test_from_config_with_safecrlf(self) -> None:
  445. """Test from_config with safecrlf setting."""
  446. from dulwich.config import ConfigDict
  447. config = ConfigDict()
  448. config.set(b"core", b"autocrlf", b"input")
  449. config.set(b"core", b"safecrlf", b"warn")
  450. filter = LineEndingFilter.from_config(config, for_text_attr=False)
  451. self.assertIsNotNone(filter.clean_conversion)
  452. self.assertIsNone(filter.smudge_conversion)
  453. self.assertEqual(filter.safecrlf, b"warn")
  454. def test_from_config_text_attr_overrides(self) -> None:
  455. """Test that for_text_attr=True always normalizes on checkin."""
  456. from dulwich.config import ConfigDict
  457. config = ConfigDict()
  458. config.set(b"core", b"autocrlf", b"false")
  459. # Even with autocrlf=false, text attr should normalize
  460. filter = LineEndingFilter.from_config(config, for_text_attr=True)
  461. self.assertIsNotNone(filter.clean_conversion)
  462. # Smudge should still be None since autocrlf=false
  463. self.assertIsNone(filter.smudge_conversion)
  464. class SafeCRLFTests(TestCase):
  465. """Test core.safecrlf functionality."""
  466. def test_safecrlf_false(self) -> None:
  467. """Test that safecrlf=false allows any conversion."""
  468. original = b"line1\r\nline2\r\n"
  469. converted = b"line1\nline2\n"
  470. # Should not raise
  471. check_safecrlf(original, converted, b"false", b"test.txt")
  472. def test_safecrlf_true_safe_conversion(self) -> None:
  473. """Test that safecrlf=true allows safe conversions."""
  474. # CRLF -> LF -> CRLF is reversible
  475. original = b"line1\r\nline2\r\n"
  476. converted = b"line1\nline2\n"
  477. # Should not raise because conversion is reversible
  478. check_safecrlf(original, converted, b"true", b"test.txt")
  479. def test_safecrlf_true_unsafe_conversion(self) -> None:
  480. """Test that safecrlf=true fails on unsafe conversions."""
  481. # Mixed line endings would be lost
  482. original = b"line1\r\nline2\nline3\r\n"
  483. converted = b"line1\nline2\nline3\n"
  484. # Should raise because converting back gives all CRLF
  485. with self.assertRaises(ValueError) as cm:
  486. check_safecrlf(original, converted, b"true", b"test.txt")
  487. self.assertIn("CRLF would be replaced by LF", str(cm.exception))
  488. def test_safecrlf_warn(self) -> None:
  489. """Test that safecrlf=warn issues warnings."""
  490. # Mixed line endings would be lost
  491. original = b"line1\r\nline2\nline3\r\n"
  492. converted = b"line1\nline2\nline3\n"
  493. # Should warn but not raise
  494. with self.assertLogs("dulwich.line_ending", level="WARNING") as cm:
  495. check_safecrlf(original, converted, b"warn", b"test.txt")
  496. self.assertEqual(len(cm.output), 1)
  497. self.assertIn("CRLF would be replaced by LF", cm.output[0])
  498. def test_lineending_filter_with_safecrlf(self) -> None:
  499. """Test LineEndingFilter with safecrlf enabled."""
  500. # Test with safecrlf=true
  501. filter_strict = LineEndingFilter(
  502. clean_conversion=convert_crlf_to_lf,
  503. smudge_conversion=None,
  504. binary_detection=False,
  505. safecrlf=b"true",
  506. )
  507. # Safe conversion should work
  508. safe_data = b"line1\r\nline2\r\n"
  509. result = filter_strict.clean(safe_data, b"test.txt")
  510. self.assertEqual(result, b"line1\nline2\n")
  511. # Unsafe conversion should fail
  512. unsafe_data = b"line1\r\nline2\nline3\r\n"
  513. with self.assertRaises(ValueError):
  514. filter_strict.clean(unsafe_data, b"test.txt")
  515. # Test with safecrlf=warn
  516. filter_warn = LineEndingFilter(
  517. clean_conversion=convert_crlf_to_lf,
  518. smudge_conversion=None,
  519. binary_detection=False,
  520. safecrlf=b"warn",
  521. )
  522. # Should warn but still convert
  523. with self.assertLogs("dulwich.line_ending", level="WARNING") as cm:
  524. result = filter_warn.clean(unsafe_data, b"test.txt")
  525. self.assertEqual(result, b"line1\nline2\nline3\n")
  526. self.assertEqual(len(cm.output), 1)