# test_lfs.py 35 KB
# test_lfs.py -- tests for LFS
# Copyright (C) 2020 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """Tests for LFS support."""
  22. import json
  23. import shutil
  24. import tempfile
  25. from dulwich.lfs import LFSFilterDriver, LFSPointer, LFSStore
  26. from . import TestCase
  27. class LFSTests(TestCase):
  28. def setUp(self) -> None:
  29. super().setUp()
  30. self.test_dir = tempfile.mkdtemp()
  31. self.addCleanup(shutil.rmtree, self.test_dir)
  32. self.lfs = LFSStore.create(self.test_dir)
  33. def test_create(self) -> None:
  34. sha = self.lfs.write_object([b"a", b"b"])
  35. with self.lfs.open_object(sha) as f:
  36. self.assertEqual(b"ab", f.read())
  37. def test_missing(self) -> None:
  38. self.assertRaises(KeyError, self.lfs.open_object, "abcdeabcdeabcdeabcde")
  39. def test_write_object_empty(self) -> None:
  40. """Test writing an empty object."""
  41. sha = self.lfs.write_object([])
  42. with self.lfs.open_object(sha) as f:
  43. self.assertEqual(b"", f.read())
  44. def test_write_object_multiple_chunks(self) -> None:
  45. """Test writing an object with multiple chunks."""
  46. chunks = [b"chunk1", b"chunk2", b"chunk3"]
  47. sha = self.lfs.write_object(chunks)
  48. with self.lfs.open_object(sha) as f:
  49. self.assertEqual(b"".join(chunks), f.read())
  50. def test_sha_path_calculation(self) -> None:
  51. """Test the internal sha path calculation."""
  52. # The implementation splits the sha into parts for directory structure
  53. # Write and verify we can read it back
  54. sha = self.lfs.write_object([b"test data"])
  55. self.assertEqual(len(sha), 64) # SHA-256 is 64 hex chars
  56. # Open should succeed, which verifies the path calculation works
  57. with self.lfs.open_object(sha) as f:
  58. self.assertEqual(b"test data", f.read())
  59. def test_create_lfs_dir(self) -> None:
  60. """Test creating an LFS directory when it doesn't exist."""
  61. import os
  62. # Create a temporary directory for the test
  63. lfs_parent_dir = tempfile.mkdtemp()
  64. self.addCleanup(shutil.rmtree, lfs_parent_dir)
  65. # Create a path for the LFS directory
  66. lfs_dir = os.path.join(lfs_parent_dir, "lfs")
  67. # Create the LFS store
  68. LFSStore.create(lfs_dir)
  69. # Verify the directories were created
  70. self.assertTrue(os.path.isdir(lfs_dir))
  71. self.assertTrue(os.path.isdir(os.path.join(lfs_dir, "tmp")))
  72. self.assertTrue(os.path.isdir(os.path.join(lfs_dir, "objects")))
  73. class LFSPointerTests(TestCase):
  74. def test_from_bytes_valid(self) -> None:
  75. """Test parsing a valid LFS pointer."""
  76. pointer_data = (
  77. b"version https://git-lfs.github.com/spec/v1\n"
  78. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  79. b"size 0\n"
  80. )
  81. pointer = LFSPointer.from_bytes(pointer_data)
  82. self.assertIsNotNone(pointer)
  83. self.assertEqual(
  84. pointer.oid,
  85. "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
  86. )
  87. self.assertEqual(pointer.size, 0)
  88. def test_from_bytes_with_extra_fields(self) -> None:
  89. """Test parsing LFS pointer with extra fields (should still work)."""
  90. pointer_data = (
  91. b"version https://git-lfs.github.com/spec/v1\n"
  92. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  93. b"size 1234\n"
  94. b"x-custom-field value\n"
  95. )
  96. pointer = LFSPointer.from_bytes(pointer_data)
  97. self.assertIsNotNone(pointer)
  98. self.assertEqual(pointer.size, 1234)
  99. def test_from_bytes_invalid_version(self) -> None:
  100. """Test parsing with invalid version line."""
  101. pointer_data = (
  102. b"version https://invalid.com/spec/v1\n"
  103. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  104. b"size 0\n"
  105. )
  106. pointer = LFSPointer.from_bytes(pointer_data)
  107. self.assertIsNone(pointer)
  108. def test_from_bytes_missing_oid(self) -> None:
  109. """Test parsing with missing OID."""
  110. pointer_data = b"version https://git-lfs.github.com/spec/v1\nsize 0\n"
  111. pointer = LFSPointer.from_bytes(pointer_data)
  112. self.assertIsNone(pointer)
  113. def test_from_bytes_missing_size(self) -> None:
  114. """Test parsing with missing size."""
  115. pointer_data = (
  116. b"version https://git-lfs.github.com/spec/v1\n"
  117. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  118. )
  119. pointer = LFSPointer.from_bytes(pointer_data)
  120. self.assertIsNone(pointer)
  121. def test_from_bytes_invalid_size(self) -> None:
  122. """Test parsing with invalid size."""
  123. pointer_data = (
  124. b"version https://git-lfs.github.com/spec/v1\n"
  125. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  126. b"size not_a_number\n"
  127. )
  128. pointer = LFSPointer.from_bytes(pointer_data)
  129. self.assertIsNone(pointer)
  130. def test_from_bytes_binary_data(self) -> None:
  131. """Test parsing binary data (not an LFS pointer)."""
  132. binary_data = b"\x00\x01\x02\x03\x04"
  133. pointer = LFSPointer.from_bytes(binary_data)
  134. self.assertIsNone(pointer)
  135. def test_to_bytes(self) -> None:
  136. """Test converting LFS pointer to bytes."""
  137. pointer = LFSPointer(
  138. "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 1234
  139. )
  140. data = pointer.to_bytes()
  141. expected = (
  142. b"version https://git-lfs.github.com/spec/v1\n"
  143. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  144. b"size 1234\n"
  145. )
  146. self.assertEqual(data, expected)
  147. def test_round_trip(self) -> None:
  148. """Test converting to bytes and back."""
  149. original = LFSPointer(
  150. "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 9876
  151. )
  152. data = original.to_bytes()
  153. parsed = LFSPointer.from_bytes(data)
  154. self.assertIsNotNone(parsed)
  155. self.assertEqual(parsed.oid, original.oid)
  156. self.assertEqual(parsed.size, original.size)
  157. def test_is_valid_oid(self) -> None:
  158. """Test OID validation."""
  159. # Valid SHA256
  160. valid_pointer = LFSPointer(
  161. "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 0
  162. )
  163. self.assertTrue(valid_pointer.is_valid_oid())
  164. # Too short
  165. short_pointer = LFSPointer("e3b0c44298fc1c14", 0)
  166. self.assertFalse(short_pointer.is_valid_oid())
  167. # Invalid hex characters
  168. invalid_pointer = LFSPointer(
  169. "g3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 0
  170. )
  171. self.assertFalse(invalid_pointer.is_valid_oid())
  172. class LFSIntegrationTests(TestCase):
  173. """Integration tests for LFS with Git operations."""
  174. def setUp(self) -> None:
  175. super().setUp()
  176. import os
  177. from dulwich.repo import Repo
  178. # Create temporary directory for test repo
  179. self.test_dir = tempfile.mkdtemp()
  180. self.addCleanup(shutil.rmtree, self.test_dir)
  181. # Initialize repo
  182. self.repo = Repo.init(self.test_dir)
  183. self.lfs_dir = os.path.join(self.test_dir, ".git", "lfs")
  184. self.lfs_store = LFSStore.create(self.lfs_dir)
  185. def test_lfs_with_gitattributes(self) -> None:
  186. """Test LFS integration with .gitattributes."""
  187. import os
  188. # Create .gitattributes file
  189. gitattributes_path = os.path.join(self.test_dir, ".gitattributes")
  190. with open(gitattributes_path, "wb") as f:
  191. f.write(b"*.bin filter=lfs diff=lfs merge=lfs -text\n")
  192. # Create a binary file
  193. bin_path = os.path.join(self.test_dir, "large.bin")
  194. large_content = b"Large binary content" * 1000
  195. with open(bin_path, "wb") as f:
  196. f.write(large_content)
  197. # Add files to repo
  198. self.repo.stage([".gitattributes", "large.bin"])
  199. # Get the blob for large.bin from the index
  200. index = self.repo.open_index()
  201. entry = index[b"large.bin"]
  202. blob = self.repo.object_store[entry.sha]
  203. # With LFS configured, the blob should contain an LFS pointer
  204. # (Note: This would require actual LFS filter integration in dulwich)
  205. # For now, we just verify the structure
  206. self.assertIsNotNone(blob)
  207. def test_lfs_checkout_missing_object(self) -> None:
  208. """Test checkout behavior when LFS object is missing."""
  209. from dulwich.objects import Blob, Commit, Tree
  210. # Create an LFS pointer blob
  211. pointer = LFSPointer(
  212. "0000000000000000000000000000000000000000000000000000000000000000", 1234
  213. )
  214. blob = Blob()
  215. blob.data = pointer.to_bytes()
  216. self.repo.object_store.add_object(blob)
  217. # Create tree with the blob
  218. tree = Tree()
  219. tree.add(b"missing.bin", 0o100644, blob.id)
  220. self.repo.object_store.add_object(tree)
  221. # Create commit
  222. commit = Commit()
  223. commit.tree = tree.id
  224. commit.message = b"Add missing LFS file"
  225. commit.author = commit.committer = b"Test User <test@example.com>"
  226. commit.commit_time = commit.author_time = 1234567890
  227. commit.commit_timezone = commit.author_timezone = 0
  228. self.repo.object_store.add_object(commit)
  229. # Update HEAD
  230. self.repo.refs[b"HEAD"] = commit.id
  231. # Checkout should leave pointer file when object is missing
  232. # (actual checkout would require more integration)
  233. def test_lfs_pointer_detection(self) -> None:
  234. """Test detection of LFS pointer files."""
  235. # Test various file contents
  236. test_cases = [
  237. # Valid LFS pointer
  238. (
  239. b"version https://git-lfs.github.com/spec/v1\n"
  240. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  241. b"size 1234\n",
  242. True,
  243. ),
  244. # Regular text file
  245. (b"This is a regular text file\n", False),
  246. # Binary file
  247. (b"\x00\x01\x02\x03\x04", False),
  248. # File that starts like pointer but isn't
  249. (b"version 1.0\nThis is not an LFS pointer\n", False),
  250. ]
  251. for content, expected_is_pointer in test_cases:
  252. pointer = LFSPointer.from_bytes(content)
  253. self.assertEqual(
  254. pointer is not None,
  255. expected_is_pointer,
  256. f"Failed for content: {content!r}",
  257. )
  258. class LFSFilterDriverTests(TestCase):
  259. def setUp(self) -> None:
  260. super().setUp()
  261. self.test_dir = tempfile.mkdtemp()
  262. self.addCleanup(shutil.rmtree, self.test_dir)
  263. self.lfs_store = LFSStore.create(self.test_dir)
  264. self.filter_driver = LFSFilterDriver(self.lfs_store)
  265. def test_clean_new_file(self) -> None:
  266. """Test clean filter on new file content."""
  267. content = b"This is a test file content"
  268. result = self.filter_driver.clean(content)
  269. # Result should be an LFS pointer
  270. pointer = LFSPointer.from_bytes(result)
  271. self.assertIsNotNone(pointer)
  272. self.assertEqual(pointer.size, len(content))
  273. # Content should be stored in LFS
  274. with self.lfs_store.open_object(pointer.oid) as f:
  275. self.assertEqual(f.read(), content)
  276. def test_clean_existing_pointer(self) -> None:
  277. """Test clean filter on already-pointer content."""
  278. # Create a pointer
  279. pointer = LFSPointer(
  280. "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 1234
  281. )
  282. pointer_data = pointer.to_bytes()
  283. # Clean should return the pointer unchanged
  284. result = self.filter_driver.clean(pointer_data)
  285. self.assertEqual(result, pointer_data)
  286. def test_smudge_valid_pointer(self) -> None:
  287. """Test smudge filter with valid pointer."""
  288. # Store some content
  289. content = b"This is the actual file content"
  290. sha = self.lfs_store.write_object([content])
  291. # Create pointer
  292. pointer = LFSPointer(sha, len(content))
  293. pointer_data = pointer.to_bytes()
  294. # Smudge should return the actual content
  295. result = self.filter_driver.smudge(pointer_data)
  296. self.assertEqual(result, content)
  297. def test_smudge_missing_object(self) -> None:
  298. """Test smudge filter with missing LFS object."""
  299. # Create pointer to non-existent object
  300. pointer = LFSPointer(
  301. "0000000000000000000000000000000000000000000000000000000000000000", 1234
  302. )
  303. pointer_data = pointer.to_bytes()
  304. # Smudge should return the pointer as-is when object is missing
  305. result = self.filter_driver.smudge(pointer_data)
  306. self.assertEqual(result, pointer_data)
  307. def test_smudge_non_pointer(self) -> None:
  308. """Test smudge filter on non-pointer content."""
  309. content = b"This is not an LFS pointer"
  310. # Smudge should return content unchanged
  311. result = self.filter_driver.smudge(content)
  312. self.assertEqual(result, content)
  313. def test_round_trip(self) -> None:
  314. """Test clean followed by smudge."""
  315. original_content = b"Round trip test content"
  316. # Clean (working tree -> repo)
  317. pointer_data = self.filter_driver.clean(original_content)
  318. # Verify it's a pointer
  319. pointer = LFSPointer.from_bytes(pointer_data)
  320. self.assertIsNotNone(pointer)
  321. # Smudge (repo -> working tree)
  322. restored_content = self.filter_driver.smudge(pointer_data)
  323. # Should get back the original content
  324. self.assertEqual(restored_content, original_content)
  325. def test_clean_empty_file(self) -> None:
  326. """Test clean filter on empty file."""
  327. content = b""
  328. result = self.filter_driver.clean(content)
  329. # Result should be an LFS pointer
  330. pointer = LFSPointer.from_bytes(result)
  331. self.assertIsNotNone(pointer)
  332. self.assertEqual(pointer.size, 0)
  333. # Empty content should be stored in LFS
  334. with self.lfs_store.open_object(pointer.oid) as f:
  335. self.assertEqual(f.read(), content)
  336. def test_clean_large_file(self) -> None:
  337. """Test clean filter on large file."""
  338. # Create a large file (1MB)
  339. content = b"x" * (1024 * 1024)
  340. result = self.filter_driver.clean(content)
  341. # Result should be an LFS pointer
  342. pointer = LFSPointer.from_bytes(result)
  343. self.assertIsNotNone(pointer)
  344. self.assertEqual(pointer.size, len(content))
  345. # Content should be stored in LFS
  346. with self.lfs_store.open_object(pointer.oid) as f:
  347. self.assertEqual(f.read(), content)
  348. def test_smudge_corrupt_pointer(self) -> None:
  349. """Test smudge filter with corrupt pointer data."""
  350. # Create corrupt pointer data
  351. corrupt_data = (
  352. b"version https://git-lfs.github.com/spec/v1\noid sha256:invalid\n"
  353. )
  354. # Smudge should return the data as-is
  355. result = self.filter_driver.smudge(corrupt_data)
  356. self.assertEqual(result, corrupt_data)
  357. def test_clean_unicode_content(self) -> None:
  358. """Test clean filter with unicode content."""
  359. # UTF-8 encoded unicode content
  360. content = "Hello 世界 🌍".encode()
  361. result = self.filter_driver.clean(content)
  362. # Result should be an LFS pointer
  363. pointer = LFSPointer.from_bytes(result)
  364. self.assertIsNotNone(pointer)
  365. # Content should be preserved exactly
  366. with self.lfs_store.open_object(pointer.oid) as f:
  367. self.assertEqual(f.read(), content)
  368. class LFSStoreEdgeCaseTests(TestCase):
  369. """Edge case tests for LFS store."""
  370. def setUp(self) -> None:
  371. super().setUp()
  372. self.test_dir = tempfile.mkdtemp()
  373. self.addCleanup(shutil.rmtree, self.test_dir)
  374. self.lfs = LFSStore.create(self.test_dir)
  375. def test_concurrent_writes(self) -> None:
  376. """Test that concurrent writes to same content work correctly."""
  377. content = b"duplicate content"
  378. # Write the same content multiple times
  379. sha1 = self.lfs.write_object([content])
  380. sha2 = self.lfs.write_object([content])
  381. # Should get the same SHA
  382. self.assertEqual(sha1, sha2)
  383. # Content should be stored only once
  384. with self.lfs.open_object(sha1) as f:
  385. self.assertEqual(f.read(), content)
  386. def test_write_with_generator(self) -> None:
  387. """Test writing object with generator chunks."""
  388. def chunk_generator():
  389. yield b"chunk1"
  390. yield b"chunk2"
  391. yield b"chunk3"
  392. sha = self.lfs.write_object(chunk_generator())
  393. # Verify content
  394. with self.lfs.open_object(sha) as f:
  395. self.assertEqual(f.read(), b"chunk1chunk2chunk3")
  396. def test_partial_write_rollback(self) -> None:
  397. """Test that partial writes don't leave artifacts."""
  398. import os
  399. # Count initial objects
  400. objects_dir = os.path.join(self.test_dir, "objects")
  401. initial_count = sum(len(files) for _, _, files in os.walk(objects_dir))
  402. # Try to write with a failing generator
  403. def failing_generator():
  404. yield b"chunk1"
  405. raise RuntimeError("Simulated error")
  406. # This should fail
  407. with self.assertRaises(RuntimeError):
  408. self.lfs.write_object(failing_generator())
  409. # No new objects should have been created
  410. final_count = sum(len(files) for _, _, files in os.walk(objects_dir))
  411. self.assertEqual(initial_count, final_count)
  412. class LFSPointerEdgeCaseTests(TestCase):
  413. """Edge case tests for LFS pointer parsing."""
  414. def test_pointer_with_windows_line_endings(self) -> None:
  415. """Test parsing pointer with Windows line endings."""
  416. pointer_data = (
  417. b"version https://git-lfs.github.com/spec/v1\r\n"
  418. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\r\n"
  419. b"size 1234\r\n"
  420. )
  421. pointer = LFSPointer.from_bytes(pointer_data)
  422. self.assertIsNotNone(pointer)
  423. self.assertEqual(pointer.size, 1234)
  424. def test_pointer_with_extra_whitespace(self) -> None:
  425. """Test parsing pointer with extra whitespace."""
  426. pointer_data = (
  427. b"version https://git-lfs.github.com/spec/v1 \n"
  428. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  429. b"size 1234 \n"
  430. )
  431. pointer = LFSPointer.from_bytes(pointer_data)
  432. self.assertIsNotNone(pointer)
  433. self.assertEqual(pointer.size, 1234)
  434. def test_pointer_case_sensitivity(self) -> None:
  435. """Test that pointer parsing is case sensitive."""
  436. # Version line must be exact
  437. pointer_data = (
  438. b"Version https://git-lfs.github.com/spec/v1\n" # Capital V
  439. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  440. b"size 1234\n"
  441. )
  442. pointer = LFSPointer.from_bytes(pointer_data)
  443. self.assertIsNone(pointer) # Should fail due to case
  444. def test_pointer_oid_formats(self) -> None:
  445. """Test different OID formats."""
  446. # SHA256 is currently the only supported format
  447. # Test SHA1 format (should fail)
  448. pointer_data = (
  449. b"version https://git-lfs.github.com/spec/v1\n"
  450. b"oid sha1:356a192b7913b04c54574d18c28d46e6395428ab\n" # SHA1
  451. b"size 1234\n"
  452. )
  453. pointer = LFSPointer.from_bytes(pointer_data)
  454. # This might be accepted but marked as invalid OID
  455. if pointer:
  456. self.assertFalse(pointer.is_valid_oid())
  457. def test_pointer_size_limits(self) -> None:
  458. """Test size value limits."""
  459. # Test with very large size
  460. pointer_data = (
  461. b"version https://git-lfs.github.com/spec/v1\n"
  462. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  463. b"size 999999999999999999\n" # Very large number
  464. )
  465. pointer = LFSPointer.from_bytes(pointer_data)
  466. self.assertIsNotNone(pointer)
  467. self.assertEqual(pointer.size, 999999999999999999)
  468. # Test with negative size (should fail)
  469. pointer_data = (
  470. b"version https://git-lfs.github.com/spec/v1\n"
  471. b"oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"
  472. b"size -1\n"
  473. )
  474. pointer = LFSPointer.from_bytes(pointer_data)
  475. self.assertIsNone(pointer) # Should fail with negative size
  476. class LFSServerTests(TestCase):
  477. """Tests for the LFS server implementation."""
  478. def setUp(self) -> None:
  479. super().setUp()
  480. import threading
  481. from dulwich.lfs_server import run_lfs_server
  482. # Create temporary directory for LFS storage
  483. self.test_dir = tempfile.mkdtemp()
  484. self.addCleanup(shutil.rmtree, self.test_dir)
  485. # Start LFS server
  486. self.server, self.server_url = run_lfs_server(port=0, lfs_dir=self.test_dir)
  487. self.server_thread = threading.Thread(target=self.server.serve_forever)
  488. self.server_thread.daemon = True
  489. self.server_thread.start()
  490. self.addCleanup(self.server.shutdown)
  491. def test_server_batch_endpoint(self) -> None:
  492. """Test the batch endpoint directly."""
  493. from urllib.request import Request, urlopen
  494. # Create batch request
  495. batch_data = {
  496. "operation": "download",
  497. "transfers": ["basic"],
  498. "objects": [{"oid": "abc123", "size": 100}],
  499. }
  500. req = Request(
  501. f"{self.server_url}/objects/batch",
  502. data=json.dumps(batch_data).encode("utf-8"),
  503. headers={
  504. "Content-Type": "application/vnd.git-lfs+json",
  505. "Accept": "application/vnd.git-lfs+json",
  506. },
  507. method="POST",
  508. )
  509. with urlopen(req) as response:
  510. result = json.loads(response.read())
  511. self.assertIn("objects", result)
  512. self.assertEqual(len(result["objects"]), 1)
  513. self.assertEqual(result["objects"][0]["oid"], "abc123")
  514. self.assertIn("error", result["objects"][0]) # Object doesn't exist
  515. def test_server_upload_download(self) -> None:
  516. """Test uploading and downloading an object."""
  517. import hashlib
  518. from urllib.request import Request, urlopen
  519. test_content = b"test server content"
  520. test_oid = hashlib.sha256(test_content).hexdigest()
  521. # Get upload URL via batch
  522. batch_data = {
  523. "operation": "upload",
  524. "transfers": ["basic"],
  525. "objects": [{"oid": test_oid, "size": len(test_content)}],
  526. }
  527. req = Request(
  528. f"{self.server_url}/objects/batch",
  529. data=json.dumps(batch_data).encode("utf-8"),
  530. headers={
  531. "Content-Type": "application/vnd.git-lfs+json",
  532. "Accept": "application/vnd.git-lfs+json",
  533. },
  534. method="POST",
  535. )
  536. with urlopen(req) as response:
  537. batch_result = json.loads(response.read())
  538. upload_url = batch_result["objects"][0]["actions"]["upload"]["href"]
  539. # Upload the object
  540. upload_req = Request(
  541. upload_url,
  542. data=test_content,
  543. headers={"Content-Type": "application/octet-stream"},
  544. method="PUT",
  545. )
  546. with urlopen(upload_req) as response:
  547. self.assertEqual(response.status, 200)
  548. # Download the object
  549. download_batch_data = {
  550. "operation": "download",
  551. "transfers": ["basic"],
  552. "objects": [{"oid": test_oid, "size": len(test_content)}],
  553. }
  554. req = Request(
  555. f"{self.server_url}/objects/batch",
  556. data=json.dumps(download_batch_data).encode("utf-8"),
  557. headers={
  558. "Content-Type": "application/vnd.git-lfs+json",
  559. "Accept": "application/vnd.git-lfs+json",
  560. },
  561. method="POST",
  562. )
  563. with urlopen(req) as response:
  564. download_batch_result = json.loads(response.read())
  565. download_url = download_batch_result["objects"][0]["actions"]["download"][
  566. "href"
  567. ]
  568. # Download the object
  569. download_req = Request(download_url)
  570. with urlopen(download_req) as response:
  571. downloaded_content = response.read()
  572. self.assertEqual(downloaded_content, test_content)
  573. def test_server_verify_endpoint(self) -> None:
  574. """Test the verify endpoint."""
  575. import hashlib
  576. from urllib.error import HTTPError
  577. from urllib.request import Request, urlopen
  578. test_content = b"verify test"
  579. test_oid = hashlib.sha256(test_content).hexdigest()
  580. # First upload the object
  581. self.server.lfs_store.write_object([test_content])
  582. # Test verify for existing object
  583. verify_req = Request(
  584. f"{self.server_url}/objects/{test_oid}/verify",
  585. data=json.dumps({"oid": test_oid, "size": len(test_content)}).encode(
  586. "utf-8"
  587. ),
  588. headers={"Content-Type": "application/vnd.git-lfs+json"},
  589. method="POST",
  590. )
  591. with urlopen(verify_req) as response:
  592. self.assertEqual(response.status, 200)
  593. # Test verify for non-existent object
  594. fake_oid = "0" * 64
  595. verify_req = Request(
  596. f"{self.server_url}/objects/{fake_oid}/verify",
  597. data=json.dumps({"oid": fake_oid, "size": 100}).encode("utf-8"),
  598. headers={"Content-Type": "application/vnd.git-lfs+json"},
  599. method="POST",
  600. )
  601. with self.assertRaises(HTTPError) as cm:
  602. with urlopen(verify_req):
  603. pass
  604. self.assertEqual(cm.exception.code, 404)
  605. def test_server_invalid_endpoints(self) -> None:
  606. """Test invalid endpoints return 404."""
  607. from urllib.error import HTTPError
  608. from urllib.request import Request, urlopen
  609. # Test invalid GET endpoint
  610. with self.assertRaises(HTTPError) as cm:
  611. with urlopen(f"{self.server_url}/invalid"):
  612. pass
  613. self.assertEqual(cm.exception.code, 404)
  614. # Test invalid POST endpoint
  615. req = Request(f"{self.server_url}/invalid", data=b"test", method="POST")
  616. with self.assertRaises(HTTPError) as cm:
  617. with urlopen(req):
  618. pass
  619. self.assertEqual(cm.exception.code, 404)
  620. def test_server_batch_invalid_operation(self) -> None:
  621. """Test batch endpoint with invalid operation."""
  622. from urllib.error import HTTPError
  623. from urllib.request import Request, urlopen
  624. batch_data = {"operation": "invalid", "transfers": ["basic"], "objects": []}
  625. req = Request(
  626. f"{self.server_url}/objects/batch",
  627. data=json.dumps(batch_data).encode("utf-8"),
  628. headers={"Content-Type": "application/vnd.git-lfs+json"},
  629. method="POST",
  630. )
  631. with self.assertRaises(HTTPError) as cm:
  632. with urlopen(req):
  633. pass
  634. self.assertEqual(cm.exception.code, 400)
  635. def test_server_batch_missing_fields(self) -> None:
  636. """Test batch endpoint with missing required fields."""
  637. from urllib.request import Request, urlopen
  638. # Missing oid
  639. batch_data = {
  640. "operation": "download",
  641. "transfers": ["basic"],
  642. "objects": [{"size": 100}], # Missing oid
  643. }
  644. req = Request(
  645. f"{self.server_url}/objects/batch",
  646. data=json.dumps(batch_data).encode("utf-8"),
  647. headers={"Content-Type": "application/vnd.git-lfs+json"},
  648. method="POST",
  649. )
  650. with urlopen(req) as response:
  651. result = json.loads(response.read())
  652. self.assertIn("error", result["objects"][0])
  653. self.assertIn("Missing oid", result["objects"][0]["error"]["message"])
  654. def test_server_upload_oid_mismatch(self) -> None:
  655. """Test upload with OID mismatch."""
  656. from urllib.error import HTTPError
  657. from urllib.request import Request, urlopen
  658. # Upload with wrong OID
  659. upload_req = Request(
  660. f"{self.server_url}/objects/wrongoid123",
  661. data=b"test content",
  662. headers={"Content-Type": "application/octet-stream"},
  663. method="PUT",
  664. )
  665. with self.assertRaises(HTTPError) as cm:
  666. with urlopen(upload_req):
  667. pass
  668. self.assertEqual(cm.exception.code, 400)
  669. self.assertIn("OID mismatch", cm.exception.read().decode())
  def test_server_download_non_existent(self) -> None:
      """Check that a GET for an all-zero, 64-character OID fails with HTTP 404."""
      from urllib.error import HTTPError
      from urllib.request import urlopen

      # OID-shaped value that the test expects the server not to have.
      missing_url = f"{self.server_url}/objects/{'0' * 64}"

      with self.assertRaises(HTTPError) as caught:
          with urlopen(missing_url):
              pass

      self.assertEqual(caught.exception.code, 404)
  def test_server_invalid_json(self) -> None:
      """Check that a batch POST whose body is not JSON fails with HTTP 400."""
      from urllib.error import HTTPError
      from urllib.request import Request, urlopen

      # Correct endpoint and content type, but an unparseable body.
      malformed_request = Request(
          f"{self.server_url}/objects/batch",
          data=b"not json",
          headers={"Content-Type": "application/vnd.git-lfs+json"},
          method="POST",
      )

      with self.assertRaises(HTTPError) as caught:
          with urlopen(malformed_request):
              pass

      self.assertEqual(caught.exception.code, 400)
  693. class LFSClientTests(TestCase):
  694. """Tests for LFS client network operations."""
  def setUp(self) -> None:
      """Start a throwaway LFS server and build a client pointed at it.

      A fresh temporary directory is used as the server's LFS storage and the
      server runs in a background daemon thread.  Cleanups are registered so
      that, after each test, the server loop is stopped, its listening socket
      is closed, the thread is joined, and finally the storage directory is
      removed (cleanups run in reverse registration order).
      """
      super().setUp()
      import threading

      from dulwich.lfs import LFSClient
      from dulwich.lfs_server import run_lfs_server

      # Create temporary directory for LFS storage
      self.test_dir = tempfile.mkdtemp()
      self.addCleanup(shutil.rmtree, self.test_dir)

      # Start LFS server in a thread
      self.server, self.server_url = run_lfs_server(port=0, lfs_dir=self.test_dir)
      self.server_thread = threading.Thread(
          target=self.server.serve_forever, daemon=True
      )
      self.server_thread.start()

      def stop_server() -> None:
          # shutdown() only ends the serve_forever() loop; without
          # server_close() the listening socket stays open after the test.
          # NOTE(review): assumes run_lfs_server returns a socketserver-style
          # server exposing server_close() -- confirm.
          self.server.shutdown()
          self.server.server_close()
          # The loop has already exited, so this returns promptly; the
          # timeout only guards against an unexpected hang.
          self.server_thread.join(timeout=5)

      # Registered after start(): shutdown() waits for a running loop.
      self.addCleanup(stop_server)

      # Create LFS client pointing to our test server
      self.client = LFSClient(f"{self.server_url}/objects")
  def test_client_url_normalization(self) -> None:
      """Check that ``LFSClient.url`` has no trailing slash either way.

      The same endpoint is given once with and once without a trailing
      slash; both clients must expose the slash-free URL.
      """
      from dulwich.lfs import LFSClient

      normalized = "https://example.com/repo.git/info/lfs"
      given_urls = (
          "https://example.com/repo.git/info/lfs/",  # with trailing slash
          "https://example.com/repo.git/info/lfs",  # without trailing slash
      )
      for given in given_urls:
          self.assertEqual(LFSClient(given).url, normalized)
  def test_batch_request_format(self) -> None:
      """Check the shape of a download batch response for one stored object."""
      payload = b"test content for batch"
      # Seed the server's store directly so the requested object exists.
      oid = self.server.lfs_store.write_object([payload])

      wanted = [{"oid": oid, "size": len(payload)}]
      response = self.client.batch("download", wanted)

      self.assertIsNotNone(response.objects)
      self.assertEqual(len(response.objects), 1)

      entry = response.objects[0]
      self.assertEqual(entry.oid, oid)
      self.assertIsNotNone(entry.actions)
      self.assertIn("download", entry.actions)
  def test_download_with_verification(self) -> None:
      """Check that download returns the content and rejects a wrong size.

      Also asserts that the OID returned by the store equals the SHA-256 hex
      digest of the content.
      """
      import hashlib

      from dulwich.lfs import LFSError

      payload = b"test content for download"
      expected_oid = hashlib.sha256(payload).hexdigest()

      # Put the object into the server's store and confirm how it is keyed.
      stored_oid = self.server.lfs_store.write_object([payload])
      self.assertEqual(stored_oid, expected_oid)

      # With the right size the exact bytes come back.
      fetched = self.client.download(expected_oid, len(payload))
      self.assertEqual(fetched, payload)

      # A wrong size must raise an error whose message mentions "size".
      with self.assertRaises(LFSError) as caught:
          self.client.download(expected_oid, 999)
      self.assertIn("size", str(caught.exception))
  def test_upload_with_verify(self) -> None:
      """Check that an uploaded object can be read back from the server store."""
      import hashlib

      payload = b"upload test content"
      oid = hashlib.sha256(payload).hexdigest()

      # Push the bytes through the client.
      self.client.upload(oid, len(payload), payload)

      # Inspect the server-side store directly rather than downloading.
      with self.server.lfs_store.open_object(oid) as stored:
          on_disk = stored.read()
      self.assertEqual(on_disk, payload)
  def test_upload_already_exists(self) -> None:
      """Check that re-uploading an object already in the store is harmless."""
      import hashlib

      payload = b"existing content"
      oid = hashlib.sha256(payload).hexdigest()

      # Seed the store first so the upload targets an existing object.
      self.server.lfs_store.write_object([payload])

      # The second upload must complete without raising.
      self.client.upload(oid, len(payload), payload)

      # The stored bytes are unchanged afterwards.
      with self.server.lfs_store.open_object(oid) as stored:
          on_disk = stored.read()
      self.assertEqual(on_disk, payload)
  def test_error_handling(self) -> None:
      """Check the errors raised for a missing download and a bad-OID upload.

      Downloading an all-zero OID must raise ``LFSError`` mentioning
      "Object not found"; uploading under the OID ``wrong_oid`` must raise
      ``HTTPError`` mentioning "OID mismatch".
      """
      from urllib.error import HTTPError

      from dulwich.lfs import LFSError

      # Downloading an object that was never stored.
      absent_oid = "0000000000000000000000000000000000000000000000000000000000000000"
      with self.assertRaises(LFSError) as download_failure:
          self.client.download(absent_oid, 100)
      self.assertIn("Object not found", str(download_failure.exception))

      # Uploading under an OID that is not the digest of the data.
      with self.assertRaises(HTTPError) as upload_failure:
          self.client.upload("wrong_oid", 5, b"hello")
      # Server should reject due to OID mismatch
      self.assertIn("OID mismatch", str(upload_failure.exception))