test_sha256_packs.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. # test_sha256_packs.py -- Compatibility tests for SHA256 pack files
  2. # Copyright (C) 2024 The Dulwich contributors
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for SHA256 pack files with git command line tools."""
  22. import os
  23. import tempfile
  24. from dulwich.object_format import SHA256
  25. from dulwich.objects import Blob, Commit, Tree
  26. from dulwich.pack import load_pack_index_file
  27. from dulwich.repo import Repo
  28. from .utils import CompatTestCase, run_git_or_fail
  29. class GitSHA256PackCompatibilityTests(CompatTestCase):
  30. """Test SHA256 pack file compatibility with git command line tools."""
  31. min_git_version = (2, 29, 0)
  32. def _run_git(self, args, cwd=None):
  33. """Run git command in the specified directory."""
  34. return run_git_or_fail(args, cwd=cwd)
  35. def test_git_pack_readable_by_dulwich(self):
  36. """Test that git-created SHA256 pack files are readable by dulwich."""
  37. # Create SHA256 repo with git
  38. repo_path = tempfile.mkdtemp()
  39. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  40. self._run_git(["init", "--object-format=sha256", repo_path])
  41. # Create multiple files to ensure pack creation
  42. for i in range(20):
  43. test_file = os.path.join(repo_path, f"file{i}.txt")
  44. with open(test_file, "w") as f:
  45. f.write(f"Content for file {i}\n")
  46. self._run_git(["add", "."], cwd=repo_path)
  47. self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
  48. # Force pack creation
  49. self._run_git(["gc"], cwd=repo_path)
  50. # Open with dulwich
  51. repo = Repo(repo_path)
  52. self.assertEqual(repo.object_format, SHA256)
  53. # Find pack files
  54. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  55. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  56. self.assertGreater(len(pack_files), 0, "No pack files created")
  57. # Read pack with dulwich
  58. for pack_file in pack_files:
  59. pack_path = os.path.join(pack_dir, pack_file)
  60. idx_path = pack_path[:-5] + ".idx"
  61. # Load pack index with SHA256 algorithm
  62. with open(idx_path, "rb") as f:
  63. pack_idx = load_pack_index_file(
  64. idx_path, f, object_format=repo.object_format
  65. )
  66. # Verify it's detected as SHA256
  67. self.assertEqual(pack_idx.hash_size, 32)
  68. # Verify we can iterate objects
  69. obj_count = 0
  70. for sha, offset, crc32 in pack_idx.iterentries():
  71. self.assertEqual(len(sha), 32) # SHA256
  72. obj_count += 1
  73. self.assertGreater(obj_count, 20) # At least our files + trees + commit
  74. # Verify we can read all objects through the repo interface
  75. head_ref = repo.refs[b"refs/heads/master"]
  76. commit = repo[head_ref]
  77. self.assertIsInstance(commit, Commit)
  78. # Read the tree
  79. tree = repo[commit.tree]
  80. self.assertIsInstance(tree, Tree)
  81. # Verify all files are there
  82. file_count = 0
  83. for name, mode, sha in tree.items():
  84. if name.startswith(b"file") and name.endswith(b".txt"):
  85. file_count += 1
  86. # Read the blob
  87. blob = repo[sha]
  88. self.assertIsInstance(blob, Blob)
  89. self.assertEqual(file_count, 20)
  90. repo.close()
  91. def test_dulwich_objects_readable_by_git(self):
  92. """Test that dulwich-created SHA256 objects are readable by git."""
  93. # Create SHA256 repo with dulwich
  94. repo_path = tempfile.mkdtemp()
  95. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  96. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  97. # Create objects
  98. blobs = []
  99. for i in range(10):
  100. blob = Blob.from_string(f"Dulwich blob content {i}".encode())
  101. repo.object_store.add_object(blob)
  102. blobs.append(blob)
  103. # Create a tree with all blobs
  104. tree = Tree()
  105. for i, blob in enumerate(blobs):
  106. tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  107. repo.object_store.add_object(tree)
  108. # Create a commit
  109. commit = Commit()
  110. commit.tree = tree.get_id(SHA256)
  111. commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
  112. commit.commit_time = commit.author_time = 1234567890
  113. commit.commit_timezone = commit.author_timezone = 0
  114. commit.message = b"Test commit with blobs"
  115. repo.object_store.add_object(commit)
  116. # Update HEAD
  117. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  118. repo.close()
  119. # Verify git can read all objects
  120. output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  121. self.assertEqual(len(output.strip()), 64) # SHA256
  122. # List tree contents
  123. tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
  124. # Count lines instead of occurrences of "blob" since "blob" appears twice per line
  125. lines = tree_output.strip().split(b"\n")
  126. self.assertEqual(len(lines), 10)
  127. # Verify git can check out the content
  128. self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
  129. # Verify files exist with correct content
  130. for i in range(10):
  131. file_path = os.path.join(repo_path, f"blob{i}.txt")
  132. self.assertTrue(os.path.exists(file_path))
  133. with open(file_path, "rb") as f:
  134. content = f.read()
  135. self.assertEqual(content, f"Dulwich blob content {i}".encode())
  136. def test_pack_index_v1_interop(self):
  137. """Test pack index v1 interoperability with SHA256."""
  138. # Create repo with git using pack index v1
  139. repo_path = tempfile.mkdtemp()
  140. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  141. self._run_git(["init", "--object-format=sha256", repo_path])
  142. self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
  143. # Create files
  144. for i in range(10):
  145. test_file = os.path.join(repo_path, f"v1test{i}.txt")
  146. with open(test_file, "w") as f:
  147. f.write(f"Pack v1 test {i}\n")
  148. self._run_git(["add", "."], cwd=repo_path)
  149. self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
  150. self._run_git(["gc"], cwd=repo_path)
  151. # Read with dulwich
  152. repo = Repo(repo_path)
  153. # Find pack index
  154. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  155. idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
  156. for idx_file in idx_files:
  157. idx_path = os.path.join(pack_dir, idx_file)
  158. with open(idx_path, "rb") as f:
  159. pack_idx = load_pack_index_file(
  160. idx_path, f, object_format=repo.object_format
  161. )
  162. # Verify it's v1 with SHA256
  163. self.assertEqual(pack_idx.version, 1)
  164. self.assertEqual(pack_idx.hash_size, 32)
  165. # Verify we can iterate
  166. for sha, offset, crc32 in pack_idx.iterentries():
  167. self.assertEqual(len(sha), 32)
  168. self.assertIsNone(crc32) # v1 doesn't store CRC32
  169. repo.close()
  170. def test_large_pack_interop(self):
  171. """Test large pack file interoperability."""
  172. # Create repo with dulwich
  173. repo_path = tempfile.mkdtemp()
  174. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  175. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  176. # Create a large file that will use delta compression
  177. large_content = b"A" * 10000
  178. blobs = []
  179. # Create similar blobs to trigger delta compression
  180. for i in range(10):
  181. content = large_content + f" variation {i}".encode()
  182. blob = Blob.from_string(content)
  183. repo.object_store.add_object(blob)
  184. blobs.append(blob)
  185. # Create tree
  186. tree = Tree()
  187. for i, blob in enumerate(blobs):
  188. tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  189. repo.object_store.add_object(tree)
  190. # Create commit
  191. commit = Commit()
  192. commit.tree = tree.get_id(SHA256)
  193. commit.author = commit.committer = b"Test <test@example.com>"
  194. commit.commit_time = commit.author_time = 1234567890
  195. commit.commit_timezone = commit.author_timezone = 0
  196. commit.message = b"Large files for delta compression test"
  197. repo.object_store.add_object(commit)
  198. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  199. repo.close()
  200. # Run git gc to create packs with delta compression
  201. self._run_git(["gc", "--aggressive"], cwd=repo_path)
  202. # Verify git created a pack
  203. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  204. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  205. self.assertGreater(len(pack_files), 0)
  206. # Re-open with dulwich and verify we can read everything
  207. repo = Repo(repo_path)
  208. head = repo.refs[b"refs/heads/master"]
  209. commit = repo[head]
  210. tree = repo[commit.tree]
  211. # Read all blobs
  212. for i in range(10):
  213. name = f"large{i}.txt".encode()
  214. mode, sha = tree[name]
  215. blob = repo[sha]
  216. expected = large_content + f" variation {i}".encode()
  217. self.assertEqual(blob.data, expected)
  218. repo.close()
  219. def test_mixed_loose_packed_objects(self):
  220. """Test repositories with both loose and packed objects."""
  221. # Create repo with git
  222. repo_path = tempfile.mkdtemp()
  223. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  224. self._run_git(["init", "--object-format=sha256", repo_path])
  225. # Create initial objects that will be packed
  226. for i in range(5):
  227. test_file = os.path.join(repo_path, f"packed{i}.txt")
  228. with open(test_file, "w") as f:
  229. f.write(f"Will be packed {i}\n")
  230. self._run_git(["add", "."], cwd=repo_path)
  231. self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
  232. self._run_git(["gc"], cwd=repo_path)
  233. # Create more objects that will remain loose
  234. for i in range(5):
  235. test_file = os.path.join(repo_path, f"loose{i}.txt")
  236. with open(test_file, "w") as f:
  237. f.write(f"Will stay loose {i}\n")
  238. self._run_git(["add", "."], cwd=repo_path)
  239. self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
  240. # Open with dulwich
  241. repo = Repo(repo_path)
  242. # Count objects in packs vs loose
  243. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  244. pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
  245. self.assertGreater(pack_count, 0)
  246. # Verify we can read all objects
  247. head = repo.refs[b"refs/heads/master"]
  248. commit = repo[head]
  249. # Walk the commit history
  250. commit_count = 0
  251. while commit.parents:
  252. commit_count += 1
  253. tree = repo[commit.tree]
  254. # Verify we can read the tree
  255. self.assertGreater(len(tree), 0)
  256. if commit.parents:
  257. commit = repo[commit.parents[0]]
  258. else:
  259. break
  260. self.assertEqual(commit_count, 1) # We made 2 commits total
  261. repo.close()
  262. if __name__ == "__main__":
  263. import unittest
  264. unittest.main()