test_sha256_packs.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. # test_sha256_packs.py -- Compatibility tests for SHA256 pack files
  2. # Copyright (C) 2024 The Dulwich contributors
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for SHA256 pack files with git command line tools."""
  22. import os
  23. import tempfile
  24. from dulwich.object_format import SHA256
  25. from dulwich.objects import Blob, Commit, Tree
  26. from dulwich.pack import load_pack_index_file
  27. from dulwich.repo import Repo
  28. from .utils import CompatTestCase, rmtree_ro, run_git_or_fail
  29. class GitSHA256PackCompatibilityTests(CompatTestCase):
  30. """Test SHA256 pack file compatibility with git command line tools."""
  31. min_git_version = (2, 29, 0)
  32. def _run_git(self, args, cwd=None):
  33. """Run git command in the specified directory."""
  34. return run_git_or_fail(args, cwd=cwd)
  35. def test_git_pack_readable_by_dulwich(self):
  36. """Test that git-created SHA256 pack files are readable by dulwich."""
  37. # Create SHA256 repo with git
  38. repo_path = tempfile.mkdtemp()
  39. self.addCleanup(rmtree_ro, repo_path)
  40. self._run_git(["init", "--object-format=sha256", repo_path])
  41. # Create multiple files to ensure pack creation
  42. for i in range(20):
  43. test_file = os.path.join(repo_path, f"file{i}.txt")
  44. with open(test_file, "w") as f:
  45. f.write(f"Content for file {i}\n")
  46. self._run_git(["add", "."], cwd=repo_path)
  47. self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
  48. # Force pack creation
  49. self._run_git(["gc"], cwd=repo_path)
  50. # Open with dulwich
  51. repo = Repo(repo_path)
  52. self.assertEqual(repo.object_format, SHA256)
  53. # Find pack files
  54. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  55. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  56. self.assertGreater(len(pack_files), 0, "No pack files created")
  57. # Read pack with dulwich
  58. for pack_file in pack_files:
  59. pack_path = os.path.join(pack_dir, pack_file)
  60. idx_path = pack_path[:-5] + ".idx"
  61. # Load pack index with SHA256 algorithm
  62. with open(idx_path, "rb") as f:
  63. pack_idx = load_pack_index_file(idx_path, f, SHA256)
  64. # Verify it's detected as SHA256
  65. self.assertEqual(pack_idx.hash_size, 32)
  66. # Verify we can iterate objects
  67. obj_count = 0
  68. for sha, offset, crc32 in pack_idx.iterentries():
  69. self.assertEqual(len(sha), 32) # SHA256
  70. obj_count += 1
  71. self.assertGreater(obj_count, 20) # At least our files + trees + commit
  72. # Verify we can read all objects through the repo interface
  73. head_ref = repo.refs[b"refs/heads/master"]
  74. commit = repo[head_ref]
  75. self.assertIsInstance(commit, Commit)
  76. # Read the tree
  77. tree = repo[commit.tree]
  78. self.assertIsInstance(tree, Tree)
  79. # Verify all files are there
  80. file_count = 0
  81. for name, mode, sha in tree.items():
  82. if name.startswith(b"file") and name.endswith(b".txt"):
  83. file_count += 1
  84. # Read the blob
  85. blob = repo[sha]
  86. self.assertIsInstance(blob, Blob)
  87. self.assertEqual(file_count, 20)
  88. repo.close()
  89. def test_dulwich_objects_readable_by_git(self):
  90. """Test that dulwich-created SHA256 objects are readable by git."""
  91. # Create SHA256 repo with dulwich
  92. repo_path = tempfile.mkdtemp()
  93. self.addCleanup(rmtree_ro, repo_path)
  94. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  95. # Create objects
  96. blobs = []
  97. for i in range(10):
  98. blob = Blob.from_string(f"Dulwich blob content {i}".encode())
  99. repo.object_store.add_object(blob)
  100. blobs.append(blob)
  101. # Create a tree with all blobs
  102. tree = Tree()
  103. for i, blob in enumerate(blobs):
  104. tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  105. repo.object_store.add_object(tree)
  106. # Create a commit
  107. commit = Commit()
  108. commit.tree = tree.get_id(SHA256)
  109. commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
  110. commit.commit_time = commit.author_time = 1234567890
  111. commit.commit_timezone = commit.author_timezone = 0
  112. commit.message = b"Test commit with blobs"
  113. repo.object_store.add_object(commit)
  114. # Update HEAD
  115. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  116. repo.close()
  117. # Verify git can read all objects
  118. output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  119. self.assertEqual(len(output.strip()), 64) # SHA256
  120. # List tree contents
  121. tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
  122. # Count lines instead of occurrences of "blob" since "blob" appears twice per line
  123. lines = tree_output.strip().split(b"\n")
  124. self.assertEqual(len(lines), 10)
  125. # Verify git can check out the content
  126. self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
  127. # Verify files exist with correct content
  128. for i in range(10):
  129. file_path = os.path.join(repo_path, f"blob{i}.txt")
  130. self.assertTrue(os.path.exists(file_path))
  131. with open(file_path, "rb") as f:
  132. content = f.read()
  133. self.assertEqual(content, f"Dulwich blob content {i}".encode())
  134. def test_large_pack_interop(self):
  135. """Test large pack file interoperability."""
  136. # Create repo with dulwich
  137. repo_path = tempfile.mkdtemp()
  138. self.addCleanup(rmtree_ro, repo_path)
  139. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  140. # Create a large file that will use delta compression
  141. large_content = b"A" * 10000
  142. blobs = []
  143. # Create similar blobs to trigger delta compression
  144. for i in range(10):
  145. content = large_content + f" variation {i}".encode()
  146. blob = Blob.from_string(content)
  147. repo.object_store.add_object(blob)
  148. blobs.append(blob)
  149. # Create tree
  150. tree = Tree()
  151. for i, blob in enumerate(blobs):
  152. tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  153. repo.object_store.add_object(tree)
  154. # Create commit
  155. commit = Commit()
  156. commit.tree = tree.get_id(SHA256)
  157. commit.author = commit.committer = b"Test <test@example.com>"
  158. commit.commit_time = commit.author_time = 1234567890
  159. commit.commit_timezone = commit.author_timezone = 0
  160. commit.message = b"Large files for delta compression test"
  161. repo.object_store.add_object(commit)
  162. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  163. repo.close()
  164. # Run git gc to create packs with delta compression
  165. self._run_git(["gc", "--aggressive"], cwd=repo_path)
  166. # Verify git created a pack
  167. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  168. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  169. self.assertGreater(len(pack_files), 0)
  170. # Re-open with dulwich and verify we can read everything
  171. repo = Repo(repo_path)
  172. head = repo.refs[b"refs/heads/master"]
  173. commit = repo[head]
  174. tree = repo[commit.tree]
  175. # Read all blobs
  176. for i in range(10):
  177. name = f"large{i}.txt".encode()
  178. _mode, sha = tree[name]
  179. blob = repo[sha]
  180. expected = large_content + f" variation {i}".encode()
  181. self.assertEqual(blob.data, expected)
  182. repo.close()
  183. def test_mixed_loose_packed_objects(self):
  184. """Test repositories with both loose and packed objects."""
  185. # Create repo with git
  186. repo_path = tempfile.mkdtemp()
  187. self.addCleanup(rmtree_ro, repo_path)
  188. self._run_git(["init", "--object-format=sha256", repo_path])
  189. # Create initial objects that will be packed
  190. for i in range(5):
  191. test_file = os.path.join(repo_path, f"packed{i}.txt")
  192. with open(test_file, "w") as f:
  193. f.write(f"Will be packed {i}\n")
  194. self._run_git(["add", "."], cwd=repo_path)
  195. self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
  196. self._run_git(["gc"], cwd=repo_path)
  197. # Create more objects that will remain loose
  198. for i in range(5):
  199. test_file = os.path.join(repo_path, f"loose{i}.txt")
  200. with open(test_file, "w") as f:
  201. f.write(f"Will stay loose {i}\n")
  202. self._run_git(["add", "."], cwd=repo_path)
  203. self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
  204. # Open with dulwich
  205. repo = Repo(repo_path)
  206. # Count objects in packs vs loose
  207. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  208. pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
  209. self.assertGreater(pack_count, 0)
  210. # Verify we can read all objects
  211. head = repo.refs[b"refs/heads/master"]
  212. commit = repo[head]
  213. # Walk the commit history
  214. commit_count = 0
  215. while commit.parents:
  216. commit_count += 1
  217. tree = repo[commit.tree]
  218. # Verify we can read the tree
  219. self.assertGreater(len(tree), 0)
  220. if commit.parents:
  221. commit = repo[commit.parents[0]]
  222. else:
  223. break
  224. self.assertEqual(commit_count, 1) # We made 2 commits total
  225. repo.close()
  226. if __name__ == "__main__":
  227. import unittest
  228. unittest.main()