test_sha256_packs.py 12 KB


  1. # test_sha256_packs.py -- Compatibility tests for SHA256 pack files
  2. # Copyright (C) 2024 The Dulwich contributors
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for SHA256 pack files with git command line tools."""
  22. import os
  23. import tempfile
  24. from dulwich.hash import SHA256
  25. from dulwich.objects import Blob, Commit, Tree
  26. from dulwich.pack import load_pack_index_file
  27. from dulwich.repo import Repo
  28. from .utils import CompatTestCase, run_git_or_fail
  29. class GitSHA256PackCompatibilityTests(CompatTestCase):
  30. """Test SHA256 pack file compatibility with git command line tools."""
  31. min_git_version = (2, 29, 0)
  32. def _run_git(self, args, cwd=None):
  33. """Run git command in the specified directory."""
  34. return run_git_or_fail(args, cwd=cwd)
  35. def test_git_pack_readable_by_dulwich(self):
  36. """Test that git-created SHA256 pack files are readable by dulwich."""
  37. # Create SHA256 repo with git
  38. repo_path = tempfile.mkdtemp()
  39. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  40. self._run_git(["init", "--object-format=sha256", repo_path])
  41. # Create multiple files to ensure pack creation
  42. for i in range(20):
  43. test_file = os.path.join(repo_path, f"file{i}.txt")
  44. with open(test_file, "w") as f:
  45. f.write(f"Content for file {i}\n")
  46. self._run_git(["add", "."], cwd=repo_path)
  47. self._run_git(["commit", "-m", "Add 20 files"], cwd=repo_path)
  48. # Force pack creation
  49. self._run_git(["gc"], cwd=repo_path)
  50. # Open with dulwich
  51. repo = Repo(repo_path)
  52. self.assertEqual(repo.get_hash_algorithm(), SHA256)
  53. # Find pack files
  54. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  55. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  56. self.assertGreater(len(pack_files), 0, "No pack files created")
  57. # Read pack with dulwich
  58. for pack_file in pack_files:
  59. pack_path = os.path.join(pack_dir, pack_file)
  60. idx_path = pack_path[:-5] + ".idx"
  61. # Load pack index with SHA256 algorithm
  62. with open(idx_path, "rb") as f:
  63. pack_idx = load_pack_index_file(
  64. idx_path, f, hash_algorithm=repo.get_hash_algorithm()
  65. )
  66. # Verify it's detected as SHA256
  67. self.assertEqual(pack_idx.hash_size, 32)
  68. # Verify we can iterate objects
  69. obj_count = 0
  70. for sha, offset, crc32 in pack_idx.iterentries():
  71. self.assertEqual(len(sha), 32) # SHA256
  72. obj_count += 1
  73. self.assertGreater(obj_count, 20) # At least our files + trees + commit
  74. # Verify we can read all objects through the repo interface
  75. head_ref = repo.refs[b"refs/heads/master"]
  76. commit = repo[head_ref]
  77. self.assertIsInstance(commit, Commit)
  78. # Read the tree
  79. tree = repo[commit.tree]
  80. self.assertIsInstance(tree, Tree)
  81. # Verify all files are there
  82. file_count = 0
  83. for name, mode, sha in tree.items():
  84. if name.startswith(b"file") and name.endswith(b".txt"):
  85. file_count += 1
  86. # Read the blob
  87. blob = repo[sha]
  88. self.assertIsInstance(blob, Blob)
  89. self.assertEqual(file_count, 20)
  90. repo.close()
  91. def test_dulwich_objects_readable_by_git(self):
  92. """Test that dulwich-created SHA256 objects are readable by git."""
  93. # Create SHA256 repo with dulwich
  94. repo_path = tempfile.mkdtemp()
  95. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  96. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  97. # Create objects
  98. blobs = []
  99. for i in range(10):
  100. blob = Blob.from_string(f"Dulwich blob content {i}".encode())
  101. repo.object_store.add_object(blob)
  102. blobs.append(blob)
  103. # Create a tree with all blobs
  104. tree = Tree()
  105. for i, blob in enumerate(blobs):
  106. tree.add(f"blob{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  107. repo.object_store.add_object(tree)
  108. # Create a commit
  109. commit = Commit()
  110. commit.tree = tree.get_id(SHA256)
  111. commit.author = commit.committer = b"Dulwich Test <test@dulwich.org>"
  112. commit.commit_time = commit.author_time = 1234567890
  113. commit.commit_timezone = commit.author_timezone = 0
  114. commit.message = b"Test commit with blobs"
  115. repo.object_store.add_object(commit)
  116. # Update HEAD
  117. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  118. repo.close()
  119. # Verify git can read all objects
  120. output = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  121. self.assertEqual(len(output.strip()), 64) # SHA256
  122. # List tree contents
  123. tree_output = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
  124. # Count lines instead of occurrences of "blob" since "blob" appears twice per line
  125. lines = tree_output.strip().split(b"\n")
  126. self.assertEqual(len(lines), 10)
  127. # Verify git can check out the content
  128. self._run_git(["checkout", "HEAD", "--", "."], cwd=repo_path)
  129. # Verify files exist with correct content
  130. for i in range(10):
  131. file_path = os.path.join(repo_path, f"blob{i}.txt")
  132. self.assertTrue(os.path.exists(file_path))
  133. with open(file_path, "rb") as f:
  134. content = f.read()
  135. self.assertEqual(content, f"Dulwich blob content {i}".encode())
  136. def test_pack_index_v1_interop(self):
  137. """Test pack index v1 interoperability with SHA256."""
  138. # Create repo with git using pack index v1
  139. repo_path = tempfile.mkdtemp()
  140. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  141. self._run_git(["init", "--object-format=sha256", repo_path])
  142. self._run_git(["config", "pack.indexVersion", "1"], cwd=repo_path)
  143. # Create files
  144. for i in range(10):
  145. test_file = os.path.join(repo_path, f"v1test{i}.txt")
  146. with open(test_file, "w") as f:
  147. f.write(f"Pack v1 test {i}\n")
  148. self._run_git(["add", "."], cwd=repo_path)
  149. self._run_git(["commit", "-m", "Test pack v1"], cwd=repo_path)
  150. self._run_git(["gc"], cwd=repo_path)
  151. # Read with dulwich
  152. repo = Repo(repo_path)
  153. # Find pack index
  154. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  155. idx_files = [f for f in os.listdir(pack_dir) if f.endswith(".idx")]
  156. for idx_file in idx_files:
  157. idx_path = os.path.join(pack_dir, idx_file)
  158. with open(idx_path, "rb") as f:
  159. pack_idx = load_pack_index_file(
  160. idx_path, f, hash_algorithm=repo.get_hash_algorithm()
  161. )
  162. # Verify it's v1 with SHA256
  163. self.assertEqual(pack_idx.version, 1)
  164. self.assertEqual(pack_idx.hash_size, 32)
  165. # Verify we can iterate
  166. for sha, offset, crc32 in pack_idx.iterentries():
  167. self.assertEqual(len(sha), 32)
  168. self.assertIsNone(crc32) # v1 doesn't store CRC32
  169. repo.close()
  170. def test_large_pack_interop(self):
  171. """Test large pack file interoperability."""
  172. # Create repo with dulwich
  173. repo_path = tempfile.mkdtemp()
  174. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  175. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  176. # Create a large file that will use delta compression
  177. large_content = b"A" * 10000
  178. blobs = []
  179. # Create similar blobs to trigger delta compression
  180. for i in range(10):
  181. content = large_content + f" variation {i}".encode()
  182. blob = Blob.from_string(content)
  183. repo.object_store.add_object(blob)
  184. blobs.append(blob)
  185. # Create tree
  186. tree = Tree()
  187. for i, blob in enumerate(blobs):
  188. tree.add(f"large{i}.txt".encode(), 0o100644, blob.get_id(SHA256))
  189. repo.object_store.add_object(tree)
  190. # Create commit
  191. commit = Commit()
  192. commit.tree = tree.get_id(SHA256)
  193. commit.author = commit.committer = b"Test <test@example.com>"
  194. commit.commit_time = commit.author_time = 1234567890
  195. commit.commit_timezone = commit.author_timezone = 0
  196. commit.message = b"Large files for delta compression test"
  197. repo.object_store.add_object(commit)
  198. repo.refs[b"refs/heads/master"] = commit.get_id(SHA256)
  199. repo.close()
  200. # Run git gc to create packs with delta compression
  201. self._run_git(["gc", "--aggressive"], cwd=repo_path)
  202. # Verify git created a pack
  203. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  204. pack_files = [f for f in os.listdir(pack_dir) if f.endswith(".pack")]
  205. self.assertGreater(len(pack_files), 0)
  206. # Re-open with dulwich and verify we can read everything
  207. repo = Repo(repo_path)
  208. head = repo.refs[b"refs/heads/master"]
  209. commit = repo[head]
  210. tree = repo[commit.tree]
  211. # Read all blobs
  212. for i in range(10):
  213. name = f"large{i}.txt".encode()
  214. mode, sha = tree[name]
  215. blob = repo[sha]
  216. expected = large_content + f" variation {i}".encode()
  217. self.assertEqual(blob.data, expected)
  218. repo.close()
  219. def test_mixed_loose_packed_objects(self):
  220. """Test repositories with both loose and packed objects."""
  221. # Create repo with git
  222. repo_path = tempfile.mkdtemp()
  223. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  224. self._run_git(["init", "--object-format=sha256", repo_path])
  225. # Create initial objects that will be packed
  226. for i in range(5):
  227. test_file = os.path.join(repo_path, f"packed{i}.txt")
  228. with open(test_file, "w") as f:
  229. f.write(f"Will be packed {i}\n")
  230. self._run_git(["add", "."], cwd=repo_path)
  231. self._run_git(["commit", "-m", "Initial packed objects"], cwd=repo_path)
  232. self._run_git(["gc"], cwd=repo_path)
  233. # Create more objects that will remain loose
  234. for i in range(5):
  235. test_file = os.path.join(repo_path, f"loose{i}.txt")
  236. with open(test_file, "w") as f:
  237. f.write(f"Will stay loose {i}\n")
  238. self._run_git(["add", "."], cwd=repo_path)
  239. self._run_git(["commit", "-m", "Loose objects"], cwd=repo_path)
  240. # Open with dulwich
  241. repo = Repo(repo_path)
  242. # Count objects in packs vs loose
  243. pack_dir = os.path.join(repo_path, ".git", "objects", "pack")
  244. pack_count = len([f for f in os.listdir(pack_dir) if f.endswith(".pack")])
  245. self.assertGreater(pack_count, 0)
  246. # Verify we can read all objects
  247. head = repo.refs[b"refs/heads/master"]
  248. commit = repo[head]
  249. # Walk the commit history
  250. commit_count = 0
  251. while commit.parents:
  252. commit_count += 1
  253. tree = repo[commit.tree]
  254. # Verify we can read the tree
  255. self.assertGreater(len(tree), 0)
  256. if commit.parents:
  257. commit = repo[commit.parents[0]]
  258. else:
  259. break
  260. self.assertEqual(commit_count, 1) # We made 2 commits total
  261. repo.close()
  262. if __name__ == "__main__":
  263. import unittest
  264. unittest.main()