test_sha256.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. # test_sha256.py -- Compatibility tests for SHA256 support
  2. # Copyright (C) 2024 The Dulwich contributors
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Compatibility tests for SHA256 support with git command line tools."""
import os
import shutil
import tempfile

from dulwich.hash import SHA256
from dulwich.objects import Blob, Commit, Tree
from dulwich.repo import Repo

from .utils import CompatTestCase, run_git_or_fail
  28. class GitSHA256CompatibilityTests(CompatTestCase):
  29. """Test SHA256 compatibility with git command line tools."""
  30. min_git_version = (2, 29, 0)
  31. def _run_git(self, args, cwd=None):
  32. """Run git command in the specified directory."""
  33. return run_git_or_fail(args, cwd=cwd)
  34. def test_sha256_repo_creation_compat(self):
  35. """Test that dulwich-created SHA256 repos are readable by git."""
  36. # Create SHA256 repo with dulwich
  37. repo_path = tempfile.mkdtemp()
  38. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  39. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  40. # Add a blob and tree using dulwich
  41. blob = Blob.from_string(b"Hello SHA256 world!")
  42. tree = Tree()
  43. tree.add(b"hello.txt", 0o100644, blob.get_id(SHA256))
  44. # Create objects in the repository
  45. object_store = repo.object_store
  46. object_store.add_object(blob)
  47. object_store.add_object(tree)
  48. repo.close()
  49. # Verify git can read the repository
  50. config_output = self._run_git(
  51. ["config", "--get", "extensions.objectformat"], cwd=repo_path
  52. )
  53. self.assertEqual(config_output.strip(), b"sha256")
  54. # Verify git recognizes it as a SHA256 repository
  55. rev_parse_output = self._run_git(
  56. ["rev-parse", "--show-object-format"], cwd=repo_path
  57. )
  58. self.assertEqual(rev_parse_output.strip(), b"sha256")
  59. def test_git_created_sha256_repo_readable(self):
  60. """Test that git-created SHA256 repos are readable by dulwich."""
  61. # Create SHA256 repo with git
  62. repo_path = tempfile.mkdtemp()
  63. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  64. self._run_git(["init", "--object-format=sha256", repo_path])
  65. # Create a file and commit with git
  66. test_file = os.path.join(repo_path, "test.txt")
  67. with open(test_file, "w") as f:
  68. f.write("Test SHA256 content")
  69. self._run_git(["add", "test.txt"], cwd=repo_path)
  70. self._run_git(["commit", "-m", "Test SHA256 commit"], cwd=repo_path)
  71. # Read with dulwich
  72. repo = Repo(repo_path)
  73. # Verify dulwich detects SHA256
  74. hash_alg = repo.get_hash_algorithm()
  75. self.assertEqual(hash_alg, SHA256)
  76. # Verify dulwich can read objects
  77. # Try both main and master branches (git default changed over time)
  78. try:
  79. head_ref = repo.refs[b"refs/heads/main"]
  80. except KeyError:
  81. head_ref = repo.refs[b"refs/heads/master"]
  82. self.assertEqual(len(head_ref), 64) # SHA256 length
  83. # Read the commit object
  84. commit = repo[head_ref]
  85. self.assertIsInstance(commit, Commit)
  86. self.assertEqual(len(commit.tree), 64) # SHA256 tree ID
  87. repo.close()
  88. def test_object_hashing_consistency(self):
  89. """Test that object hashing is consistent between dulwich and git."""
  90. # Create SHA256 repo with git
  91. repo_path = tempfile.mkdtemp()
  92. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  93. self._run_git(["init", "--object-format=sha256", repo_path])
  94. # Create a test file with known content
  95. test_content = b"Test content for SHA256 hashing consistency"
  96. test_file = os.path.join(repo_path, "test.txt")
  97. with open(test_file, "wb") as f:
  98. f.write(test_content)
  99. # Get git's hash for the content
  100. git_hash = self._run_git(["hash-object", "test.txt"], cwd=repo_path)
  101. git_hash = git_hash.strip().decode("ascii")
  102. # Create same blob with dulwich
  103. blob = Blob.from_string(test_content)
  104. dulwich_hash = blob.get_id(SHA256).decode("ascii")
  105. # Hashes should match
  106. self.assertEqual(git_hash, dulwich_hash)
  107. def test_tree_hashing_consistency(self):
  108. """Test that tree hashing is consistent between dulwich and git."""
  109. # Create SHA256 repo with git
  110. repo_path = tempfile.mkdtemp()
  111. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  112. self._run_git(["init", "--object-format=sha256", repo_path])
  113. # Create a test file and add to index
  114. test_content = b"Tree test content"
  115. test_file = os.path.join(repo_path, "tree_test.txt")
  116. with open(test_file, "wb") as f:
  117. f.write(test_content)
  118. self._run_git(["add", "tree_test.txt"], cwd=repo_path)
  119. # Get git's tree hash
  120. git_tree_hash = self._run_git(["write-tree"], cwd=repo_path)
  121. git_tree_hash = git_tree_hash.strip().decode("ascii")
  122. # Create same tree with dulwich
  123. blob = Blob.from_string(test_content)
  124. tree = Tree()
  125. tree.add(b"tree_test.txt", 0o100644, blob.get_id(SHA256))
  126. dulwich_tree_hash = tree.get_id(SHA256).decode("ascii")
  127. # Tree hashes should match
  128. self.assertEqual(git_tree_hash, dulwich_tree_hash)
  129. def test_commit_creation_interop(self):
  130. """Test commit creation interoperability between dulwich and git."""
  131. # Create SHA256 repo with dulwich
  132. repo_path = tempfile.mkdtemp()
  133. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  134. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  135. # Create objects with dulwich
  136. blob = Blob.from_string(b"Interop test content")
  137. tree = Tree()
  138. tree.add(b"interop.txt", 0o100644, blob.get_id(SHA256))
  139. commit = Commit()
  140. commit.tree = tree.get_id(SHA256)
  141. commit.author = commit.committer = b"Test User <test@example.com>"
  142. commit.commit_time = commit.author_time = 1234567890
  143. commit.commit_timezone = commit.author_timezone = 0
  144. commit.message = b"Test SHA256 commit from dulwich"
  145. # Add objects to repo
  146. object_store = repo.object_store
  147. object_store.add_object(blob)
  148. object_store.add_object(tree)
  149. object_store.add_object(commit)
  150. # Update HEAD
  151. commit_id = commit.get_id(SHA256)
  152. repo.refs[b"refs/heads/master"] = commit_id
  153. repo.close()
  154. # Verify git can read the commit
  155. commit_hash = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  156. commit_hash = commit_hash.strip().decode("ascii")
  157. self.assertEqual(len(commit_hash), 64) # SHA256 length
  158. # Verify git can show the commit
  159. commit_message = self._run_git(["log", "--format=%s", "-n", "1"], cwd=repo_path)
  160. self.assertEqual(commit_message.strip(), b"Test SHA256 commit from dulwich")
  161. # Verify git can list the tree
  162. tree_content = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
  163. self.assertIn(b"interop.txt", tree_content)
  164. def test_ref_updates_interop(self):
  165. """Test that ref updates work between dulwich and git."""
  166. # Create repo with git
  167. repo_path = tempfile.mkdtemp()
  168. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  169. self._run_git(["init", "--object-format=sha256", repo_path])
  170. # Create initial commit with git
  171. test_file = os.path.join(repo_path, "initial.txt")
  172. with open(test_file, "w") as f:
  173. f.write("Initial content")
  174. self._run_git(["add", "initial.txt"], cwd=repo_path)
  175. self._run_git(["commit", "-m", "Initial commit"], cwd=repo_path)
  176. initial_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  177. initial_commit = initial_commit.strip()
  178. # Update ref with dulwich
  179. repo = Repo(repo_path)
  180. # Create new commit with dulwich
  181. blob = Blob.from_string(b"New content from dulwich")
  182. tree = Tree()
  183. tree.add(b"dulwich.txt", 0o100644, blob.get_id(SHA256))
  184. commit = Commit()
  185. commit.tree = tree.get_id(SHA256)
  186. commit.parents = [initial_commit]
  187. commit.author = commit.committer = b"Dulwich User <dulwich@example.com>"
  188. commit.commit_time = commit.author_time = 1234567891
  189. commit.commit_timezone = commit.author_timezone = 0
  190. commit.message = b"Commit from dulwich"
  191. # Add objects and update ref
  192. object_store = repo.object_store
  193. object_store.add_object(blob)
  194. object_store.add_object(tree)
  195. object_store.add_object(commit)
  196. new_commit_hash = commit.get_id(SHA256)
  197. repo.refs[b"refs/heads/master"] = new_commit_hash
  198. repo.close()
  199. # Verify git sees the update
  200. current_commit = self._run_git(["rev-parse", "HEAD"], cwd=repo_path)
  201. current_commit = current_commit.strip().decode("ascii")
  202. self.assertEqual(current_commit, new_commit_hash.decode("ascii"))
  203. # Verify git can access the new tree
  204. tree_listing = self._run_git(["ls-tree", "HEAD"], cwd=repo_path)
  205. self.assertIn(b"dulwich.txt", tree_listing)
  206. def test_clone_sha256_repo_git_to_dulwich(self):
  207. """Test cloning a git SHA256 repository with dulwich."""
  208. # Create source repo with git
  209. source_path = tempfile.mkdtemp()
  210. self.addCleanup(lambda: __import__("shutil").rmtree(source_path))
  211. self._run_git(["init", "--object-format=sha256", source_path])
  212. # Add content
  213. test_file = os.path.join(source_path, "clone_test.txt")
  214. with open(test_file, "w") as f:
  215. f.write("Content to be cloned")
  216. self._run_git(["add", "clone_test.txt"], cwd=source_path)
  217. self._run_git(["commit", "-m", "Initial commit"], cwd=source_path)
  218. # Clone with dulwich
  219. target_path = tempfile.mkdtemp()
  220. self.addCleanup(lambda: __import__("shutil").rmtree(target_path))
  221. target_repo = Repo.init(target_path, mkdir=False, object_format="sha256")
  222. # Copy objects (simplified clone)
  223. source_repo = Repo(source_path)
  224. # Copy all objects
  225. for obj_id in source_repo.object_store:
  226. obj = source_repo.object_store[obj_id]
  227. target_repo.object_store.add_object(obj)
  228. # Copy refs
  229. for ref_name in source_repo.refs.keys():
  230. ref_id = source_repo.refs[ref_name]
  231. target_repo.refs[ref_name] = ref_id
  232. # Set HEAD
  233. target_repo.refs.set_symbolic_ref(b"HEAD", b"refs/heads/master")
  234. source_repo.close()
  235. target_repo.close()
  236. # Verify with git
  237. output = self._run_git(["rev-parse", "--show-object-format"], cwd=target_path)
  238. self.assertEqual(output.strip(), b"sha256")
  239. # Verify content
  240. self._run_git(["checkout", "HEAD", "--", "."], cwd=target_path)
  241. cloned_file = os.path.join(target_path, "clone_test.txt")
  242. with open(cloned_file) as f:
  243. content = f.read()
  244. self.assertEqual(content, "Content to be cloned")
  245. def test_fsck_sha256_repo(self):
  246. """Test that git fsck works on dulwich-created SHA256 repos."""
  247. # Create repo with dulwich
  248. repo_path = tempfile.mkdtemp()
  249. self.addCleanup(lambda: __import__("shutil").rmtree(repo_path))
  250. repo = Repo.init(repo_path, mkdir=False, object_format="sha256")
  251. # Create a more complex object graph
  252. # Multiple blobs
  253. blobs = []
  254. for i in range(5):
  255. blob = Blob.from_string(f"Blob content {i}".encode())
  256. repo.object_store.add_object(blob)
  257. blobs.append(blob)
  258. # Multiple trees
  259. subtree = Tree()
  260. subtree.add(b"subfile1.txt", 0o100644, blobs[0].get_id(SHA256))
  261. subtree.add(b"subfile2.txt", 0o100644, blobs[1].get_id(SHA256))
  262. repo.object_store.add_object(subtree)
  263. main_tree = Tree()
  264. main_tree.add(b"file1.txt", 0o100644, blobs[2].get_id(SHA256))
  265. main_tree.add(b"file2.txt", 0o100644, blobs[3].get_id(SHA256))
  266. main_tree.add(b"subdir", 0o040000, subtree.get_id(SHA256))
  267. repo.object_store.add_object(main_tree)
  268. # Create commits
  269. commit1 = Commit()
  270. commit1.tree = main_tree.get_id(SHA256)
  271. commit1.author = commit1.committer = b"Test <test@example.com>"
  272. commit1.commit_time = commit1.author_time = 1234567890
  273. commit1.commit_timezone = commit1.author_timezone = 0
  274. commit1.message = b"First commit"
  275. repo.object_store.add_object(commit1)
  276. commit2 = Commit()
  277. commit2.tree = main_tree.get_id(SHA256)
  278. commit2.parents = [commit1.get_id(SHA256)]
  279. commit2.author = commit2.committer = b"Test <test@example.com>"
  280. commit2.commit_time = commit2.author_time = 1234567891
  281. commit2.commit_timezone = commit2.author_timezone = 0
  282. commit2.message = b"Second commit"
  283. repo.object_store.add_object(commit2)
  284. # Set refs
  285. repo.refs[b"refs/heads/master"] = commit2.get_id(SHA256)
  286. repo.refs[b"refs/heads/branch1"] = commit1.get_id(SHA256)
  287. repo.close()
  288. # Run git fsck
  289. fsck_output = self._run_git(["fsck", "--full"], cwd=repo_path)
  290. # fsck should not report any errors (empty output or success message)
  291. self.assertNotIn(b"error", fsck_output.lower())
  292. self.assertNotIn(b"missing", fsck_output.lower())
  293. self.assertNotIn(b"broken", fsck_output.lower())