2
0

test_gc.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. """Tests for dulwich.gc."""
  2. import shutil
  3. import tempfile
  4. from unittest import TestCase
  5. from dulwich.gc import (
  6. GCStats,
  7. find_reachable_objects,
  8. find_unreachable_objects,
  9. garbage_collect,
  10. prune_unreachable_objects,
  11. )
  12. from dulwich.objects import Blob, Commit, Tree
  13. from dulwich.repo import Repo
  14. class GCTestCase(TestCase):
  15. """Tests for garbage collection functionality."""
  def setUp(self):
      """Create a fresh repository in a temporary directory for each test."""
      workdir = tempfile.mkdtemp()
      self.tmpdir = workdir
      # unittest runs cleanups in reverse registration order, so the
      # repository is closed before its directory is removed.
      self.addCleanup(shutil.rmtree, workdir)
      self.repo = Repo.init(workdir)
      self.addCleanup(self.repo.close)
  def test_find_reachable_objects_empty_repo(self):
      """An empty repository yields an empty set of reachable objects."""
      repo = self.repo
      found = find_reachable_objects(repo.object_store, repo.refs)
      self.assertEqual(set(), found)
  def test_find_reachable_objects_with_commit(self):
      """A commit set as HEAD makes itself, its tree and its blob reachable."""
      store = self.repo.object_store

      # Build blob -> tree -> commit, storing each object once it is built.
      content = Blob.from_string(b"test content")
      store.add_object(content)

      root = Tree()
      root.add(b"test.txt", 0o100644, content.id)
      store.add_object(root)

      head_commit = Commit()
      head_commit.tree = root.id
      identity = b"Test User <test@example.com>"
      head_commit.author = identity
      head_commit.committer = identity
      head_commit.commit_time = 1234567890
      head_commit.author_time = 1234567890
      head_commit.commit_timezone = 0
      head_commit.author_timezone = 0
      head_commit.message = b"Test commit"
      store.add_object(head_commit)

      # Point HEAD at the new commit.
      self.repo.refs[b"HEAD"] = head_commit.id

      reachable = find_reachable_objects(store, self.repo.refs)

      # Exactly the three objects created above are expected.
      self.assertEqual({content.id, root.id, head_commit.id}, reachable)
  def test_find_unreachable_objects(self):
      """Only the object not referenced from HEAD is reported unreachable."""
      store = self.repo.object_store

      # Reachable chain: blob -> tree -> commit, with HEAD set to the commit.
      kept_blob = Blob.from_string(b"reachable content")
      store.add_object(kept_blob)

      root = Tree()
      root.add(b"reachable.txt", 0o100644, kept_blob.id)
      store.add_object(root)

      head_commit = Commit()
      head_commit.tree = root.id
      identity = b"Test User <test@example.com>"
      head_commit.author = identity
      head_commit.committer = identity
      head_commit.commit_time = 1234567890
      head_commit.author_time = 1234567890
      head_commit.commit_timezone = 0
      head_commit.author_timezone = 0
      head_commit.message = b"Test commit"
      store.add_object(head_commit)

      self.repo.refs[b"HEAD"] = head_commit.id

      # A blob that nothing refers to.
      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      unreachable = find_unreachable_objects(store, self.repo.refs)

      # The orphan blob must be the sole result.
      self.assertEqual({orphan.id}, unreachable)
  def test_prune_unreachable_objects(self):
      """Pruning reports the unreachable blob and a positive byte count."""
      store = self.repo.object_store

      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      # Sanity check: the blob is present before pruning.
      self.assertIn(orphan.id, store)

      # grace_period=None means no grace period check.
      result = prune_unreachable_objects(
          store, self.repo.refs, grace_period=None
      )
      pruned, bytes_freed = result

      self.assertEqual({orphan.id}, pruned)
      self.assertGreater(bytes_freed, 0)
      # Note: absence of the object is deliberately not asserted here,
      # because delete() only supports loose objects and may not be
      # fully implemented.
  def test_prune_unreachable_objects_dry_run(self):
      """A dry-run prune reports the blob but leaves it in the store."""
      store = self.repo.object_store

      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      # grace_period=None means no grace period check.
      result = prune_unreachable_objects(
          store, self.repo.refs, grace_period=None, dry_run=True
      )
      pruned, bytes_freed = result

      # Reported as prunable, yet still present afterwards.
      self.assertEqual({orphan.id}, pruned)
      self.assertGreater(bytes_freed, 0)
      self.assertIn(orphan.id, store)
  def test_garbage_collect(self):
      """Full garbage collection prunes only the unreachable blob."""
      store = self.repo.object_store

      # Reachable objects: blob -> tree -> commit, with HEAD set to the commit.
      content = Blob.from_string(b"test content")
      store.add_object(content)

      root = Tree()
      root.add(b"test.txt", 0o100644, content.id)
      store.add_object(root)

      head_commit = Commit()
      head_commit.tree = root.id
      identity = b"Test User <test@example.com>"
      head_commit.author = identity
      head_commit.committer = identity
      head_commit.commit_time = 1234567890
      head_commit.author_time = 1234567890
      head_commit.commit_timezone = 0
      head_commit.author_timezone = 0
      head_commit.message = b"Test commit"
      store.add_object(head_commit)
      self.repo.refs[b"HEAD"] = head_commit.id

      # A blob that nothing refers to.
      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      # grace_period=None means no grace period check.
      stats = garbage_collect(self.repo, prune=True, grace_period=None)

      self.assertIsInstance(stats, GCStats)
      self.assertEqual({orphan.id}, stats.pruned_objects)
      self.assertGreater(stats.bytes_freed, 0)
  def test_garbage_collect_no_prune(self):
      """With prune=False, garbage collection removes nothing."""
      store = self.repo.object_store

      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      stats = garbage_collect(self.repo, prune=False)

      # No pruned objects, no bytes freed, and the blob is still stored.
      self.assertEqual(set(), stats.pruned_objects)
      self.assertEqual(0, stats.bytes_freed)
      self.assertIn(orphan.id, store)
  def test_garbage_collect_dry_run(self):
      """A dry-run collection reports the blob without removing it."""
      store = self.repo.object_store

      orphan = Blob.from_string(b"unreachable content")
      store.add_object(orphan)

      # grace_period=None means no grace period check.
      stats = garbage_collect(
          self.repo, prune=True, grace_period=None, dry_run=True
      )

      # Membership rather than equality: on Windows, repository
      # initialization might create additional unreachable objects, so
      # this blob may not be the only one reported.
      self.assertIn(orphan.id, stats.pruned_objects)
      self.assertGreater(stats.bytes_freed, 0)
      self.assertIn(orphan.id, store)
  def test_grace_period(self):
      """A just-created unreachable object is kept while inside the grace period."""
      store = self.repo.object_store

      recent = Blob.from_string(b"recent unreachable content")
      store.add_object(recent)

      # The object must be loose for this test.
      self.assertTrue(store.contains_loose(recent.id))

      # One-hour grace period, run as a dry run to avoid packing. The
      # blob was only just created, so it should not be pruned.
      stats = garbage_collect(
          self.repo, prune=True, grace_period=3600, dry_run=True
      )
      self.assertEqual(set(), stats.pruned_objects)
      self.assertEqual(0, stats.bytes_freed)
      self.assertIn(recent.id, store)

      # Without a grace period the same blob should be pruned.
      stats = garbage_collect(self.repo, prune=True, grace_period=None)
      self.assertEqual({recent.id}, stats.pruned_objects)
      self.assertGreater(stats.bytes_freed, 0)
  def test_grace_period_old_object(self):
      """An unreachable object older than the grace period is pruned."""
      import os
      import time

      store = self.repo.object_store

      stale = Blob.from_string(b"old unreachable content")
      store.add_object(stale)

      # The object must be loose for this test.
      self.assertTrue(store.contains_loose(stale.id))

      # Backdate the loose object's file timestamps to two hours ago.
      two_hours_ago = time.time() - 7200
      loose_path = store._get_shafile_path(stale.id)
      os.utime(loose_path, (two_hours_ago, two_hours_ago))

      # One-hour grace period: the two-hour-old object should be pruned.
      stats = garbage_collect(self.repo, prune=True, grace_period=3600)

      self.assertEqual({stale.id}, stats.pruned_objects)
      self.assertGreater(stats.bytes_freed, 0)
  def test_packed_objects_pruned(self):
      """An unreachable object that lives in a pack is pruned as well."""
      store = self.repo.object_store

      orphan = Blob.from_string(b"unreachable packed content")
      store.add_object(orphan)

      # Pack the loose objects, then confirm the blob is no longer
      # loose but is still in the store.
      store.pack_loose_objects()
      self.assertFalse(store.contains_loose(orphan.id))
      self.assertIn(orphan.id, store)

      # grace_period=None means no grace period check.
      stats = garbage_collect(self.repo, prune=True, grace_period=None)

      # The packed blob is reported as pruned and is gone from the store.
      self.assertEqual({orphan.id}, stats.pruned_objects)
      self.assertGreater(stats.bytes_freed, 0)
      self.assertNotIn(orphan.id, store)