test_gc.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
  1. """Tests for dulwich.gc."""
  2. import shutil
  3. import tempfile
  4. from unittest import TestCase
  5. from dulwich.gc import (
  6. GCStats,
  7. find_reachable_objects,
  8. find_unreachable_objects,
  9. garbage_collect,
  10. prune_unreachable_objects,
  11. )
  12. from dulwich.objects import Blob, Commit, Tree
  13. from dulwich.repo import Repo
  14. class GCTestCase(TestCase):
  15. """Tests for garbage collection functionality."""
  16. def setUp(self):
  17. self.tmpdir = tempfile.mkdtemp()
  18. self.repo = Repo.init(self.tmpdir)
  19. def tearDown(self):
  20. shutil.rmtree(self.tmpdir)
  21. def test_find_reachable_objects_empty_repo(self):
  22. """Test finding reachable objects in empty repository."""
  23. reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
  24. self.assertEqual(set(), reachable)
  25. def test_find_reachable_objects_with_commit(self):
  26. """Test finding reachable objects with a commit."""
  27. # Create a blob
  28. blob = Blob.from_string(b"test content")
  29. self.repo.object_store.add_object(blob)
  30. # Create a tree
  31. tree = Tree()
  32. tree.add(b"test.txt", 0o100644, blob.id)
  33. self.repo.object_store.add_object(tree)
  34. # Create a commit
  35. commit = Commit()
  36. commit.tree = tree.id
  37. commit.author = commit.committer = b"Test User <test@example.com>"
  38. commit.commit_time = commit.author_time = 1234567890
  39. commit.commit_timezone = commit.author_timezone = 0
  40. commit.message = b"Test commit"
  41. self.repo.object_store.add_object(commit)
  42. # Set HEAD to the commit
  43. self.repo.refs[b"HEAD"] = commit.id
  44. # Find reachable objects
  45. reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
  46. # All three objects should be reachable
  47. self.assertEqual({blob.id, tree.id, commit.id}, reachable)
  48. def test_find_unreachable_objects(self):
  49. """Test finding unreachable objects."""
  50. # Create a reachable blob
  51. reachable_blob = Blob.from_string(b"reachable content")
  52. self.repo.object_store.add_object(reachable_blob)
  53. # Create a tree
  54. tree = Tree()
  55. tree.add(b"reachable.txt", 0o100644, reachable_blob.id)
  56. self.repo.object_store.add_object(tree)
  57. # Create a commit
  58. commit = Commit()
  59. commit.tree = tree.id
  60. commit.author = commit.committer = b"Test User <test@example.com>"
  61. commit.commit_time = commit.author_time = 1234567890
  62. commit.commit_timezone = commit.author_timezone = 0
  63. commit.message = b"Test commit"
  64. self.repo.object_store.add_object(commit)
  65. # Set HEAD to the commit
  66. self.repo.refs[b"HEAD"] = commit.id
  67. # Create an unreachable blob
  68. unreachable_blob = Blob.from_string(b"unreachable content")
  69. self.repo.object_store.add_object(unreachable_blob)
  70. # Find unreachable objects
  71. unreachable = find_unreachable_objects(self.repo.object_store, self.repo.refs)
  72. # Only the unreachable blob should be found
  73. self.assertEqual({unreachable_blob.id}, unreachable)
  74. def test_prune_unreachable_objects(self):
  75. """Test pruning unreachable objects."""
  76. # Create an unreachable blob
  77. unreachable_blob = Blob.from_string(b"unreachable content")
  78. self.repo.object_store.add_object(unreachable_blob)
  79. # Verify it exists
  80. self.assertIn(unreachable_blob.id, self.repo.object_store)
  81. # Prune unreachable objects
  82. pruned, bytes_freed = prune_unreachable_objects(
  83. self.repo.object_store, self.repo.refs, grace_period=0
  84. )
  85. # Verify the blob was pruned
  86. self.assertEqual({unreachable_blob.id}, pruned)
  87. self.assertGreater(bytes_freed, 0)
  88. # Note: We can't test that the object is gone because delete()
  89. # only supports loose objects and may not be fully implemented
  90. def test_prune_unreachable_objects_dry_run(self):
  91. """Test pruning unreachable objects with dry run."""
  92. # Create an unreachable blob
  93. unreachable_blob = Blob.from_string(b"unreachable content")
  94. self.repo.object_store.add_object(unreachable_blob)
  95. # Prune with dry run
  96. pruned, bytes_freed = prune_unreachable_objects(
  97. self.repo.object_store, self.repo.refs, grace_period=0, dry_run=True
  98. )
  99. # Verify the blob would be pruned but still exists
  100. self.assertEqual({unreachable_blob.id}, pruned)
  101. self.assertGreater(bytes_freed, 0)
  102. self.assertIn(unreachable_blob.id, self.repo.object_store)
  103. def test_garbage_collect(self):
  104. """Test full garbage collection."""
  105. # Create some reachable objects
  106. blob = Blob.from_string(b"test content")
  107. self.repo.object_store.add_object(blob)
  108. tree = Tree()
  109. tree.add(b"test.txt", 0o100644, blob.id)
  110. self.repo.object_store.add_object(tree)
  111. commit = Commit()
  112. commit.tree = tree.id
  113. commit.author = commit.committer = b"Test User <test@example.com>"
  114. commit.commit_time = commit.author_time = 1234567890
  115. commit.commit_timezone = commit.author_timezone = 0
  116. commit.message = b"Test commit"
  117. self.repo.object_store.add_object(commit)
  118. self.repo.refs[b"HEAD"] = commit.id
  119. # Create an unreachable blob
  120. unreachable_blob = Blob.from_string(b"unreachable content")
  121. self.repo.object_store.add_object(unreachable_blob)
  122. # Run garbage collection
  123. stats = garbage_collect(self.repo, prune=True, grace_period=0)
  124. # Check results
  125. self.assertIsInstance(stats, GCStats)
  126. self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
  127. self.assertGreater(stats.bytes_freed, 0)
  128. def test_garbage_collect_no_prune(self):
  129. """Test garbage collection without pruning."""
  130. # Create an unreachable blob
  131. unreachable_blob = Blob.from_string(b"unreachable content")
  132. self.repo.object_store.add_object(unreachable_blob)
  133. # Run garbage collection without pruning
  134. stats = garbage_collect(self.repo, prune=False)
  135. # Check that nothing was pruned
  136. self.assertEqual(set(), stats.pruned_objects)
  137. self.assertEqual(0, stats.bytes_freed)
  138. self.assertIn(unreachable_blob.id, self.repo.object_store)
  139. def test_garbage_collect_dry_run(self):
  140. """Test garbage collection with dry run."""
  141. # Create an unreachable blob
  142. unreachable_blob = Blob.from_string(b"unreachable content")
  143. self.repo.object_store.add_object(unreachable_blob)
  144. # Run garbage collection with dry run
  145. stats = garbage_collect(self.repo, prune=True, grace_period=0, dry_run=True)
  146. # Check that object would be pruned but still exists
  147. self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
  148. self.assertGreater(stats.bytes_freed, 0)
  149. self.assertIn(unreachable_blob.id, self.repo.object_store)
  150. def test_grace_period(self):
  151. """Test that grace period prevents pruning recent objects."""
  152. # Create an unreachable blob
  153. unreachable_blob = Blob.from_string(b"recent unreachable content")
  154. self.repo.object_store.add_object(unreachable_blob)
  155. # Ensure the object is loose
  156. self.assertTrue(self.repo.object_store.contains_loose(unreachable_blob.id))
  157. # Run garbage collection with a 1 hour grace period, but dry run to avoid packing
  158. # The object was just created, so it should not be pruned
  159. stats = garbage_collect(self.repo, prune=True, grace_period=3600, dry_run=True)
  160. # Check that the object was NOT pruned
  161. self.assertEqual(set(), stats.pruned_objects)
  162. self.assertEqual(0, stats.bytes_freed)
  163. self.assertIn(unreachable_blob.id, self.repo.object_store)
  164. # Now test with zero grace period - it should be pruned
  165. stats = garbage_collect(self.repo, prune=True, grace_period=0)
  166. # Check that the object was pruned
  167. self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
  168. self.assertGreater(stats.bytes_freed, 0)
  169. def test_grace_period_old_object(self):
  170. """Test that old objects are pruned even with grace period."""
  171. import os
  172. import time
  173. # Create an unreachable blob
  174. old_blob = Blob.from_string(b"old unreachable content")
  175. self.repo.object_store.add_object(old_blob)
  176. # Ensure the object is loose
  177. self.assertTrue(self.repo.object_store.contains_loose(old_blob.id))
  178. # Manually set the mtime to 2 hours ago
  179. path = self.repo.object_store._get_shafile_path(old_blob.id)
  180. old_time = time.time() - 7200 # 2 hours ago
  181. os.utime(path, (old_time, old_time))
  182. # Run garbage collection with a 1 hour grace period
  183. # The object is 2 hours old, so it should be pruned
  184. stats = garbage_collect(self.repo, prune=True, grace_period=3600)
  185. # Check that the object was pruned
  186. self.assertEqual({old_blob.id}, stats.pruned_objects)
  187. self.assertGreater(stats.bytes_freed, 0)
  188. def test_packed_objects_pruned(self):
  189. """Test that packed objects are pruned via repack with exclusion."""
  190. # Create an unreachable blob
  191. unreachable_blob = Blob.from_string(b"unreachable packed content")
  192. self.repo.object_store.add_object(unreachable_blob)
  193. # Pack the objects to ensure the blob is in a pack
  194. self.repo.object_store.pack_loose_objects()
  195. # Ensure the object is NOT loose anymore
  196. self.assertFalse(self.repo.object_store.contains_loose(unreachable_blob.id))
  197. self.assertIn(unreachable_blob.id, self.repo.object_store)
  198. # Run garbage collection
  199. stats = garbage_collect(self.repo, prune=True, grace_period=0)
  200. # Check that the packed object was pruned
  201. self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
  202. self.assertGreater(stats.bytes_freed, 0)
  203. self.assertNotIn(unreachable_blob.id, self.repo.object_store)