  1. """Tests for dulwich.gc."""
  2. import os
  3. import shutil
  4. import tempfile
  5. import time
  6. from unittest import TestCase
  7. from unittest.mock import patch
  8. from dulwich.config import ConfigDict
  9. from dulwich.gc import (
  10. GCStats,
  11. find_reachable_objects,
  12. find_unreachable_objects,
  13. garbage_collect,
  14. maybe_auto_gc,
  15. prune_unreachable_objects,
  16. should_run_gc,
  17. )
  18. from dulwich.objects import Blob, Commit, Tag, Tree
  19. from dulwich.repo import MemoryRepo, Repo
  20. def no_op_progress(msg):
  21. """Progress callback that does nothing."""
  22. pass


class GCTestCase(TestCase):
    """Tests for garbage collection functionality."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tmpdir)
        self.repo = Repo.init(self.tmpdir)
        self.addCleanup(self.repo.close)

    def test_find_reachable_objects_empty_repo(self):
        """Test finding reachable objects in empty repository."""
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        self.assertEqual(set(), reachable)

    def test_find_reachable_objects_with_commit(self):
        """Test finding reachable objects with a commit."""
        # Create a blob
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All three objects should be reachable
        self.assertEqual({blob.id, tree.id, commit.id}, reachable)

    def test_find_unreachable_objects(self):
        """Test finding unreachable objects."""
        # Create a reachable blob
        reachable_blob = Blob.from_string(b"reachable content")
        self.repo.object_store.add_object(reachable_blob)
        # Create a tree
        tree = Tree()
        tree.add(b"reachable.txt", 0o100644, reachable_blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Find unreachable objects
        unreachable = find_unreachable_objects(self.repo.object_store, self.repo.refs)
        # Only the unreachable blob should be found
        self.assertEqual({unreachable_blob.id}, unreachable)

    def test_prune_unreachable_objects(self):
        """Test pruning unreachable objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Verify it exists
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Prune unreachable objects (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None
        )
        # Verify the blob was pruned
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        # Note: We can't test that the object is gone because delete()
        # only supports loose objects and may not be fully implemented

    def test_prune_unreachable_objects_dry_run(self):
        """Test pruning unreachable objects with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Prune with dry run (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None, dry_run=True
        )
        # Verify the blob would be pruned but still exists
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect(self):
        """Test full garbage collection."""
        # Create some reachable objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo, prune=True, grace_period=None, progress=no_op_progress
        )
        # Check results
        self.assertIsInstance(stats, GCStats)
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        # Check that loose objects were counted
        self.assertGreaterEqual(
            stats.loose_objects_before, 4
        )  # At least blob, tree, commit, unreachable
        self.assertLess(
            stats.loose_objects_after, stats.loose_objects_before
        )  # Should have fewer after GC

    def test_garbage_collect_no_prune(self):
        """Test garbage collection without pruning."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection without pruning
        stats = garbage_collect(self.repo, prune=False, progress=no_op_progress)
        # Check that nothing was pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_dry_run(self):
        """Test garbage collection with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection with dry run (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo,
            prune=True,
            grace_period=None,
            dry_run=True,
            progress=no_op_progress,
        )
        # Check that object would be pruned but still exists
        # On Windows, the repository initialization might create additional unreachable objects
        # So we check that our blob is in the pruned objects, not that it's the only one
        self.assertIn(unreachable_blob.id, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_grace_period(self):
        """Test that grace period prevents pruning recent objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"recent unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(unreachable_blob.id))
        # Run garbage collection with a 1 hour grace period, but dry run to avoid packing
        # The object was just created, so it should not be pruned
        stats = garbage_collect(
            self.repo,
            prune=True,
            grace_period=3600,
            dry_run=True,
            progress=no_op_progress,
        )
        # Check that the object was NOT pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Now test with no grace period - it should be pruned
        stats = garbage_collect(self.repo, prune=True, grace_period=None)
        # Check that the object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_grace_period_old_object(self):
        """Test that old objects are pruned even with grace period."""
        # Create an unreachable blob
        old_blob = Blob.from_string(b"old unreachable content")
        self.repo.object_store.add_object(old_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(old_blob.id))
        # Manually set the mtime to 2 hours ago
        path = self.repo.object_store._get_shafile_path(old_blob.id)
        old_time = time.time() - 7200  # 2 hours ago
        os.utime(path, (old_time, old_time))
        # Run garbage collection with a 1 hour grace period
        # The object is 2 hours old, so it should be pruned
        stats = garbage_collect(
            self.repo, prune=True, grace_period=3600, progress=no_op_progress
        )
        # Check that the object was pruned
        self.assertEqual({old_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_packed_objects_pruned(self):
        """Test that packed objects are pruned via repack with exclusion."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable packed content")
        self.repo.object_store.add_object(unreachable_blob)
        # Pack the objects to ensure the blob is in a pack
        self.repo.object_store.pack_loose_objects(progress=no_op_progress)
        # Ensure the object is NOT loose anymore
        self.assertFalse(self.repo.object_store.contains_loose(unreachable_blob.id))
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo, prune=True, grace_period=None, progress=no_op_progress
        )
        # Check that the packed object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertNotIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_with_progress(self):
        """Test garbage collection with progress callback."""
        # Create some objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Track progress messages
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Run garbage collection with progress
        garbage_collect(
            self.repo, prune=True, grace_period=None, progress=progress_callback
        )
        # Check that progress was reported
        self.assertGreater(len(progress_messages), 0)
        self.assertIn("Finding unreachable objects", progress_messages)
        self.assertIn("Packing references", progress_messages)
        self.assertIn("Repacking repository", progress_messages)
        self.assertIn("Pruning temporary files", progress_messages)

    def test_find_reachable_objects_with_broken_ref(self):
        """Test finding reachable objects with a broken ref."""
        # Create a valid object
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a commit pointing to the blob
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create a broken ref pointing to non-existent object
        broken_sha = b"0" * 40
        self.repo.refs[b"refs/heads/broken"] = broken_sha
        # Track progress to see warning
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Find reachable objects
        reachable = find_reachable_objects(
            self.repo.object_store, self.repo.refs, progress=progress_callback
        )
        # Valid objects should still be found, plus the broken ref SHA
        # (which will be included in reachable but won't be walkable)
        self.assertEqual({blob.id, tree.id, commit.id, broken_sha}, reachable)
        # Check that we got a message about checking the broken object
        # The warning happens when trying to walk from the broken SHA
        check_messages = [msg for msg in progress_messages if "Checking object" in msg]
        self.assertTrue(
            any(broken_sha.decode("ascii") in msg for msg in check_messages)
        )

    def test_find_reachable_objects_with_tag(self):
        """Test finding reachable objects through tags."""
        # Create a blob
        blob = Blob.from_string(b"tagged content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"tagged.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Tagged commit"
        self.repo.object_store.add_object(commit)
        # Create a tag pointing to the commit
        tag = Tag()
        tag.name = b"v1.0"
        tag.message = b"Version 1.0"
        tag.tag_time = 1234567890
        tag.tag_timezone = 0
        tag.object = (Commit, commit.id)
        tag.tagger = b"Test Tagger <tagger@example.com>"
        self.repo.object_store.add_object(tag)
        # Set a ref to the tag
        self.repo.refs[b"refs/tags/v1.0"] = tag.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All objects should be reachable through the tag
        self.assertEqual({blob.id, tree.id, commit.id, tag.id}, reachable)

    def test_prune_with_missing_mtime(self):
        """Test pruning when get_object_mtime raises KeyError."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Mock get_object_mtime to raise KeyError
        with patch.object(
            self.repo.object_store, "get_object_mtime", side_effect=KeyError
        ):
            # Run garbage collection with grace period
            stats = garbage_collect(
                self.repo, prune=True, grace_period=3600, progress=no_op_progress
            )
        # Object should be kept because mtime couldn't be determined
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)


class AutoGCTestCase(TestCase):
    """Tests for auto GC functionality."""

    def test_should_run_gc_disabled(self):
        """Test that auto GC doesn't run when gc.auto is 0."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"0")
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_by_env_var(self):
        """Test that auto GC doesn't run when GIT_AUTO_GC environment variable is 0."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        with patch.dict(os.environ, {"GIT_AUTO_GC": "0"}):
            self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_programmatically(self):
        """Test that auto GC doesn't run when disabled via _autogc_disabled attribute."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        # Disable autogc programmatically
        r._autogc_disabled = True
        self.assertFalse(should_run_gc(r, config))
        # Re-enable autogc
        r._autogc_disabled = False
        # Still false because MemoryRepo doesn't support counting loose objects
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_default_values(self):
        """Test auto GC with default configuration values."""
        r = MemoryRepo()
        config = ConfigDict()
        # Should not run with empty repo
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_with_loose_objects(self):
        """Test that auto GC triggers based on loose object count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"10")  # Low threshold for testing
            # Add some loose objects
            for i in range(15):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            self.assertTrue(should_run_gc(r, config))

    def test_should_run_gc_with_pack_limit(self):
        """Test that auto GC triggers based on pack file count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"autoPackLimit", b"2")  # Low threshold for testing
            # Create some pack files by repacking
            for i in range(3):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
                r.object_store.pack_loose_objects(progress=no_op_progress)
            # Force re-enumeration of packs
            r.object_store._update_pack_cache()
            self.assertTrue(should_run_gc(r, config))

    def test_count_loose_objects(self):
        """Test counting loose objects."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            # Initially should have no loose objects
            count = r.object_store.count_loose_objects()
            self.assertEqual(0, count)
            # Add some loose objects
            for i in range(5):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            count = r.object_store.count_loose_objects()
            self.assertEqual(5, count)

    def test_count_pack_files(self):
        """Test counting pack files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            # Initially should have no packs
            count = r.object_store.count_pack_files()
            self.assertEqual(0, count)
            # Create a pack
            blob = Blob()
            blob.data = b"test blob"
            r.object_store.add_object(blob)
            r.object_store.pack_loose_objects(progress=no_op_progress)
            # Force re-enumeration of packs
            r.object_store._update_pack_cache()
            count = r.object_store.count_pack_files()
            self.assertEqual(1, count)

    def test_maybe_auto_gc_runs_when_needed(self):
        """Test that auto GC runs when thresholds are exceeded."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"5")  # Low threshold for testing
            # Add enough loose objects to trigger GC
            for i in range(10):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertTrue(result)
            mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)

    def test_maybe_auto_gc_skips_when_not_needed(self):
        """Test that auto GC doesn't run when thresholds are not exceeded."""
        r = MemoryRepo()
        config = ConfigDict()
        with patch("dulwich.gc.garbage_collect") as mock_gc:
            result = maybe_auto_gc(r, config, progress=no_op_progress)
        self.assertFalse(result)
        mock_gc.assert_not_called()

    def test_maybe_auto_gc_with_gc_log(self):
        """Test that auto GC is skipped when gc.log exists and is recent."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            # Capture log messages
            import logging

            with self.assertLogs(level=logging.INFO) as cm:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertFalse(result)
            # Verify gc.log contents were logged
            self.assertTrue(any("Previous GC failed" in msg for msg in cm.output))

    def test_maybe_auto_gc_with_expired_gc_log(self):
        """Test that auto GC runs when gc.log exists but is expired."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"0.days")  # Expire immediately
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file old
            old_time = time.time() - 86400  # 1 day ago
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertTrue(result)
            mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)
            # gc.log should be removed after successful GC
            self.assertFalse(os.path.exists(gc_log_path))

    def test_maybe_auto_gc_handles_gc_failure(self):
        """Test that auto GC handles failures gracefully."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch(
                "dulwich.gc.garbage_collect", side_effect=OSError("GC failed")
            ) as mock_gc:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertFalse(result)
            mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)
            # Check that error was written to gc.log
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            self.assertTrue(os.path.exists(gc_log_path))
            with open(gc_log_path, "rb") as f:
                content = f.read()
            self.assertIn(b"Auto GC failed: GC failed", content)

    def test_gc_log_expiry_singular_day(self):
        """Test that gc.logExpiry supports singular '.day' format."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"1.day")  # Singular form
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file 2 days old (older than 1 day expiry)
            old_time = time.time() - (2 * 86400)
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertTrue(result)
            mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)

    def test_gc_log_expiry_invalid_format(self):
        """Test that invalid gc.logExpiry format defaults to 1 day."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"invalid")  # Invalid format
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file recent (within default 1 day)
            recent_time = time.time() - 3600  # 1 hour ago
            os.utime(gc_log_path, (recent_time, recent_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            # Capture log messages
            import logging

            with self.assertLogs(level=logging.INFO) as cm:
                result = maybe_auto_gc(r, config, progress=no_op_progress)
            # Should not run GC because gc.log is recent (within default 1 day)
            self.assertFalse(result)
            # Check that gc.log content was logged
            self.assertTrue(any("gc.log content:" in msg for msg in cm.output))

    def test_maybe_auto_gc_non_disk_repo(self):
        """Test auto GC on non-disk repository (MemoryRepo)."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"1")  # Would trigger if it were disk-based
        # Add objects that would trigger GC in a disk repo
        for i in range(10):
            blob = Blob()
            blob.data = f"test {i}".encode()
            r.object_store.add_object(blob)
        # For non-disk repos, should_run_gc returns False
        # because it can't count loose objects
        result = maybe_auto_gc(r, config, progress=no_op_progress)
        self.assertFalse(result)

    def test_gc_removes_existing_gc_log_on_success(self):
        """Test that successful GC removes pre-existing gc.log file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Create gc.log file from previous failure
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make it old enough to be expired
            old_time = time.time() - (2 * 86400)  # 2 days ago
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            # Run auto GC
            result = maybe_auto_gc(r, config, progress=no_op_progress)
            self.assertTrue(result)
            # gc.log should be removed after successful GC
            self.assertFalse(os.path.exists(gc_log_path))