  1. """Tests for dulwich.gc."""
  2. import os
  3. import shutil
  4. import tempfile
  5. import time
  6. from unittest import TestCase
  7. from unittest.mock import patch
  8. from dulwich.config import ConfigDict
  9. from dulwich.gc import (
  10. GCStats,
  11. find_reachable_objects,
  12. find_unreachable_objects,
  13. garbage_collect,
  14. maybe_auto_gc,
  15. prune_unreachable_objects,
  16. should_run_gc,
  17. )
  18. from dulwich.objects import ZERO_SHA, Blob, Commit, Tag, Tree
  19. from dulwich.repo import MemoryRepo, Repo
  20. def no_op_progress(msg):
  21. """Progress callback that does nothing."""
  22. class GCTestCase(TestCase):
  23. """Tests for garbage collection functionality."""
  24. def setUp(self):
  25. self.tmpdir = tempfile.mkdtemp()
  26. self.addCleanup(shutil.rmtree, self.tmpdir)
  27. self.repo = Repo.init(self.tmpdir)
  28. self.addCleanup(self.repo.close)
    def test_find_reachable_objects_empty_repo(self):
        """Test finding reachable objects in empty repository."""
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        self.assertEqual(set(), reachable)

    def test_find_reachable_objects_with_commit(self):
        """Test finding reachable objects with a commit."""
        # Create a blob
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All three objects should be reachable
        self.assertEqual({blob.id, tree.id, commit.id}, reachable)

    def test_find_unreachable_objects(self):
        """Test finding unreachable objects."""
        # Create a reachable blob
        reachable_blob = Blob.from_string(b"reachable content")
        self.repo.object_store.add_object(reachable_blob)
        # Create a tree
        tree = Tree()
        tree.add(b"reachable.txt", 0o100644, reachable_blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Find unreachable objects
        unreachable = find_unreachable_objects(self.repo.object_store, self.repo.refs)
        # Only the unreachable blob should be found
        self.assertEqual({unreachable_blob.id}, unreachable)

    def test_prune_unreachable_objects(self):
        """Test pruning unreachable objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Verify it exists
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Prune unreachable objects (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None
        )
        # Verify the blob was pruned
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        # Note: We can't test that the object is gone because delete()
        # only supports loose objects and may not be fully implemented

    def test_prune_unreachable_objects_dry_run(self):
        """Test pruning unreachable objects with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Prune with dry run (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None, dry_run=True
        )
        # Verify the blob would be pruned but still exists
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect(self):
        """Test full garbage collection."""
        # Create some reachable objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo, prune=True, grace_period=None, progress=no_op_progress
        )
        # Check results
        self.assertIsInstance(stats, GCStats)
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        # Check that loose objects were counted
        self.assertGreaterEqual(
            stats.loose_objects_before, 4
        )  # At least blob, tree, commit, unreachable
        self.assertLess(
            stats.loose_objects_after, stats.loose_objects_before
        )  # Should have fewer after GC

    def test_garbage_collect_no_prune(self):
        """Test garbage collection without pruning."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection without pruning
        stats = garbage_collect(self.repo, prune=False, progress=no_op_progress)
        # Check that nothing was pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_dry_run(self):
        """Test garbage collection with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection with dry run (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo,
            prune=True,
            grace_period=None,
            dry_run=True,
            progress=no_op_progress,
        )
        # Check that object would be pruned but still exists
        # On Windows, the repository initialization might create additional unreachable objects
        # So we check that our blob is in the pruned objects, not that it's the only one
        self.assertIn(unreachable_blob.id, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_grace_period(self):
        """Test that grace period prevents pruning recent objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"recent unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(unreachable_blob.id))
        # Run garbage collection with a 1 hour grace period, but dry run to avoid packing
        # The object was just created, so it should not be pruned
        stats = garbage_collect(
            self.repo,
            prune=True,
            grace_period=3600,
            dry_run=True,
            progress=no_op_progress,
        )
        # Check that the object was NOT pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Now test with no grace period - it should be pruned
        stats = garbage_collect(self.repo, prune=True, grace_period=None)
        # Check that the object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_grace_period_old_object(self):
        """Test that old objects are pruned even with grace period."""
        # Create an unreachable blob
        old_blob = Blob.from_string(b"old unreachable content")
        self.repo.object_store.add_object(old_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(old_blob.id))
        # Manually set the mtime to 2 hours ago
        path = self.repo.object_store._get_shafile_path(old_blob.id)
        old_time = time.time() - 7200  # 2 hours ago
        os.utime(path, (old_time, old_time))
        # Run garbage collection with a 1 hour grace period
        # The object is 2 hours old, so it should be pruned
        stats = garbage_collect(
            self.repo, prune=True, grace_period=3600, progress=no_op_progress
        )
        # Check that the object was pruned
        self.assertEqual({old_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_packed_objects_pruned(self):
        """Test that packed objects are pruned via repack with exclusion."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable packed content")
        self.repo.object_store.add_object(unreachable_blob)
        # Pack the objects to ensure the blob is in a pack
        self.repo.object_store.pack_loose_objects(progress=no_op_progress)
        # Ensure the object is NOT loose anymore
        self.assertFalse(self.repo.object_store.contains_loose(unreachable_blob.id))
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(
            self.repo, prune=True, grace_period=None, progress=no_op_progress
        )
        # Check that the packed object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertNotIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_with_progress(self):
        """Test garbage collection with progress callback."""
        # Create some objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)

        # Track progress messages
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Run garbage collection with progress
        garbage_collect(
            self.repo, prune=True, grace_period=None, progress=progress_callback
        )
        # Check that progress was reported
        self.assertGreater(len(progress_messages), 0)
        self.assertIn("Finding unreachable objects", progress_messages)
        self.assertIn("Packing references", progress_messages)
        self.assertIn("Repacking repository", progress_messages)
        self.assertIn("Pruning temporary files", progress_messages)

    def test_find_reachable_objects_with_broken_ref(self):
        """Test finding reachable objects with a broken ref."""
        # Create a valid object
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a commit pointing to the blob
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create a broken ref pointing to non-existent object
        broken_sha = ZERO_SHA
        self.repo.refs[b"refs/heads/broken"] = broken_sha

        # Track progress to see warning
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Find reachable objects
        reachable = find_reachable_objects(
            self.repo.object_store, self.repo.refs, progress=progress_callback
        )
        # Valid objects should still be found, plus the broken ref SHA
        # (which will be included in reachable but won't be walkable)
        self.assertEqual({blob.id, tree.id, commit.id, broken_sha}, reachable)
        # Check that we got a message about checking the broken object
        # The warning happens when trying to walk from the broken SHA
        check_messages = [msg for msg in progress_messages if "Checking object" in msg]
        self.assertTrue(
            any(broken_sha.decode("ascii") in msg for msg in check_messages)
        )

    def test_find_reachable_objects_with_tag(self):
        """Test finding reachable objects through tags."""
        # Create a blob
        blob = Blob.from_string(b"tagged content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"tagged.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Tagged commit"
        self.repo.object_store.add_object(commit)
        # Create a tag pointing to the commit
        tag = Tag()
        tag.name = b"v1.0"
        tag.message = b"Version 1.0"
        tag.tag_time = 1234567890
        tag.tag_timezone = 0
        tag.object = (Commit, commit.id)
        tag.tagger = b"Test Tagger <tagger@example.com>"
        self.repo.object_store.add_object(tag)
        # Set a ref to the tag
        self.repo.refs[b"refs/tags/v1.0"] = tag.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All objects should be reachable through the tag
        self.assertEqual({blob.id, tree.id, commit.id, tag.id}, reachable)

    def test_prune_with_missing_mtime(self):
        """Test pruning when get_object_mtime raises KeyError."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Mock get_object_mtime to raise KeyError
        with patch.object(
            self.repo.object_store, "get_object_mtime", side_effect=KeyError
        ):
            # Run garbage collection with grace period
            stats = garbage_collect(
                self.repo, prune=True, grace_period=3600, progress=no_op_progress
            )
        # Object should be kept because mtime couldn't be determined
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)

class AutoGCTestCase(TestCase):
    """Tests for auto GC functionality."""

    def test_should_run_gc_disabled(self):
        """Test that auto GC doesn't run when gc.auto is 0."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        config.set(b"gc", b"auto", b"0")
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_by_env_var(self):
        """Test that auto GC doesn't run when GIT_AUTO_GC environment variable is 0."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        with patch.dict(os.environ, {"GIT_AUTO_GC": "0"}):
            self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_programmatically(self):
        """Test that auto GC doesn't run when disabled via _autogc_disabled attribute."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        # Disable autogc programmatically
        r._autogc_disabled = True
        self.assertFalse(should_run_gc(r, config))
        # Re-enable autogc
        r._autogc_disabled = False
        # Still false because MemoryRepo doesn't support counting loose objects
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_default_values(self):
        """Test auto GC with default configuration values."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        # Should not run with empty repo
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_with_loose_objects(self):
        """Test that auto GC triggers based on loose object count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"10")  # Low threshold for testing
                # Add some loose objects
                for i in range(15):
                    blob = Blob()
                    blob.data = f"test blob {i}".encode()
                    r.object_store.add_object(blob)
                self.assertTrue(should_run_gc(r, config))

    def test_should_run_gc_with_pack_limit(self):
        """Test that auto GC triggers based on pack file count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"autoPackLimit", b"2")  # Low threshold for testing
                # Create some pack files by repacking
                for i in range(3):
                    blob = Blob()
                    blob.data = f"test blob {i}".encode()
                    r.object_store.add_object(blob)
                    r.object_store.pack_loose_objects(progress=no_op_progress)
                # Force re-enumeration of packs
                r.object_store._update_pack_cache()
                self.assertTrue(should_run_gc(r, config))

    def test_count_loose_objects(self):
        """Test counting loose objects."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                # Initially should have no loose objects
                count = r.object_store.count_loose_objects()
                self.assertEqual(0, count)
                # Add some loose objects
                for i in range(5):
                    blob = Blob()
                    blob.data = f"test blob {i}".encode()
                    r.object_store.add_object(blob)
                count = r.object_store.count_loose_objects()
                self.assertEqual(5, count)

    def test_count_pack_files(self):
        """Test counting pack files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                # Initially should have no packs
                count = r.object_store.count_pack_files()
                self.assertEqual(0, count)
                # Create a pack
                blob = Blob()
                blob.data = b"test blob"
                r.object_store.add_object(blob)
                r.object_store.pack_loose_objects(progress=no_op_progress)
                # Force re-enumeration of packs
                r.object_store._update_pack_cache()
                count = r.object_store.count_pack_files()
                self.assertEqual(1, count)

    def test_maybe_auto_gc_runs_when_needed(self):
        """Test that auto GC runs when thresholds are exceeded."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"5")  # Low threshold for testing
                # Add enough loose objects to trigger GC
                for i in range(10):
                    blob = Blob()
                    blob.data = f"test blob {i}".encode()
                    r.object_store.add_object(blob)
                with patch("dulwich.gc.garbage_collect") as mock_gc:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)

    def test_maybe_auto_gc_skips_when_not_needed(self):
        """Test that auto GC doesn't run when thresholds are not exceeded."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        with patch("dulwich.gc.garbage_collect") as mock_gc:
            result = maybe_auto_gc(r, config, progress=no_op_progress)
        self.assertFalse(result)
        mock_gc.assert_not_called()

    def test_maybe_auto_gc_with_gc_log(self):
        """Test that auto GC is skipped when gc.log exists and is recent."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                # Create gc.log file
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                with open(gc_log_path, "wb") as f:
                    f.write(b"Previous GC failed\n")
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)

                # Capture log messages
                import logging

                with self.assertLogs(level=logging.INFO) as cm:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertFalse(result)
                # Verify gc.log contents were logged
                self.assertTrue(any("Previous GC failed" in msg for msg in cm.output))

    def test_maybe_auto_gc_with_expired_gc_log(self):
        """Test that auto GC runs when gc.log exists but is expired."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                config.set(b"gc", b"logExpiry", b"0.days")  # Expire immediately
                # Create gc.log file
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                with open(gc_log_path, "wb") as f:
                    f.write(b"Previous GC failed\n")
                # Make the file old
                old_time = time.time() - 86400  # 1 day ago
                os.utime(gc_log_path, (old_time, old_time))
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)
                with patch("dulwich.gc.garbage_collect") as mock_gc:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)
                # gc.log should be removed after successful GC
                self.assertFalse(os.path.exists(gc_log_path))

    def test_maybe_auto_gc_handles_gc_failure(self):
        """Test that auto GC handles failures gracefully."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)
                with patch(
                    "dulwich.gc.garbage_collect", side_effect=OSError("GC failed")
                ) as mock_gc:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertFalse(result)
                mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)
                # Check that error was written to gc.log
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                self.assertTrue(os.path.exists(gc_log_path))
                with open(gc_log_path, "rb") as f:
                    content = f.read()
                self.assertIn(b"Auto GC failed: GC failed", content)

    def test_gc_log_expiry_singular_day(self):
        """Test that gc.logExpiry supports singular '.day' format."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                config.set(b"gc", b"logExpiry", b"1.day")  # Singular form
                # Create gc.log file
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                with open(gc_log_path, "wb") as f:
                    f.write(b"Previous GC failed\n")
                # Make the file 2 days old (older than 1 day expiry)
                old_time = time.time() - (2 * 86400)
                os.utime(gc_log_path, (old_time, old_time))
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)
                with patch("dulwich.gc.garbage_collect") as mock_gc:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True, progress=no_op_progress)

    def test_gc_log_expiry_invalid_format(self):
        """Test that invalid gc.logExpiry format defaults to 1 day."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                config.set(b"gc", b"logExpiry", b"invalid")  # Invalid format
                # Create gc.log file
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                with open(gc_log_path, "wb") as f:
                    f.write(b"Previous GC failed\n")
                # Make the file recent (within default 1 day)
                recent_time = time.time() - 3600  # 1 hour ago
                os.utime(gc_log_path, (recent_time, recent_time))
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)

                # Capture log messages
                import logging

                with self.assertLogs(level=logging.INFO) as cm:
                    result = maybe_auto_gc(r, config, progress=no_op_progress)
                # Should not run GC because gc.log is recent (within default 1 day)
                self.assertFalse(result)
                # Check that gc.log content was logged
                self.assertTrue(any("gc.log content:" in msg for msg in cm.output))

    def test_maybe_auto_gc_non_disk_repo(self):
        """Test auto GC on non-disk repository (MemoryRepo)."""
        r = MemoryRepo()
        self.addCleanup(r.close)
        config = ConfigDict()
        config.set(b"gc", b"auto", b"1")  # Would trigger if it were disk-based
        # Add objects that would trigger GC in a disk repo
        for i in range(10):
            blob = Blob()
            blob.data = f"test {i}".encode()
            r.object_store.add_object(blob)
        # For non-disk repos, should_run_gc returns False
        # because it can't count loose objects
        result = maybe_auto_gc(r, config, progress=no_op_progress)
        self.assertFalse(result)

    def test_gc_removes_existing_gc_log_on_success(self):
        """Test that successful GC removes pre-existing gc.log file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            with Repo.init(tmpdir) as r:
                config = ConfigDict()
                config.set(b"gc", b"auto", b"1")  # Low threshold
                # Create gc.log file from previous failure
                gc_log_path = os.path.join(r.controldir(), "gc.log")
                with open(gc_log_path, "wb") as f:
                    f.write(b"Previous GC failed\n")
                # Make it old enough to be expired
                old_time = time.time() - (2 * 86400)  # 2 days ago
                os.utime(gc_log_path, (old_time, old_time))
                # Add objects to trigger GC
                blob = Blob()
                blob.data = b"test"
                r.object_store.add_object(blob)
                # Run auto GC
                result = maybe_auto_gc(r, config, progress=no_op_progress)
                self.assertTrue(result)
                # gc.log should be removed after successful GC
                self.assertFalse(os.path.exists(gc_log_path))