"""Tests for dulwich.gc."""

import os
import shutil
import tempfile
import time
from unittest import TestCase
from unittest.mock import patch

from dulwich.config import ConfigDict
from dulwich.gc import (
    GCStats,
    find_reachable_objects,
    find_unreachable_objects,
    garbage_collect,
    maybe_auto_gc,
    prune_unreachable_objects,
    should_run_gc,
)
from dulwich.objects import Blob, Commit, Tag, Tree
from dulwich.repo import MemoryRepo, Repo
class GCTestCase(TestCase):
    """Tests for garbage collection functionality."""

    def setUp(self):
        # Each test gets a fresh on-disk repository; both the directory and
        # the repo handle are cleaned up even if the test fails.
        self.tmpdir = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tmpdir)
        self.repo = Repo.init(self.tmpdir)
        self.addCleanup(self.repo.close)

    def test_find_reachable_objects_empty_repo(self):
        """Test finding reachable objects in empty repository."""
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        self.assertEqual(set(), reachable)

    def test_find_reachable_objects_with_commit(self):
        """Test finding reachable objects with a commit."""
        # Create a blob
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All three objects should be reachable
        self.assertEqual({blob.id, tree.id, commit.id}, reachable)

    def test_find_unreachable_objects(self):
        """Test finding unreachable objects."""
        # Create a reachable blob
        reachable_blob = Blob.from_string(b"reachable content")
        self.repo.object_store.add_object(reachable_blob)
        # Create a tree
        tree = Tree()
        tree.add(b"reachable.txt", 0o100644, reachable_blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        # Set HEAD to the commit
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Find unreachable objects
        unreachable = find_unreachable_objects(self.repo.object_store, self.repo.refs)
        # Only the unreachable blob should be found
        self.assertEqual({unreachable_blob.id}, unreachable)

    def test_prune_unreachable_objects(self):
        """Test pruning unreachable objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Verify it exists
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Prune unreachable objects (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None
        )
        # Verify the blob was pruned
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        # Note: We can't test that the object is gone because delete()
        # only supports loose objects and may not be fully implemented

    def test_prune_unreachable_objects_dry_run(self):
        """Test pruning unreachable objects with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Prune with dry run (grace_period=None means no grace period check)
        pruned, bytes_freed = prune_unreachable_objects(
            self.repo.object_store, self.repo.refs, grace_period=None, dry_run=True
        )
        # Verify the blob would be pruned but still exists
        self.assertEqual({unreachable_blob.id}, pruned)
        self.assertGreater(bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect(self):
        """Test full garbage collection."""
        # Create some reachable objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(self.repo, prune=True, grace_period=None)
        # Check results
        self.assertIsInstance(stats, GCStats)
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        # Check that loose objects were counted
        self.assertGreaterEqual(
            stats.loose_objects_before, 4
        )  # At least blob, tree, commit, unreachable
        self.assertLess(
            stats.loose_objects_after, stats.loose_objects_before
        )  # Should have fewer after GC

    def test_garbage_collect_no_prune(self):
        """Test garbage collection without pruning."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection without pruning
        stats = garbage_collect(self.repo, prune=False)
        # Check that nothing was pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_dry_run(self):
        """Test garbage collection with dry run."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Run garbage collection with dry run (grace_period=None means no grace period check)
        stats = garbage_collect(self.repo, prune=True, grace_period=None, dry_run=True)
        # Check that object would be pruned but still exists
        # On Windows, the repository initialization might create additional unreachable objects
        # So we check that our blob is in the pruned objects, not that it's the only one
        self.assertIn(unreachable_blob.id, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertIn(unreachable_blob.id, self.repo.object_store)

    def test_grace_period(self):
        """Test that grace period prevents pruning recent objects."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"recent unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(unreachable_blob.id))
        # Run garbage collection with a 1 hour grace period, but dry run to avoid packing
        # The object was just created, so it should not be pruned
        stats = garbage_collect(self.repo, prune=True, grace_period=3600, dry_run=True)
        # Check that the object was NOT pruned
        self.assertEqual(set(), stats.pruned_objects)
        self.assertEqual(0, stats.bytes_freed)
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Now test with no grace period - it should be pruned
        stats = garbage_collect(self.repo, prune=True, grace_period=None)
        # Check that the object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_grace_period_old_object(self):
        """Test that old objects are pruned even with grace period."""
        # Create an unreachable blob
        old_blob = Blob.from_string(b"old unreachable content")
        self.repo.object_store.add_object(old_blob)
        # Ensure the object is loose
        self.assertTrue(self.repo.object_store.contains_loose(old_blob.id))
        # Manually set the mtime to 2 hours ago
        path = self.repo.object_store._get_shafile_path(old_blob.id)
        old_time = time.time() - 7200  # 2 hours ago
        os.utime(path, (old_time, old_time))
        # Run garbage collection with a 1 hour grace period
        # The object is 2 hours old, so it should be pruned
        stats = garbage_collect(self.repo, prune=True, grace_period=3600)
        # Check that the object was pruned
        self.assertEqual({old_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)

    def test_packed_objects_pruned(self):
        """Test that packed objects are pruned via repack with exclusion."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable packed content")
        self.repo.object_store.add_object(unreachable_blob)
        # Pack the objects to ensure the blob is in a pack
        self.repo.object_store.pack_loose_objects()
        # Ensure the object is NOT loose anymore
        self.assertFalse(self.repo.object_store.contains_loose(unreachable_blob.id))
        self.assertIn(unreachable_blob.id, self.repo.object_store)
        # Run garbage collection (grace_period=None means no grace period check)
        stats = garbage_collect(self.repo, prune=True, grace_period=None)
        # Check that the packed object was pruned
        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
        self.assertGreater(stats.bytes_freed, 0)
        self.assertNotIn(unreachable_blob.id, self.repo.object_store)

    def test_garbage_collect_with_progress(self):
        """Test garbage collection with progress callback."""
        # Create some objects
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Track progress messages
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Run garbage collection with progress
        garbage_collect(
            self.repo, prune=True, grace_period=None, progress=progress_callback
        )
        # Check that progress was reported
        self.assertGreater(len(progress_messages), 0)
        self.assertIn("Finding unreachable objects", progress_messages)
        self.assertIn("Packing references", progress_messages)
        self.assertIn("Repacking repository", progress_messages)
        self.assertIn("Pruning temporary files", progress_messages)

    def test_find_reachable_objects_with_broken_ref(self):
        """Test finding reachable objects with a broken ref."""
        # Create a valid object
        blob = Blob.from_string(b"test content")
        self.repo.object_store.add_object(blob)
        # Create a commit pointing to the blob
        tree = Tree()
        tree.add(b"test.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Test commit"
        self.repo.object_store.add_object(commit)
        self.repo.refs[b"HEAD"] = commit.id
        # Create a broken ref pointing to non-existent object
        broken_sha = b"0" * 40
        self.repo.refs[b"refs/heads/broken"] = broken_sha
        # Track progress to see warning
        progress_messages = []

        def progress_callback(msg):
            progress_messages.append(msg)

        # Find reachable objects
        reachable = find_reachable_objects(
            self.repo.object_store, self.repo.refs, progress=progress_callback
        )
        # Valid objects should still be found, plus the broken ref SHA
        # (which will be included in reachable but won't be walkable)
        self.assertEqual({blob.id, tree.id, commit.id, broken_sha}, reachable)
        # Check that we got a message about checking the broken object
        # The warning happens when trying to walk from the broken SHA
        check_messages = [msg for msg in progress_messages if "Checking object" in msg]
        self.assertTrue(
            any(broken_sha.decode("ascii") in msg for msg in check_messages)
        )

    def test_find_reachable_objects_with_tag(self):
        """Test finding reachable objects through tags."""
        # Create a blob
        blob = Blob.from_string(b"tagged content")
        self.repo.object_store.add_object(blob)
        # Create a tree
        tree = Tree()
        tree.add(b"tagged.txt", 0o100644, blob.id)
        self.repo.object_store.add_object(tree)
        # Create a commit
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Test User <test@example.com>"
        commit.commit_time = commit.author_time = 1234567890
        commit.commit_timezone = commit.author_timezone = 0
        commit.message = b"Tagged commit"
        self.repo.object_store.add_object(commit)
        # Create a tag pointing to the commit
        tag = Tag()
        tag.name = b"v1.0"
        tag.message = b"Version 1.0"
        tag.tag_time = 1234567890
        tag.tag_timezone = 0
        tag.object = (Commit, commit.id)
        tag.tagger = b"Test Tagger <tagger@example.com>"
        self.repo.object_store.add_object(tag)
        # Set a ref to the tag
        self.repo.refs[b"refs/tags/v1.0"] = tag.id
        # Find reachable objects
        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
        # All objects should be reachable through the tag
        self.assertEqual({blob.id, tree.id, commit.id, tag.id}, reachable)

    def test_prune_with_missing_mtime(self):
        """Test pruning when get_object_mtime raises KeyError."""
        # Create an unreachable blob
        unreachable_blob = Blob.from_string(b"unreachable content")
        self.repo.object_store.add_object(unreachable_blob)
        # Mock get_object_mtime to raise KeyError
        with patch.object(
            self.repo.object_store, "get_object_mtime", side_effect=KeyError
        ):
            # Run garbage collection with grace period
            stats = garbage_collect(self.repo, prune=True, grace_period=3600)
            # Object should be kept because mtime couldn't be determined
            self.assertEqual(set(), stats.pruned_objects)
            self.assertEqual(0, stats.bytes_freed)
class AutoGCTestCase(TestCase):
    """Tests for auto GC functionality."""

    def test_should_run_gc_disabled(self):
        """Test that auto GC doesn't run when gc.auto is 0."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"0")
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_by_env_var(self):
        """Test that auto GC doesn't run when GIT_AUTO_GC environment variable is 0."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        with patch.dict(os.environ, {"GIT_AUTO_GC": "0"}):
            self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_disabled_programmatically(self):
        """Test that auto GC doesn't run when disabled via _autogc_disabled attribute."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"10")  # Should normally run
        # Disable autogc programmatically
        r._autogc_disabled = True
        self.assertFalse(should_run_gc(r, config))
        # Re-enable autogc
        r._autogc_disabled = False
        # Still false because MemoryRepo doesn't support counting loose objects
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_default_values(self):
        """Test auto GC with default configuration values."""
        r = MemoryRepo()
        config = ConfigDict()
        # Should not run with empty repo
        self.assertFalse(should_run_gc(r, config))

    def test_should_run_gc_with_loose_objects(self):
        """Test that auto GC triggers based on loose object count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"10")  # Low threshold for testing
            # Add some loose objects
            for i in range(15):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            self.assertTrue(should_run_gc(r, config))

    def test_should_run_gc_with_pack_limit(self):
        """Test that auto GC triggers based on pack file count."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"autoPackLimit", b"2")  # Low threshold for testing
            # Create some pack files by repacking
            for i in range(3):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
                r.object_store.pack_loose_objects()
            # Force re-enumeration of packs
            r.object_store._update_pack_cache()
            self.assertTrue(should_run_gc(r, config))

    def test_count_loose_objects(self):
        """Test counting loose objects."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            # Initially should have no loose objects
            count = r.object_store.count_loose_objects()
            self.assertEqual(0, count)
            # Add some loose objects
            for i in range(5):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            count = r.object_store.count_loose_objects()
            self.assertEqual(5, count)

    def test_count_pack_files(self):
        """Test counting pack files."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            # Initially should have no packs
            count = r.object_store.count_pack_files()
            self.assertEqual(0, count)
            # Create a pack
            blob = Blob()
            blob.data = b"test blob"
            r.object_store.add_object(blob)
            r.object_store.pack_loose_objects()
            # Force re-enumeration of packs
            r.object_store._update_pack_cache()
            count = r.object_store.count_pack_files()
            self.assertEqual(1, count)

    def test_maybe_auto_gc_runs_when_needed(self):
        """Test that auto GC runs when thresholds are exceeded."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"5")  # Low threshold for testing
            # Add enough loose objects to trigger GC
            for i in range(10):
                blob = Blob()
                blob.data = f"test blob {i}".encode()
                r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True)

    def test_maybe_auto_gc_skips_when_not_needed(self):
        """Test that auto GC doesn't run when thresholds are not exceeded."""
        r = MemoryRepo()
        config = ConfigDict()
        with patch("dulwich.gc.garbage_collect") as mock_gc:
            result = maybe_auto_gc(r, config)
            self.assertFalse(result)
            mock_gc.assert_not_called()

    def test_maybe_auto_gc_with_gc_log(self):
        """Test that auto GC is skipped when gc.log exists and is recent."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("builtins.print") as mock_print:
                result = maybe_auto_gc(r, config)
                self.assertFalse(result)
                # Verify gc.log contents were printed
                mock_print.assert_called_once_with("Previous GC failed\n")

    def test_maybe_auto_gc_with_expired_gc_log(self):
        """Test that auto GC runs when gc.log exists but is expired."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"0.days")  # Expire immediately
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file old
            old_time = time.time() - 86400  # 1 day ago
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True)
                # gc.log should be removed after successful GC
                self.assertFalse(os.path.exists(gc_log_path))

    def test_maybe_auto_gc_handles_gc_failure(self):
        """Test that auto GC handles failures gracefully."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch(
                "dulwich.gc.garbage_collect", side_effect=OSError("GC failed")
            ) as mock_gc:
                result = maybe_auto_gc(r, config)
                self.assertFalse(result)
                mock_gc.assert_called_once_with(r, auto=True)
            # Check that error was written to gc.log
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            self.assertTrue(os.path.exists(gc_log_path))
            with open(gc_log_path, "rb") as f:
                content = f.read()
                self.assertIn(b"Auto GC failed: GC failed", content)

    def test_gc_log_expiry_singular_day(self):
        """Test that gc.logExpiry supports singular '.day' format."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"1.day")  # Singular form
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file 2 days old (older than 1 day expiry)
            old_time = time.time() - (2 * 86400)
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("dulwich.gc.garbage_collect") as mock_gc:
                result = maybe_auto_gc(r, config)
                self.assertTrue(result)
                mock_gc.assert_called_once_with(r, auto=True)

    def test_gc_log_expiry_invalid_format(self):
        """Test that invalid gc.logExpiry format defaults to 1 day."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            config.set(b"gc", b"logExpiry", b"invalid")  # Invalid format
            # Create gc.log file
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make the file recent (within default 1 day)
            recent_time = time.time() - 3600  # 1 hour ago
            os.utime(gc_log_path, (recent_time, recent_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            with patch("builtins.print") as mock_print:
                result = maybe_auto_gc(r, config)
                # Should not run GC because gc.log is recent (within default 1 day)
                self.assertFalse(result)
                mock_print.assert_called_once()

    def test_maybe_auto_gc_non_disk_repo(self):
        """Test auto GC on non-disk repository (MemoryRepo)."""
        r = MemoryRepo()
        config = ConfigDict()
        config.set(b"gc", b"auto", b"1")  # Would trigger if it were disk-based
        # Add objects that would trigger GC in a disk repo
        for i in range(10):
            blob = Blob()
            blob.data = f"test {i}".encode()
            r.object_store.add_object(blob)
        # For non-disk repos, should_run_gc returns False
        # because it can't count loose objects
        result = maybe_auto_gc(r, config)
        self.assertFalse(result)

    def test_gc_removes_existing_gc_log_on_success(self):
        """Test that successful GC removes pre-existing gc.log file."""
        with tempfile.TemporaryDirectory() as tmpdir:
            r = Repo.init(tmpdir)
            config = ConfigDict()
            config.set(b"gc", b"auto", b"1")  # Low threshold
            # Create gc.log file from previous failure
            gc_log_path = os.path.join(r.controldir(), "gc.log")
            with open(gc_log_path, "wb") as f:
                f.write(b"Previous GC failed\n")
            # Make it old enough to be expired
            old_time = time.time() - (2 * 86400)  # 2 days ago
            os.utime(gc_log_path, (old_time, old_time))
            # Add objects to trigger GC
            blob = Blob()
            blob.data = b"test"
            r.object_store.add_object(blob)
            # Run auto GC
            result = maybe_auto_gc(r, config)
            self.assertTrue(result)
            # gc.log should be removed after successful GC
            self.assertFalse(os.path.exists(gc_log_path))