Browse Source

Add prune command to dulwich.porcelain

This adds a high-level porcelain wrapper for the prune functionality,
making it easy to clean up orphaned temporary pack files from Python
code or scripts.
Jelmer Vernooij 1 month ago
parent
commit
726502052f
3 changed files with 105 additions and 1 deletions
  1. 2 1
      NEWS
  2. 26 0
      dulwich/porcelain.py
  3. 77 0
      tests/test_porcelain.py

+ 2 - 1
NEWS

@@ -46,7 +46,8 @@
 
  * Add ``prune`` method to object stores for cleaning up orphaned temporary
    pack files. This is now called by ``garbage_collect()`` to match Git's
-   behavior. (#558, Jelmer Vernooij)
+   behavior. Also added ``prune`` command to ``dulwich.porcelain``.
+   (Jelmer Vernooij, #558)
 
 0.23.0	2025-06-21
 

+ 26 - 0
dulwich/porcelain.py

@@ -43,6 +43,7 @@ Currently implemented:
  * ls_tree
  * merge
  * merge_tree
+ * prune
  * pull
  * push
  * rm
@@ -3436,6 +3437,31 @@ def gc(
         )
 
 
+def prune(
+    repo,
+    grace_period: Optional[int] = None,
+    dry_run: bool = False,
+    progress=None,
+):
+    """Remove stale temporary files from a repository's object store.
+
+    Thin wrapper that opens ``repo`` and calls ``object_store.prune()``.
+
+    Args:
+      repo: Path to the repository or a Repo object
+      grace_period: Minimum age in seconds before a temporary file is
+        removed; None defers to the object store default (2 weeks)
+      dry_run: If True, the object store is left untouched
+      progress: Optional callable, invoked once with a status message
+    """
+    with open_repo_closing(repo) as repository:
+        if progress:
+            progress("Pruning temporary files")
+        if dry_run:
+            return
+        repository.object_store.prune(grace_period=grace_period)
+
+
 def count_objects(repo=".", verbose=False) -> CountObjectsResult:
     """Count unpacked objects and their disk usage.
 

+ 77 - 0
tests/test_porcelain.py

@@ -39,6 +39,7 @@ from unittest import skipIf
 from dulwich import porcelain
 from dulwich.diff_tree import tree_changes
 from dulwich.errors import CommitError
+from dulwich.object_store import DEFAULT_TEMPFILE_GRACE_PERIOD
 from dulwich.objects import ZERO_SHA, Blob, Tag, Tree
 from dulwich.porcelain import (
     CheckoutError,  # Hypothetical or real error class
@@ -5502,3 +5503,79 @@ class CountObjectsTests(PorcelainTestCase):
 
         # Verify it's the correct dataclass type
         self.assertIsInstance(stats, CountObjectsResult)
+
+
+class PruneTests(PorcelainTestCase):
+    def test_prune_removes_old_tempfiles(self):
+        """Test that prune removes old temporary files."""
+        # Drop a fake tmp_pack_* file straight into the .git/objects directory
+        objects_dir = os.path.join(self.repo.path, ".git", "objects")
+        tmp_pack_path = os.path.join(objects_dir, "tmp_pack_test")
+        with open(tmp_pack_path, "wb") as f:
+            f.write(b"old temporary data")
+        
+        # Backdate atime/mtime to one hour past the default grace period
+        old_time = time.time() - (DEFAULT_TEMPFILE_GRACE_PERIOD + 3600)
+        os.utime(tmp_pack_path, (old_time, old_time))
+        
+        # Run prune without an explicit grace_period (None is passed through)
+        porcelain.prune(self.repo.path)
+        
+        # Verify the file was removed
+        self.assertFalse(os.path.exists(tmp_pack_path))
+    
+    def test_prune_keeps_recent_tempfiles(self):
+        """Test that prune keeps recent temporary files."""
+        # Create a temporary file and leave its timestamps at "now"
+        objects_dir = os.path.join(self.repo.path, ".git", "objects")
+        tmp_pack_path = os.path.join(objects_dir, "tmp_pack_recent")
+        with open(tmp_pack_path, "wb") as f:
+            f.write(b"recent temporary data")
+        
+        # Run prune without an explicit grace_period (None is passed through)
+        porcelain.prune(self.repo.path)
+        
+        # Verify the file was NOT removed
+        self.assertTrue(os.path.exists(tmp_pack_path))
+        
+        # Clean up the file that prune was expected to keep
+        os.remove(tmp_pack_path)
+    
+    def test_prune_with_custom_grace_period(self):
+        """Test prune with custom grace period."""
+        # Create a temporary file that will be backdated by one hour below
+        objects_dir = os.path.join(self.repo.path, ".git", "objects")
+        tmp_pack_path = os.path.join(objects_dir, "tmp_pack_1hour")
+        with open(tmp_pack_path, "wb") as f:
+            f.write(b"1 hour old data")
+        
+        # Backdate atime/mtime by 3600 seconds (1 hour)
+        old_time = time.time() - 3600
+        os.utime(tmp_pack_path, (old_time, old_time))
+        
+        # 1800 s (30 min) grace period is shorter than the file's age
+        porcelain.prune(self.repo.path, grace_period=1800)
+        
+        # Verify the file was removed
+        self.assertFalse(os.path.exists(tmp_pack_path))
+    
+    def test_prune_dry_run(self):
+        """Test that dry-run mode leaves old temporary files in place."""
+        # Create a temporary file that would normally qualify for removal
+        objects_dir = os.path.join(self.repo.path, ".git", "objects")
+        tmp_pack_path = os.path.join(objects_dir, "tmp_pack_dryrun")
+        with open(tmp_pack_path, "wb") as f:
+            f.write(b"old temporary data")
+        
+        # Backdate atime/mtime to one hour past the default grace period
+        old_time = time.time() - (DEFAULT_TEMPFILE_GRACE_PERIOD + 3600)
+        os.utime(tmp_pack_path, (old_time, old_time))
+        
+        # Run prune in dry-run mode
+        porcelain.prune(self.repo.path, dry_run=True)
+        
+        # Verify the file was NOT removed (dry run)
+        self.assertTrue(os.path.exists(tmp_pack_path))
+        
+        # Clean up the file that the dry run left behind
+        os.remove(tmp_pack_path)