
Support garbage collection (#1593)

Fixes #92
Jelmer Vernooij 1 month ago
parent commit 669c881c5d
9 changed files with 1019 additions and 23 deletions
  1. NEWS (+2, -0)
  2. dulwich/cli.py (+162, -0)
  3. dulwich/gc.py (+286, -0)
  4. dulwich/object_store.py (+69, -15)
  5. dulwich/porcelain.py (+38, -8)
  6. dulwich/refs.py (+31, -0)
  7. dulwich/tests/test_cli.py (+128, -0)
  8. dulwich/tests/test_gc.py (+261, -0)
  9. dulwich/tests/test_object_store.py (+42, -0)

+ 2 - 0
NEWS

@@ -1,5 +1,7 @@
 0.22.9	UNRELEASED
 
+ * Add ``gc`` command to ``dulwich.porcelain``. (Jelmer Vernooij, #92)
+
  * Add ``unpack-objects`` plumbing command to unpack objects from pack files
    into loose objects in the repository. This command extracts all objects
    from a pack file and writes them to the object store as individual files.

+ 162 - 0
dulwich/cli.py

@@ -58,6 +58,71 @@ def signal_quit(signal, frame) -> None:
     pdb.set_trace()
 
 
+def parse_relative_time(time_str):
+    """Parse a relative time string like '2 weeks ago' into seconds.
+
+    Args:
+        time_str: String like '2 weeks ago' or 'now'
+
+    Returns:
+        Number of seconds
+
+    Raises:
+        ValueError: If the time string cannot be parsed
+    """
+    if time_str == "now":
+        return 0
+
+    if not time_str.endswith(" ago"):
+        raise ValueError(f"Invalid relative time format: {time_str}")
+
+    parts = time_str[:-4].split()
+    if len(parts) != 2:
+        raise ValueError(f"Invalid relative time format: {time_str}")
+
+    try:
+        num = int(parts[0])
+        unit = parts[1]
+
+        multipliers = {
+            "second": 1,
+            "seconds": 1,
+            "minute": 60,
+            "minutes": 60,
+            "hour": 3600,
+            "hours": 3600,
+            "day": 86400,
+            "days": 86400,
+            "week": 604800,
+            "weeks": 604800,
+        }
+
+        if unit in multipliers:
+            return num * multipliers[unit]
+        else:
+            raise ValueError(f"Unknown time unit: {unit}")
+    except ValueError as e:
+        if "invalid literal" in str(e):
+            raise ValueError(f"Invalid number in relative time: {parts[0]}")
+        raise
+
+
+def format_bytes(bytes):
+    """Format bytes as human-readable string.
+
+    Args:
+        bytes: Number of bytes
+
+    Returns:
+        Human-readable string like "1.5 MB"
+    """
+    for unit in ["B", "KB", "MB", "GB"]:
+        if bytes < 1024.0:
+            return f"{bytes:.1f} {unit}"
+        bytes /= 1024.0
+    return f"{bytes:.1f} TB"
+
+
 class Command:
     """A Dulwich subcommand."""
 
@@ -979,6 +1044,102 @@ class cmd_merge_tree(Command):
             return 1
 
 
+class cmd_gc(Command):
+    def run(self, args) -> Optional[int]:
+        import datetime
+        import time
+
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "--auto",
+            action="store_true",
+            help="Only run gc if needed",
+        )
+        parser.add_argument(
+            "--aggressive",
+            action="store_true",
+            help="Use more aggressive settings",
+        )
+        parser.add_argument(
+            "--no-prune",
+            action="store_true",
+            help="Do not prune unreachable objects",
+        )
+        parser.add_argument(
+            "--prune",
+            nargs="?",
+            const="now",
+            help="Prune unreachable objects older than date (default: 2 weeks ago)",
+        )
+        parser.add_argument(
+            "--dry-run",
+            "-n",
+            action="store_true",
+            help="Only report what would be done",
+        )
+        parser.add_argument(
+            "--quiet",
+            "-q",
+            action="store_true",
+            help="Only report errors",
+        )
+        args = parser.parse_args(args)
+
+        # Parse prune grace period
+        grace_period = None
+        if args.prune:
+            try:
+                grace_period = parse_relative_time(args.prune)
+            except ValueError:
+                # Try to parse as absolute date
+                try:
+                    date = datetime.datetime.strptime(args.prune, "%Y-%m-%d")
+                    grace_period = int(time.time() - date.timestamp())
+                except ValueError:
+                    print(f"Error: Invalid prune date: {args.prune}")
+                    return 1
+        elif not args.no_prune:
+            # Default to 2 weeks
+            grace_period = 1209600
+
+        # Progress callback
+        def progress(msg):
+            if not args.quiet:
+                print(msg)
+
+        try:
+            stats = porcelain.gc(
+                ".",
+                auto=args.auto,
+                aggressive=args.aggressive,
+                prune=not args.no_prune,
+                grace_period=grace_period,
+                dry_run=args.dry_run,
+                progress=progress if not args.quiet else None,
+            )
+
+            # Report results
+            if not args.quiet:
+                if args.dry_run:
+                    print("\nDry run results:")
+                else:
+                    print("\nGarbage collection complete:")
+
+                if stats.pruned_objects:
+                    print(f"  Pruned {len(stats.pruned_objects)} unreachable objects")
+                    print(f"  Freed {format_bytes(stats.bytes_freed)}")
+
+                if stats.packs_before != stats.packs_after:
+                    print(
+                        f"  Reduced pack files from {stats.packs_before} to {stats.packs_after}"
+                    )
+
+        except porcelain.Error as e:
+            print(f"Error: {e}")
+            return 1
+        return None
+
+
 class cmd_help(Command):
     def run(self, args) -> None:
         parser = argparse.ArgumentParser()
@@ -1025,6 +1186,7 @@ commands = {
     "fetch": cmd_fetch,
     "for-each-ref": cmd_for_each_ref,
     "fsck": cmd_fsck,
+    "gc": cmd_gc,
     "help": cmd_help,
     "init": cmd_init,
     "log": cmd_log,

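As a quick sanity check on the two helpers added above, a minimal sketch of their behaviour (consistent with the unit tests added in dulwich/tests/test_cli.py further down):

from dulwich.cli import format_bytes, parse_relative_time

# 'now' maps to zero seconds; anything else must end in ' ago'.
assert parse_relative_time("now") == 0
assert parse_relative_time("2 weeks ago") == 1209600  # 2 * 7 * 24 * 3600

# format_bytes walks B -> KB -> MB -> GB, falling through to TB.
assert format_bytes(1536) == "1.5 KB"
assert format_bytes(1024 * 1024) == "1.0 MB"

With the commands table updated below, the new subcommand should then be reachable as something like ``dulwich gc --prune="2 weeks ago" --dry-run``.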
+ 286 - 0
dulwich/gc.py

@@ -0,0 +1,286 @@
+"""Git garbage collection implementation."""
+
+import collections
+import time
+from dataclasses import dataclass, field
+from typing import Optional
+
+from dulwich.object_store import BaseObjectStore, PackBasedObjectStore
+from dulwich.objects import Commit, ObjectID, Tag, Tree
+from dulwich.refs import RefsContainer
+
+
+@dataclass
+class GCStats:
+    """Statistics from garbage collection."""
+
+    pruned_objects: set[bytes] = field(default_factory=set)
+    bytes_freed: int = 0
+    packs_before: int = 0
+    packs_after: int = 0
+    loose_objects_before: int = 0
+    loose_objects_after: int = 0
+
+
+def find_reachable_objects(
+    object_store: BaseObjectStore,
+    refs_container: RefsContainer,
+    include_reflogs: bool = True,
+    progress=None,
+) -> set[bytes]:
+    """Find all reachable objects in the repository.
+
+    Args:
+        object_store: Object store to search
+        refs_container: Reference container
+        include_reflogs: Whether to include reflog entries
+        progress: Optional progress callback
+
+    Returns:
+        Set of reachable object SHAs
+    """
+    reachable = set()
+    pending: collections.deque[ObjectID] = collections.deque()
+
+    # Start with all refs
+    for ref in refs_container.allkeys():
+        try:
+            sha = refs_container[ref]  # This follows symbolic refs
+            if sha and sha not in reachable:
+                pending.append(sha)
+                reachable.add(sha)
+        except KeyError:
+            # Broken ref
+            if progress:
+                progress(f"Warning: Broken ref {ref.decode('utf-8', 'replace')}")
+            continue
+
+    # TODO: Add reflog support when reflog functionality is available
+
+    # Walk all reachable objects
+    while pending:
+        sha = pending.popleft()
+
+        if progress:
+            progress(f"Checking object {sha.decode('ascii', 'replace')}")
+
+        try:
+            obj = object_store[sha]
+        except KeyError:
+            continue
+
+        # Add referenced objects
+        if isinstance(obj, Commit):
+            # Tree
+            if obj.tree not in reachable:
+                pending.append(obj.tree)
+                reachable.add(obj.tree)
+            # Parents
+            for parent in obj.parents:
+                if parent not in reachable:
+                    pending.append(parent)
+                    reachable.add(parent)
+        elif isinstance(obj, Tree):
+            # Tree entries
+            for entry in obj.items():
+                if entry.sha not in reachable:
+                    pending.append(entry.sha)
+                    reachable.add(entry.sha)
+        elif isinstance(obj, Tag):
+            # Tagged object
+            if obj.object[1] not in reachable:
+                pending.append(obj.object[1])
+                reachable.add(obj.object[1])
+
+    return reachable
+
+
+def find_unreachable_objects(
+    object_store: BaseObjectStore,
+    refs_container: RefsContainer,
+    include_reflogs: bool = True,
+    progress=None,
+) -> set[bytes]:
+    """Find all unreachable objects in the repository.
+
+    Args:
+        object_store: Object store to search
+        refs_container: Reference container
+        include_reflogs: Whether to include reflog entries
+        progress: Optional progress callback
+
+    Returns:
+        Set of unreachable object SHAs
+    """
+    reachable = find_reachable_objects(
+        object_store, refs_container, include_reflogs, progress
+    )
+
+    unreachable = set()
+    for sha in object_store:
+        if sha not in reachable:
+            unreachable.add(sha)
+
+    return unreachable
+
+
+def prune_unreachable_objects(
+    object_store: PackBasedObjectStore,
+    refs_container: RefsContainer,
+    grace_period: Optional[int] = None,
+    dry_run: bool = False,
+    progress=None,
+) -> tuple[set[bytes], int]:
+    """Remove unreachable objects from the repository.
+
+    Args:
+        object_store: Object store to prune
+        refs_container: Reference container
+        grace_period: Grace period in seconds (objects newer than this are kept)
+        dry_run: If True, only report what would be deleted
+        progress: Optional progress callback
+
+    Returns:
+        Tuple of (set of pruned object SHAs, total bytes freed)
+    """
+    unreachable = find_unreachable_objects(
+        object_store, refs_container, progress=progress
+    )
+
+    pruned = set()
+    bytes_freed = 0
+
+    for sha in unreachable:
+        try:
+            obj = object_store[sha]
+
+            # Check grace period
+            if grace_period is not None:
+                mtime = object_store.get_object_mtime(sha)
+                if mtime is not None:
+                    age = time.time() - mtime
+                    if age < grace_period:
+                        if progress:
+                            progress(
+                                f"Keeping {sha.decode('ascii', 'replace')} (age: {age:.0f}s < grace period: {grace_period}s)"
+                            )
+                        continue
+
+            if progress:
+                progress(f"Pruning {sha.decode('ascii', 'replace')}")
+
+            # Calculate size before attempting deletion
+            obj_size = len(obj.as_raw_string())
+
+            if not dry_run:
+                object_store.delete_loose_object(sha)
+
+            # Only count as pruned if we get here (deletion succeeded or dry run)
+            pruned.add(sha)
+            bytes_freed += obj_size
+
+        except KeyError:
+            # Object already gone
+            pass
+        except OSError as e:
+            # File system errors during deletion
+            if progress:
+                progress(f"Error pruning {sha.decode('ascii', 'replace')}: {e}")
+    return pruned, bytes_freed
+
+
+def garbage_collect(
+    repo,
+    auto: bool = False,
+    aggressive: bool = False,
+    prune: bool = True,
+    grace_period: Optional[int] = 1209600,  # 2 weeks default
+    dry_run: bool = False,
+    progress=None,
+) -> GCStats:
+    """Run garbage collection on a repository.
+
+    Args:
+        repo: Repository to garbage collect
+        auto: Whether this is an automatic gc
+        aggressive: Whether to use aggressive settings
+        prune: Whether to prune unreachable objects
+        grace_period: Grace period for pruning in seconds
+        dry_run: If True, only report what would be done
+        progress: Optional progress callback
+
+    Returns:
+        GCStats object with garbage collection statistics
+    """
+    stats = GCStats()
+
+    object_store = repo.object_store
+    refs_container = repo.refs
+
+    # Count initial state
+    stats.packs_before = len(list(object_store.packs))
+    # TODO: Count loose objects when we have a method for it
+
+    # Find unreachable objects to exclude from repacking
+    unreachable_to_prune = set()
+    if prune:
+        if progress:
+            progress("Finding unreachable objects")
+        unreachable = find_unreachable_objects(
+            object_store, refs_container, progress=progress
+        )
+
+        # Apply grace period check
+        for sha in unreachable:
+            try:
+                if grace_period is not None:
+                    mtime = object_store.get_object_mtime(sha)
+                    if mtime is not None:
+                        age = time.time() - mtime
+                        if age < grace_period:
+                            if progress:
+                                progress(
+                                    f"Keeping {sha.decode('ascii', 'replace')} (age: {age:.0f}s < grace period: {grace_period}s)"
+                                )
+                            continue
+
+                unreachable_to_prune.add(sha)
+                obj = object_store[sha]
+                stats.bytes_freed += len(obj.as_raw_string())
+            except KeyError:
+                pass
+
+        stats.pruned_objects = unreachable_to_prune
+
+    # Pack refs
+    if progress:
+        progress("Packing references")
+    if not dry_run:
+        repo.refs.pack_refs()
+
+    # Delete loose unreachable objects
+    if prune and not dry_run:
+        for sha in unreachable_to_prune:
+            if object_store.contains_loose(sha):
+                try:
+                    object_store.delete_loose_object(sha)
+                except OSError:
+                    pass
+
+    # Repack everything, excluding unreachable objects
+    # This handles both loose object packing and pack consolidation
+    if progress:
+        progress("Repacking repository")
+    if not dry_run:
+        if prune and unreachable_to_prune:
+            # Repack excluding unreachable objects
+            object_store.repack(exclude=unreachable_to_prune)
+        else:
+            # Normal repack
+            object_store.repack()
+
+    # Count final state
+    stats.packs_after = len(list(object_store.packs))
+    # TODO: Count loose objects when we have a method for it
+
+    return stats

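For orientation, a hedged sketch of driving dulwich.gc directly rather than through porcelain; the repository path is hypothetical, and grace_period=0 disables the age check so every unreachable object is considered:

from dulwich.gc import garbage_collect
from dulwich.repo import Repo

repo = Repo("/path/to/repo")  # hypothetical path
# dry_run=True reports what would be pruned without touching the store.
stats = garbage_collect(repo, prune=True, grace_period=0, dry_run=True)
print(f"would prune {len(stats.pruned_objects)} objects, "
      f"freeing {stats.bytes_freed} bytes")
repo.close()

Note the ordering in garbage_collect: it packs refs first, deletes loose unreachable objects, and then repacks with exclude= so unreachable objects also disappear from pack files.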
+ 69 - 15
dulwich/object_store.py

@@ -387,6 +387,19 @@ class BaseObjectStore:
         """
         raise NotImplementedError(self.write_commit_graph)
 
+    def get_object_mtime(self, sha):
+        """Get the modification time of an object.
+
+        Args:
+          sha: SHA1 of the object
+
+        Returns:
+          Modification time as seconds since epoch, or None if not available
+        """
+        # Default implementation returns None
+        # Subclasses can override to provide actual mtime
+        return None
+
 
 class PackBasedObjectStore(BaseObjectStore):
     def __init__(self, pack_compression_level=-1) -> None:
@@ -518,8 +531,13 @@ class PackBasedObjectStore(BaseObjectStore):
     def _get_loose_object(self, sha) -> Optional[ShaFile]:
         raise NotImplementedError(self._get_loose_object)
 
-    def _remove_loose_object(self, sha) -> None:
-        raise NotImplementedError(self._remove_loose_object)
+    def delete_loose_object(self, sha) -> None:
+        """Delete a loose object.
+
+        This method only handles loose objects. For packed objects,
+        use repack(exclude=...) to exclude them during repacking.
+        """
+        raise NotImplementedError(self.delete_loose_object)
 
     def _remove_pack(self, name) -> None:
         raise NotImplementedError(self._remove_pack)
@@ -534,32 +552,50 @@ class PackBasedObjectStore(BaseObjectStore):
             objects.add((self._get_loose_object(sha), None))
         self.add_objects(list(objects))
         for obj, path in objects:
-            self._remove_loose_object(obj.id)
+            self.delete_loose_object(obj.id)
         return len(objects)
 
-    def repack(self):
+    def repack(self, exclude=None):
         """Repack the packs in this repository.
 
         Note that this implementation is fairly naive and currently keeps all
         objects in memory while it repacks.
+
+        Args:
+          exclude: Optional set of object SHAs to exclude from repacking
         """
+        if exclude is None:
+            exclude = set()
+
         loose_objects = set()
+        excluded_loose_objects = set()
         for sha in self._iter_loose_objects():
-            loose_objects.add(self._get_loose_object(sha))
+            if sha not in exclude:
+                loose_objects.add(self._get_loose_object(sha))
+            else:
+                excluded_loose_objects.add(sha)
+
         objects = {(obj, None) for obj in loose_objects}
         old_packs = {p.name(): p for p in self.packs}
         for name, pack in old_packs.items():
-            objects.update((obj, None) for obj in pack.iterobjects())
-
-        # The name of the consolidated pack might match the name of a
-        # pre-existing pack. Take care not to remove the newly created
-        # consolidated pack.
+            objects.update(
+                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
+            )
 
-        consolidated = self.add_objects(objects)
-        old_packs.pop(consolidated.name(), None)
+        # Only create a new pack if there are objects to pack
+        if objects:
+            # The name of the consolidated pack might match the name of a
+            # pre-existing pack. Take care not to remove the newly created
+            # consolidated pack.
+            consolidated = self.add_objects(objects)
+            old_packs.pop(consolidated.name(), None)
 
+        # Delete loose objects that were packed
         for obj in loose_objects:
-            self._remove_loose_object(obj.id)
+            self.delete_loose_object(obj.id)
+        # Delete excluded loose objects
+        for sha in excluded_loose_objects:
+            self.delete_loose_object(sha)
         for name, pack in old_packs.items():
             self._remove_pack(pack)
         self._update_pack_cache()
@@ -893,9 +929,27 @@ class DiskObjectStore(PackBasedObjectStore):
         except FileNotFoundError:
             return None
 
-    def _remove_loose_object(self, sha) -> None:
+    def delete_loose_object(self, sha) -> None:
         os.remove(self._get_shafile_path(sha))
 
+    def get_object_mtime(self, sha):
+        """Get the modification time of a loose object.
+
+        Args:
+          sha: SHA1 of the object
+
+        Returns:
+          Modification time as seconds since epoch, or None if not a loose object
+        """
+        if not self.contains_loose(sha):
+            return None
+
+        path = self._get_shafile_path(sha)
+        try:
+            return os.path.getmtime(path)
+        except OSError:
+            return None
+
     def _remove_pack(self, pack) -> None:
         try:
             del self._pack_cache[os.path.basename(pack._basename)]
@@ -1781,7 +1835,7 @@ class BucketBasedObjectStore(PackBasedObjectStore):
     def _get_loose_object(self, sha) -> None:
         return None
 
-    def _remove_loose_object(self, sha) -> None:
+    def delete_loose_object(self, sha) -> None:
         # Doesn't exist..
         pass
 

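A small sketch of the new repack(exclude=...) contract, using a throwaway on-disk repository (the tempfile path is illustrative):

import tempfile

from dulwich.objects import Blob
from dulwich.repo import Repo

repo = Repo.init(tempfile.mkdtemp())
blob = Blob.from_string(b"doomed content")
repo.object_store.add_object(blob)

# Excluded objects are left out of the consolidated pack, and any loose
# copies are deleted, so they vanish from the store entirely.
repo.object_store.repack(exclude={blob.id})
assert blob.id not in repo.object_store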
+ 38 - 8
dulwich/porcelain.py

@@ -402,14 +402,7 @@ def symbolic_ref(repo, ref_name, force=False) -> None:
 
 def pack_refs(repo, all=False) -> None:
     with open_repo_closing(repo) as repo_obj:
-        refs = repo_obj.refs
-        packed_refs = {
-            ref: refs[ref]
-            for ref in refs
-            if (all or ref.startswith(LOCAL_TAG_PREFIX)) and ref != b"HEAD"
-        }
-
-        refs.add_packed_refs(packed_refs)
+        repo_obj.refs.pack_refs(all=all)
 
 
 def commit(
@@ -2893,3 +2886,40 @@ def merge_tree(repo, base_tree, our_tree, their_tree):
         r.object_store.add_object(merged_tree)
 
         return merged_tree.id, conflicts
+
+
+def gc(
+    repo,
+    auto: bool = False,
+    aggressive: bool = False,
+    prune: bool = True,
+    grace_period: Optional[int] = 1209600,  # 2 weeks default
+    dry_run: bool = False,
+    progress=None,
+):
+    """Run garbage collection on a repository.
+
+    Args:
+      repo: Path to the repository or a Repo object
+      auto: If True, only run gc if needed
+      aggressive: If True, use more aggressive settings
+      prune: If True, prune unreachable objects
+      grace_period: Grace period in seconds for pruning (default 2 weeks)
+      dry_run: If True, only report what would be done
+      progress: Optional progress callback
+
+    Returns:
+      GCStats object with garbage collection statistics
+    """
+    from .gc import garbage_collect
+
+    with open_repo_closing(repo) as r:
+        return garbage_collect(
+            r,
+            auto=auto,
+            aggressive=aggressive,
+            prune=prune,
+            grace_period=grace_period,
+            dry_run=dry_run,
+            progress=progress,
+        )

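The porcelain wrapper accepts either a path or a Repo object. A minimal usage sketch, assuming the current directory is a repository:

from dulwich import porcelain

# Report-only pass: nothing is deleted when dry_run=True.
stats = porcelain.gc(".", prune=True, grace_period=1209600, dry_run=True)
print(f"{len(stats.pruned_objects)} unreachable objects, "
      f"{stats.bytes_freed} bytes reclaimable")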
+ 31 - 0
dulwich/refs.py

@@ -437,6 +437,14 @@ class RefsContainer:
                 ret[src] = dst
         return ret
 
+    def pack_refs(self, all: bool = False) -> None:
+        """Pack loose refs into packed-refs file.
+
+        Args:
+            all: If True, pack all refs. If False, only pack tags.
+        """
+        raise NotImplementedError(self.pack_refs)
+
 
 class DictRefsContainer(RefsContainer):
     """RefsContainer backed by a simple dict.
@@ -1054,6 +1062,29 @@ class DiskRefsContainer(RefsContainer):
 
         return True
 
+    def pack_refs(self, all: bool = False) -> None:
+        """Pack loose refs into packed-refs file.
+
+        Args:
+            all: If True, pack all refs. If False, only pack tags.
+        """
+        refs_to_pack: dict[Ref, Optional[ObjectID]] = {}
+        for ref in self.allkeys():
+            if ref == HEADREF:
+                # Never pack HEAD
+                continue
+            if all or ref.startswith(LOCAL_TAG_PREFIX):
+                try:
+                    sha = self[ref]
+                    if sha:
+                        refs_to_pack[ref] = sha
+                except KeyError:
+                    # Broken ref, skip it
+                    pass
+
+        if refs_to_pack:
+            self.add_packed_refs(refs_to_pack)
+
 
 def _split_ref_line(line):
     """Split a single ref line into a tuple of SHA1 and name."""

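A short sketch of the new pack_refs() API on a disk-backed repository (the path is hypothetical):

from dulwich.repo import Repo

repo = Repo("/path/to/repo")  # hypothetical path
repo.refs.pack_refs()          # packs refs/tags/* only
repo.refs.pack_refs(all=True)  # packs all refs except HEAD
repo.close()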
+ 128 - 0
dulwich/tests/test_cli.py

@@ -0,0 +1,128 @@
+"""Tests for dulwich.cli utilities."""
+
+from unittest import TestCase
+
+from dulwich.cli import format_bytes, parse_relative_time
+
+
+class FormatBytesTestCase(TestCase):
+    """Tests for format_bytes function."""
+
+    def test_bytes(self):
+        """Test formatting bytes."""
+        self.assertEqual("0.0 B", format_bytes(0))
+        self.assertEqual("1.0 B", format_bytes(1))
+        self.assertEqual("512.0 B", format_bytes(512))
+        self.assertEqual("1023.0 B", format_bytes(1023))
+
+    def test_kilobytes(self):
+        """Test formatting kilobytes."""
+        self.assertEqual("1.0 KB", format_bytes(1024))
+        self.assertEqual("1.5 KB", format_bytes(1536))
+        self.assertEqual("2.0 KB", format_bytes(2048))
+        self.assertEqual("1023.0 KB", format_bytes(1024 * 1023))
+
+    def test_megabytes(self):
+        """Test formatting megabytes."""
+        self.assertEqual("1.0 MB", format_bytes(1024 * 1024))
+        self.assertEqual("1.5 MB", format_bytes(1024 * 1024 * 1.5))
+        self.assertEqual("10.0 MB", format_bytes(1024 * 1024 * 10))
+        self.assertEqual("1023.0 MB", format_bytes(1024 * 1024 * 1023))
+
+    def test_gigabytes(self):
+        """Test formatting gigabytes."""
+        self.assertEqual("1.0 GB", format_bytes(1024 * 1024 * 1024))
+        self.assertEqual("2.5 GB", format_bytes(1024 * 1024 * 1024 * 2.5))
+        self.assertEqual("1023.0 GB", format_bytes(1024 * 1024 * 1024 * 1023))
+
+    def test_terabytes(self):
+        """Test formatting terabytes."""
+        self.assertEqual("1.0 TB", format_bytes(1024 * 1024 * 1024 * 1024))
+        self.assertEqual("5.0 TB", format_bytes(1024 * 1024 * 1024 * 1024 * 5))
+        self.assertEqual("1000.0 TB", format_bytes(1024 * 1024 * 1024 * 1024 * 1000))
+
+
+class ParseRelativeTimeTestCase(TestCase):
+    """Tests for parse_relative_time function."""
+
+    def test_now(self):
+        """Test parsing 'now'."""
+        self.assertEqual(0, parse_relative_time("now"))
+
+    def test_seconds(self):
+        """Test parsing seconds."""
+        self.assertEqual(1, parse_relative_time("1 second ago"))
+        self.assertEqual(5, parse_relative_time("5 seconds ago"))
+        self.assertEqual(30, parse_relative_time("30 seconds ago"))
+
+    def test_minutes(self):
+        """Test parsing minutes."""
+        self.assertEqual(60, parse_relative_time("1 minute ago"))
+        self.assertEqual(300, parse_relative_time("5 minutes ago"))
+        self.assertEqual(1800, parse_relative_time("30 minutes ago"))
+
+    def test_hours(self):
+        """Test parsing hours."""
+        self.assertEqual(3600, parse_relative_time("1 hour ago"))
+        self.assertEqual(7200, parse_relative_time("2 hours ago"))
+        self.assertEqual(86400, parse_relative_time("24 hours ago"))
+
+    def test_days(self):
+        """Test parsing days."""
+        self.assertEqual(86400, parse_relative_time("1 day ago"))
+        self.assertEqual(604800, parse_relative_time("7 days ago"))
+        self.assertEqual(2592000, parse_relative_time("30 days ago"))
+
+    def test_weeks(self):
+        """Test parsing weeks."""
+        self.assertEqual(604800, parse_relative_time("1 week ago"))
+        self.assertEqual(1209600, parse_relative_time("2 weeks ago"))
+        self.assertEqual(
+            36288000, parse_relative_time("60 weeks ago")
+        )  # 60 * 7 * 24 * 60 * 60
+
+    def test_invalid_format(self):
+        """Test invalid time formats."""
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("invalid")
+        self.assertIn("Invalid relative time format", str(cm.exception))
+
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("2 weeks")
+        self.assertIn("Invalid relative time format", str(cm.exception))
+
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("ago")
+        self.assertIn("Invalid relative time format", str(cm.exception))
+
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("two weeks ago")
+        self.assertIn("Invalid number in relative time", str(cm.exception))
+
+    def test_invalid_unit(self):
+        """Test invalid time units."""
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("5 months ago")
+        self.assertIn("Unknown time unit: months", str(cm.exception))
+
+        with self.assertRaises(ValueError) as cm:
+            parse_relative_time("2 years ago")
+        self.assertIn("Unknown time unit: years", str(cm.exception))
+
+    def test_singular_plural(self):
+        """Test that both singular and plural forms work."""
+        self.assertEqual(
+            parse_relative_time("1 second ago"), parse_relative_time("1 seconds ago")
+        )
+        self.assertEqual(
+            parse_relative_time("1 minute ago"), parse_relative_time("1 minutes ago")
+        )
+        self.assertEqual(
+            parse_relative_time("1 hour ago"), parse_relative_time("1 hours ago")
+        )
+        self.assertEqual(
+            parse_relative_time("1 day ago"), parse_relative_time("1 days ago")
+        )
+        self.assertEqual(
+            parse_relative_time("1 week ago"), parse_relative_time("1 weeks ago")
+        )

+ 261 - 0
dulwich/tests/test_gc.py

@@ -0,0 +1,261 @@
+"""Tests for dulwich.gc."""
+
+import shutil
+import tempfile
+from unittest import TestCase
+
+from dulwich.gc import (
+    GCStats,
+    find_reachable_objects,
+    find_unreachable_objects,
+    garbage_collect,
+    prune_unreachable_objects,
+)
+from dulwich.objects import Blob, Commit, Tree
+from dulwich.repo import Repo
+
+
+class GCTestCase(TestCase):
+    """Tests for garbage collection functionality."""
+
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp()
+        self.repo = Repo.init(self.tmpdir)
+
+    def tearDown(self):
+        shutil.rmtree(self.tmpdir)
+
+    def test_find_reachable_objects_empty_repo(self):
+        """Test finding reachable objects in empty repository."""
+        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
+        self.assertEqual(set(), reachable)
+
+    def test_find_reachable_objects_with_commit(self):
+        """Test finding reachable objects with a commit."""
+        # Create a blob
+        blob = Blob.from_string(b"test content")
+        self.repo.object_store.add_object(blob)
+
+        # Create a tree
+        tree = Tree()
+        tree.add(b"test.txt", 0o100644, blob.id)
+        self.repo.object_store.add_object(tree)
+
+        # Create a commit
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        self.repo.object_store.add_object(commit)
+
+        # Set HEAD to the commit
+        self.repo.refs[b"HEAD"] = commit.id
+
+        # Find reachable objects
+        reachable = find_reachable_objects(self.repo.object_store, self.repo.refs)
+
+        # All three objects should be reachable
+        self.assertEqual({blob.id, tree.id, commit.id}, reachable)
+
+    def test_find_unreachable_objects(self):
+        """Test finding unreachable objects."""
+        # Create a reachable blob
+        reachable_blob = Blob.from_string(b"reachable content")
+        self.repo.object_store.add_object(reachable_blob)
+
+        # Create a tree
+        tree = Tree()
+        tree.add(b"reachable.txt", 0o100644, reachable_blob.id)
+        self.repo.object_store.add_object(tree)
+
+        # Create a commit
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        self.repo.object_store.add_object(commit)
+
+        # Set HEAD to the commit
+        self.repo.refs[b"HEAD"] = commit.id
+
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Find unreachable objects
+        unreachable = find_unreachable_objects(self.repo.object_store, self.repo.refs)
+
+        # Only the unreachable blob should be found
+        self.assertEqual({unreachable_blob.id}, unreachable)
+
+    def test_prune_unreachable_objects(self):
+        """Test pruning unreachable objects."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Verify it exists
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+        # Prune unreachable objects
+        pruned, bytes_freed = prune_unreachable_objects(
+            self.repo.object_store, self.repo.refs, grace_period=0
+        )
+
+        # Verify the blob was pruned
+        self.assertEqual({unreachable_blob.id}, pruned)
+        self.assertGreater(bytes_freed, 0)
+
+        # Note: We can't test that the object is gone, because
+        # delete_loose_object() only handles loose objects and may not be
+        # fully implemented for all object store types
+
+    def test_prune_unreachable_objects_dry_run(self):
+        """Test pruning unreachable objects with dry run."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Prune with dry run
+        pruned, bytes_freed = prune_unreachable_objects(
+            self.repo.object_store, self.repo.refs, grace_period=0, dry_run=True
+        )
+
+        # Verify the blob would be pruned but still exists
+        self.assertEqual({unreachable_blob.id}, pruned)
+        self.assertGreater(bytes_freed, 0)
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+    def test_garbage_collect(self):
+        """Test full garbage collection."""
+        # Create some reachable objects
+        blob = Blob.from_string(b"test content")
+        self.repo.object_store.add_object(blob)
+
+        tree = Tree()
+        tree.add(b"test.txt", 0o100644, blob.id)
+        self.repo.object_store.add_object(tree)
+
+        commit = Commit()
+        commit.tree = tree.id
+        commit.author = commit.committer = b"Test User <test@example.com>"
+        commit.commit_time = commit.author_time = 1234567890
+        commit.commit_timezone = commit.author_timezone = 0
+        commit.message = b"Test commit"
+        self.repo.object_store.add_object(commit)
+
+        self.repo.refs[b"HEAD"] = commit.id
+
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Run garbage collection
+        stats = garbage_collect(self.repo, prune=True, grace_period=0)
+
+        # Check results
+        self.assertIsInstance(stats, GCStats)
+        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
+        self.assertGreater(stats.bytes_freed, 0)
+
+    def test_garbage_collect_no_prune(self):
+        """Test garbage collection without pruning."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Run garbage collection without pruning
+        stats = garbage_collect(self.repo, prune=False)
+
+        # Check that nothing was pruned
+        self.assertEqual(set(), stats.pruned_objects)
+        self.assertEqual(0, stats.bytes_freed)
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+    def test_garbage_collect_dry_run(self):
+        """Test garbage collection with dry run."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Run garbage collection with dry run
+        stats = garbage_collect(self.repo, prune=True, grace_period=0, dry_run=True)
+
+        # Check that object would be pruned but still exists
+        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
+        self.assertGreater(stats.bytes_freed, 0)
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+    def test_grace_period(self):
+        """Test that grace period prevents pruning recent objects."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"recent unreachable content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Ensure the object is loose
+        self.assertTrue(self.repo.object_store.contains_loose(unreachable_blob.id))
+
+        # Run garbage collection with a 1 hour grace period, but dry run to avoid packing
+        # The object was just created, so it should not be pruned
+        stats = garbage_collect(self.repo, prune=True, grace_period=3600, dry_run=True)
+
+        # Check that the object was NOT pruned
+        self.assertEqual(set(), stats.pruned_objects)
+        self.assertEqual(0, stats.bytes_freed)
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+        # Now test with zero grace period - it should be pruned
+        stats = garbage_collect(self.repo, prune=True, grace_period=0)
+
+        # Check that the object was pruned
+        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
+        self.assertGreater(stats.bytes_freed, 0)
+
+    def test_grace_period_old_object(self):
+        """Test that old objects are pruned even with grace period."""
+        import os
+        import time
+
+        # Create an unreachable blob
+        old_blob = Blob.from_string(b"old unreachable content")
+        self.repo.object_store.add_object(old_blob)
+
+        # Ensure the object is loose
+        self.assertTrue(self.repo.object_store.contains_loose(old_blob.id))
+
+        # Manually set the mtime to 2 hours ago
+        path = self.repo.object_store._get_shafile_path(old_blob.id)
+        old_time = time.time() - 7200  # 2 hours ago
+        os.utime(path, (old_time, old_time))
+
+        # Run garbage collection with a 1 hour grace period
+        # The object is 2 hours old, so it should be pruned
+        stats = garbage_collect(self.repo, prune=True, grace_period=3600)
+
+        # Check that the object was pruned
+        self.assertEqual({old_blob.id}, stats.pruned_objects)
+        self.assertGreater(stats.bytes_freed, 0)
+
+    def test_packed_objects_pruned(self):
+        """Test that packed objects are pruned via repack with exclusion."""
+        # Create an unreachable blob
+        unreachable_blob = Blob.from_string(b"unreachable packed content")
+        self.repo.object_store.add_object(unreachable_blob)
+
+        # Pack the objects to ensure the blob is in a pack
+        self.repo.object_store.pack_loose_objects()
+
+        # Ensure the object is NOT loose anymore
+        self.assertFalse(self.repo.object_store.contains_loose(unreachable_blob.id))
+        self.assertIn(unreachable_blob.id, self.repo.object_store)
+
+        # Run garbage collection
+        stats = garbage_collect(self.repo, prune=True, grace_period=0)
+
+        # Check that the packed object was pruned
+        self.assertEqual({unreachable_blob.id}, stats.pruned_objects)
+        self.assertGreater(stats.bytes_freed, 0)
+        self.assertNotIn(unreachable_blob.id, self.repo.object_store)

+ 42 - 0
dulwich/tests/test_object_store.py

@@ -55,6 +55,8 @@ class ObjectStoreTests:
     assertNotIn: Callable[[object, object], None]
     assertNotEqual: Callable[[object, object], None]
     assertIn: Callable[[object, object], None]
+    assertTrue: Callable[[bool], None]
+    assertFalse: Callable[[bool], None]
 
     def test_determine_wants_all(self) -> None:
         self.assertEqual(
@@ -353,3 +355,43 @@ class PackBasedObjectStoreTests(ObjectStoreTests):
         self.assertEqual(2, self.store.repack())
         self.assertEqual(1, len(self.store.packs))
         self.assertEqual(0, self.store.pack_loose_objects())
+
+    def test_repack_with_exclude(self) -> None:
+        """Test repacking while excluding specific objects."""
+        b1 = make_object(Blob, data=b"yummy data")
+        self.store.add_object(b1)
+        b2 = make_object(Blob, data=b"more yummy data")
+        self.store.add_object(b2)
+        b3 = make_object(Blob, data=b"even more yummy data")
+        b4 = make_object(Blob, data=b"and more yummy data")
+        self.store.add_objects([(b3, None), (b4, None)])
+
+        self.assertEqual({b1.id, b2.id, b3.id, b4.id}, set(self.store))
+        self.assertEqual(1, len(self.store.packs))
+
+        # Repack, excluding b2 and b3
+        excluded = {b2.id, b3.id}
+        self.assertEqual(2, self.store.repack(exclude=excluded))
+
+        # Should have repacked only b1 and b4
+        self.assertEqual(1, len(self.store.packs))
+        self.assertIn(b1.id, self.store)
+        self.assertNotIn(b2.id, self.store)
+        self.assertNotIn(b3.id, self.store)
+        self.assertIn(b4.id, self.store)
+
+    def test_delete_loose_object(self) -> None:
+        """Test deleting loose objects."""
+        b1 = make_object(Blob, data=b"test data")
+        self.store.add_object(b1)
+
+        # Verify it's loose
+        self.assertTrue(self.store.contains_loose(b1.id))
+        self.assertIn(b1.id, self.store)
+
+        # Delete it
+        self.store.delete_loose_object(b1.id)
+
+        # Verify it's gone
+        self.assertFalse(self.store.contains_loose(b1.id))
+        self.assertNotIn(b1.id, self.store)