浏览代码

Add porcelain.count_objects() function

Jelmer Vernooij 1 月之前
父节点
当前提交
6d36d48e42
共有 4 个文件被更改，包括 177 次插入和 0 次删除
  1. 5 0
      NEWS
  2. 29 0
      dulwich/cli.py
  3. 85 0
      dulwich/porcelain.py
  4. 58 0
      tests/test_porcelain.py

+ 5 - 0
NEWS

@@ -13,6 +13,11 @@
    directory or creating commits, similar to ``git merge-tree``. It outputs
    the merged tree SHA and lists any conflicted paths. (Jelmer Vernooij)
 
+ * Add ``porcelain.count_objects()`` function to count unpacked objects and
+   their disk usage. Returns a ``CountObjectsResult`` dataclass with the
+   loose object count and size; pack statistics are also filled in when
+   ``verbose=True``. (Jelmer Vernooij)
+
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification

+ 29 - 0
dulwich/cli.py

@@ -1140,6 +1140,34 @@ class cmd_gc(Command):
         return None
 
 
+class cmd_count_objects(Command):
+    def run(self, args) -> None:
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "-v",
+            "--verbose",
+            action="store_true",
+            help="Display verbose information.",
+        )
+        args = parser.parse_args(args)
+
+        if args.verbose:
+            stats = porcelain.count_objects(".", verbose=True)
+            # Display verbose output
+            print(f"count: {stats.count}")
+            print(f"size: {stats.size // 1024}")  # Size in KiB
+            assert stats.in_pack is not None
+            print(f"in-pack: {stats.in_pack}")
+            assert stats.packs is not None
+            print(f"packs: {stats.packs}")
+            assert stats.size_pack is not None
+            print(f"size-pack: {stats.size_pack // 1024}")  # Size in KiB
+        else:
+            # Simple output
+            stats = porcelain.count_objects(".", verbose=False)
+            print(f"{stats.count} objects, {stats.size // 1024} kilobytes")
+
+
 class cmd_help(Command):
     def run(self, args) -> None:
         parser = argparse.ArgumentParser()
@@ -1176,6 +1204,7 @@ commands = {
     "clone": cmd_clone,
     "commit": cmd_commit,
     "commit-tree": cmd_commit_tree,
+    "count-objects": cmd_count_objects,
     "describe": cmd_describe,
     "daemon": cmd_daemon,
     "diff": cmd_diff,

+ 85 - 0
dulwich/porcelain.py

@@ -79,6 +79,7 @@ import sys
 import time
 from collections import namedtuple
 from contextlib import closing, contextmanager
+from dataclasses import dataclass
 from io import BytesIO, RawIOBase
 from pathlib import Path
 from typing import Optional, Union
@@ -147,6 +148,25 @@ from .sparse_patterns import (
 GitStatus = namedtuple("GitStatus", "staged unstaged untracked")
 
 
+@dataclass
+class CountObjectsResult:
+    """Result of counting objects in a repository.
+
+    Attributes:
+      count: Number of loose objects
+      size: Total size of loose objects in bytes
+      in_pack: Number of objects in pack files
+      packs: Number of pack files
+      size_pack: Total size of pack files in bytes
+    """
+
+    count: int
+    size: int
+    in_pack: Optional[int] = None
+    packs: Optional[int] = None
+    size_pack: Optional[int] = None
+
+
 class NoneStream(RawIOBase):
     """Fallback if stdout or stderr are unavailable, does nothing."""
 
@@ -2923,3 +2943,68 @@ def gc(
             dry_run=dry_run,
             progress=progress,
         )
+
+
+def count_objects(repo=".", verbose=False) -> CountObjectsResult:
+    """Count unpacked objects and their disk usage.
+
+    Args:
+      repo: Path to repository or repository object
+      verbose: Whether to return verbose information
+
+    Returns:
+      CountObjectsResult object with detailed statistics
+    """
+    with open_repo_closing(repo) as r:
+        object_store = r.object_store
+
+        # Count loose objects
+        loose_count = 0
+        loose_size = 0
+        for sha in object_store._iter_loose_objects():
+            loose_count += 1
+            path = object_store._get_shafile_path(sha)
+            try:
+                stat_info = os.stat(path)
+                # Git reports disk usage, not file size. Python documents
+                # st_blocks as 512-byte blocks (POSIX leaves the unit open)
+                if hasattr(stat_info, "st_blocks"):
+                    # Available on Linux and macOS
+                    loose_size += stat_info.st_blocks * 512  # type: ignore
+                else:
+                    # Fallback for Windows
+                    loose_size += stat_info.st_size
+            except FileNotFoundError:
+                # Object may have been removed between iteration and stat
+                pass
+
+        if not verbose:
+            return CountObjectsResult(count=loose_count, size=loose_size)
+
+        # Count pack information
+        pack_count = len(object_store.packs)
+        in_pack_count = 0
+        pack_size = 0
+
+        for pack in object_store.packs:
+            in_pack_count += len(pack)
+            # Get pack file size
+            pack_path = pack._data_path
+            try:
+                pack_size += os.path.getsize(pack_path)
+            except FileNotFoundError:
+                pass
+            # Get index file size
+            idx_path = pack._idx_path
+            try:
+                pack_size += os.path.getsize(idx_path)
+            except FileNotFoundError:
+                pass
+
+        return CountObjectsResult(
+            count=loose_count,
+            size=loose_size,
+            in_pack=in_pack_count,
+            packs=pack_count,
+            size_pack=pack_size,
+        )

+ 58 - 0
tests/test_porcelain.py

@@ -42,6 +42,7 @@ from dulwich.errors import CommitError
 from dulwich.objects import ZERO_SHA, Blob, Tag, Tree
 from dulwich.porcelain import (
     CheckoutError,  # Hypothetical or real error class
+    CountObjectsResult,
     add,
     commit,
 )
@@ -5016,3 +5017,60 @@ class UnpackObjectsTest(PorcelainTestCase):
         unpacked_b2 = target_repo.object_store[b2.id]
         self.assertEqual(b1.data, unpacked_b1.data)
         self.assertEqual(b2.data, unpacked_b2.data)
+
+
+class CountObjectsTests(PorcelainTestCase):
+    def test_count_objects_empty_repo(self):
+        """Test counting objects in an empty repository."""
+        stats = porcelain.count_objects(self.repo)
+        self.assertEqual(0, stats.count)
+        self.assertEqual(0, stats.size)
+
+    def test_count_objects_verbose_empty_repo(self):
+        """Test verbose counting in an empty repository."""
+        stats = porcelain.count_objects(self.repo, verbose=True)
+        self.assertEqual(0, stats.count)
+        self.assertEqual(0, stats.size)
+        self.assertEqual(0, stats.in_pack)
+        self.assertEqual(0, stats.packs)
+        self.assertEqual(0, stats.size_pack)
+
+    def test_count_objects_with_loose_objects(self):
+        """Test counting loose objects."""
+        # Create some loose objects
+        blob1 = make_object(Blob, data=b"data1")
+        blob2 = make_object(Blob, data=b"data2")
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+
+        stats = porcelain.count_objects(self.repo)
+        self.assertEqual(2, stats.count)
+        self.assertGreater(stats.size, 0)
+
+    def test_count_objects_verbose_with_objects(self):
+        """Test verbose counting with both loose and packed objects."""
+        # Add some loose objects
+        for i in range(3):
+            blob = make_object(Blob, data=f"data{i}".encode())
+            self.repo.object_store.add_object(blob)
+
+        # Create a simple commit to have some objects in a pack
+        tree = Tree()
+        c1 = make_commit(tree=tree.id, message=b"Test commit")
+        self.repo.object_store.add_objects([(tree, None), (c1, None)])
+        self.repo.refs[b"HEAD"] = c1.id
+
+        # Repack to create a pack file
+        porcelain.repack(self.repo)
+
+        stats = porcelain.count_objects(self.repo, verbose=True)
+
+        # After repacking, loose objects might be cleaned up
+        self.assertIsInstance(stats.count, int)
+        self.assertIsInstance(stats.size, int)
+        self.assertGreater(stats.in_pack, 0)  # Should have packed objects
+        self.assertGreater(stats.packs, 0)  # Should have at least one pack
+        self.assertGreater(stats.size_pack, 0)  # Pack should have size
+
+        # Verify it's the correct dataclass type
+        self.assertIsInstance(stats, CountObjectsResult)