Explorar o código

Add porcelain.count_objects() function

Jelmer Vernooij hai 1 mes
pai
achega
6d36d48e42
Modificáronse 4 ficheiros con 177 adicións e 0 borrados
  1. 5 0
      NEWS
  2. 29 0
      dulwich/cli.py
  3. 85 0
      dulwich/porcelain.py
  4. 58 0
      tests/test_porcelain.py

+ 5 - 0
NEWS

@@ -13,6 +13,11 @@
    directory or creating commits, similar to ``git merge-tree``. It outputs
    directory or creating commits, similar to ``git merge-tree``. It outputs
    the merged tree SHA and lists any conflicted paths. (Jelmer Vernooij)
    the merged tree SHA and lists any conflicted paths. (Jelmer Vernooij)
 
 
+ * Add ``porcelain.count_objects()`` function to count unpacked objects and
+   their disk usage. Returns a ``CountObjectsResult`` dataclass; its pack
+   statistics (``in_pack``, ``packs``, ``size_pack``) are only filled in
+   when ``verbose=True``. (Jelmer Vernooij)
+
  * Add support for pack index format version 3. This format supports variable
  * Add support for pack index format version 3. This format supports variable
    hash sizes to enable future SHA-256 support. The implementation includes
    hash sizes to enable future SHA-256 support. The implementation includes
    reading and writing v3 indexes with proper hash algorithm identification
    reading and writing v3 indexes with proper hash algorithm identification

+ 29 - 0
dulwich/cli.py

@@ -1140,6 +1140,34 @@ class cmd_gc(Command):
         return None
         return None
 
 
 
 
+class cmd_count_objects(Command):
+    def run(self, args) -> None:
+        parser = argparse.ArgumentParser()
+        parser.add_argument(
+            "-v",
+            "--verbose",
+            action="store_true",
+            help="Display verbose information.",
+        )
+        args = parser.parse_args(args)
+
+        if args.verbose:
+            stats = porcelain.count_objects(".", verbose=True)
+            # Display verbose output
+            print(f"count: {stats.count}")
+            print(f"size: {stats.size // 1024}")  # Size in KiB
+            assert stats.in_pack is not None
+            print(f"in-pack: {stats.in_pack}")
+            assert stats.packs is not None
+            print(f"packs: {stats.packs}")
+            assert stats.size_pack is not None
+            print(f"size-pack: {stats.size_pack // 1024}")  # Size in KiB
+        else:
+            # Simple output
+            stats = porcelain.count_objects(".", verbose=False)
+            print(f"{stats.count} objects, {stats.size // 1024} kilobytes")
+
+
 class cmd_help(Command):
 class cmd_help(Command):
     def run(self, args) -> None:
     def run(self, args) -> None:
         parser = argparse.ArgumentParser()
         parser = argparse.ArgumentParser()
@@ -1176,6 +1204,7 @@ commands = {
     "clone": cmd_clone,
     "clone": cmd_clone,
     "commit": cmd_commit,
     "commit": cmd_commit,
     "commit-tree": cmd_commit_tree,
     "commit-tree": cmd_commit_tree,
+    "count-objects": cmd_count_objects,
     "describe": cmd_describe,
     "describe": cmd_describe,
     "daemon": cmd_daemon,
     "daemon": cmd_daemon,
     "diff": cmd_diff,
     "diff": cmd_diff,

+ 85 - 0
dulwich/porcelain.py

@@ -79,6 +79,7 @@ import sys
 import time
 import time
 from collections import namedtuple
 from collections import namedtuple
 from contextlib import closing, contextmanager
 from contextlib import closing, contextmanager
+from dataclasses import dataclass
 from io import BytesIO, RawIOBase
 from io import BytesIO, RawIOBase
 from pathlib import Path
 from pathlib import Path
 from typing import Optional, Union
 from typing import Optional, Union
@@ -147,6 +148,25 @@ from .sparse_patterns import (
 GitStatus = namedtuple("GitStatus", "staged unstaged untracked")
 GitStatus = namedtuple("GitStatus", "staged unstaged untracked")
 
 
 
 
+@dataclass
+class CountObjectsResult:
+    """Result of counting objects in a repository.
+
+    ``count`` and ``size`` are required; the pack-related fields default to
+    ``None`` when they are not supplied.
+
+    Attributes:
+      count: Number of loose objects
+      size: Total size of loose objects in bytes
+      in_pack: Number of objects in pack files, or None if not supplied
+      packs: Number of pack files, or None if not supplied
+      size_pack: Total size of pack files in bytes, or None if not supplied
+    """
+
+    count: int
+    size: int
+    in_pack: Optional[int] = None
+    packs: Optional[int] = None
+    size_pack: Optional[int] = None
+
+
 class NoneStream(RawIOBase):
 class NoneStream(RawIOBase):
     """Fallback if stdout or stderr are unavailable, does nothing."""
     """Fallback if stdout or stderr are unavailable, does nothing."""
 
 
@@ -2923,3 +2943,68 @@ def gc(
             dry_run=dry_run,
             dry_run=dry_run,
             progress=progress,
             progress=progress,
         )
         )
+
+
+def count_objects(repo=".", verbose=False) -> CountObjectsResult:
+    """Count unpacked objects and their disk usage.
+
+    Args:
+      repo: Path to repository or repository object
+      verbose: Whether to return verbose information
+
+    Returns:
+      CountObjectsResult object with detailed statistics
+    """
+    with open_repo_closing(repo) as r:
+        object_store = r.object_store
+
+        # Count loose objects
+        loose_count = 0
+        loose_size = 0
+        for sha in object_store._iter_loose_objects():
+            loose_count += 1
+            path = object_store._get_shafile_path(sha)
+            try:
+                stat_info = os.stat(path)
+                # Git uses disk usage, not file size. st_blocks is always in
+                # 512-byte blocks per POSIX standard
+                if hasattr(stat_info, "st_blocks"):
+                    # Available on Linux and macOS
+                    loose_size += stat_info.st_blocks * 512  # type: ignore
+                else:
+                    # Fallback for Windows
+                    loose_size += stat_info.st_size
+            except FileNotFoundError:
+                # Object may have been removed between iteration and stat
+                pass
+
+        if not verbose:
+            return CountObjectsResult(count=loose_count, size=loose_size)
+
+        # Count pack information
+        pack_count = len(object_store.packs)
+        in_pack_count = 0
+        pack_size = 0
+
+        for pack in object_store.packs:
+            in_pack_count += len(pack)
+            # Get pack file size
+            pack_path = pack._data_path
+            try:
+                pack_size += os.path.getsize(pack_path)
+            except FileNotFoundError:
+                pass
+            # Get index file size
+            idx_path = pack._idx_path
+            try:
+                pack_size += os.path.getsize(idx_path)
+            except FileNotFoundError:
+                pass
+
+        return CountObjectsResult(
+            count=loose_count,
+            size=loose_size,
+            in_pack=in_pack_count,
+            packs=pack_count,
+            size_pack=pack_size,
+        )

+ 58 - 0
tests/test_porcelain.py

@@ -42,6 +42,7 @@ from dulwich.errors import CommitError
 from dulwich.objects import ZERO_SHA, Blob, Tag, Tree
 from dulwich.objects import ZERO_SHA, Blob, Tag, Tree
 from dulwich.porcelain import (
 from dulwich.porcelain import (
     CheckoutError,  # Hypothetical or real error class
     CheckoutError,  # Hypothetical or real error class
+    CountObjectsResult,
     add,
     add,
     commit,
     commit,
 )
 )
@@ -5016,3 +5017,60 @@ class UnpackObjectsTest(PorcelainTestCase):
         unpacked_b2 = target_repo.object_store[b2.id]
         unpacked_b2 = target_repo.object_store[b2.id]
         self.assertEqual(b1.data, unpacked_b1.data)
         self.assertEqual(b1.data, unpacked_b1.data)
         self.assertEqual(b2.data, unpacked_b2.data)
         self.assertEqual(b2.data, unpacked_b2.data)
+
+
+class CountObjectsTests(PorcelainTestCase):
+    def test_count_objects_empty_repo(self):
+        """Test counting objects in an empty repository."""
+        stats = porcelain.count_objects(self.repo)
+        self.assertEqual(0, stats.count)
+        self.assertEqual(0, stats.size)
+
+    def test_count_objects_verbose_empty_repo(self):
+        """Test verbose counting in an empty repository."""
+        stats = porcelain.count_objects(self.repo, verbose=True)
+        self.assertEqual(0, stats.count)
+        self.assertEqual(0, stats.size)
+        self.assertEqual(0, stats.in_pack)
+        self.assertEqual(0, stats.packs)
+        self.assertEqual(0, stats.size_pack)
+
+    def test_count_objects_with_loose_objects(self):
+        """Test counting loose objects."""
+        # Create some loose objects
+        blob1 = make_object(Blob, data=b"data1")
+        blob2 = make_object(Blob, data=b"data2")
+        self.repo.object_store.add_object(blob1)
+        self.repo.object_store.add_object(blob2)
+
+        stats = porcelain.count_objects(self.repo)
+        self.assertEqual(2, stats.count)
+        self.assertGreater(stats.size, 0)
+
+    def test_count_objects_verbose_with_objects(self):
+        """Test verbose counting with both loose and packed objects."""
+        # Add some loose objects
+        for i in range(3):
+            blob = make_object(Blob, data=f"data{i}".encode())
+            self.repo.object_store.add_object(blob)
+
+        # Create a simple commit to have some objects in a pack
+        tree = Tree()
+        c1 = make_commit(tree=tree.id, message=b"Test commit")
+        self.repo.object_store.add_objects([(tree, None), (c1, None)])
+        self.repo.refs[b"HEAD"] = c1.id
+
+        # Repack to create a pack file
+        porcelain.repack(self.repo)
+
+        stats = porcelain.count_objects(self.repo, verbose=True)
+
+        # After repacking, loose objects might be cleaned up
+        self.assertIsInstance(stats.count, int)
+        self.assertIsInstance(stats.size, int)
+        self.assertGreater(stats.in_pack, 0)  # Should have packed objects
+        self.assertGreater(stats.packs, 0)  # Should have at least one pack
+        self.assertGreater(stats.size_pack, 0)  # Pack should have size
+
+        # Verify it's the correct dataclass type
+        self.assertIsInstance(stats, CountObjectsResult)