瀏覽代碼

Support pack config options (#1717)

Jelmer Vernooij 1 月之前
父節點
當前提交
0b6dd8e66a
共有 2 個文件被更改,包括 144 次插入和 13 次刪除
  1. 95 4
      dulwich/object_store.py
  2. 49 9
      dulwich/pack.py

+ 95 - 4
dulwich/object_store.py

@@ -508,10 +508,26 @@ class BaseObjectStore:
 
 
 
 
 class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
 class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
-    def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None:
+    def __init__(
+        self,
+        pack_compression_level=-1,
+        pack_index_version=None,
+        pack_delta_window_size=None,
+        pack_window_memory=None,
+        pack_delta_cache_size=None,
+        pack_depth=None,
+        pack_threads=None,
+        pack_big_file_threshold=None,
+    ) -> None:
         self._pack_cache: dict[str, Pack] = {}
         self._pack_cache: dict[str, Pack] = {}
         self.pack_compression_level = pack_compression_level
         self.pack_compression_level = pack_compression_level
         self.pack_index_version = pack_index_version
         self.pack_index_version = pack_index_version
+        self.pack_delta_window_size = pack_delta_window_size
+        self.pack_window_memory = pack_window_memory
+        self.pack_delta_cache_size = pack_delta_cache_size
+        self.pack_depth = pack_depth
+        self.pack_threads = pack_threads
+        self.pack_big_file_threshold = pack_big_file_threshold
 
 
     def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
     def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
         """Add a new pack to this object store."""
         """Add a new pack to this object store."""
@@ -915,6 +931,12 @@ class DiskObjectStore(PackBasedObjectStore):
         loose_compression_level=-1,
         loose_compression_level=-1,
         pack_compression_level=-1,
         pack_compression_level=-1,
         pack_index_version=None,
         pack_index_version=None,
+        pack_delta_window_size=None,
+        pack_window_memory=None,
+        pack_delta_cache_size=None,
+        pack_depth=None,
+        pack_threads=None,
+        pack_big_file_threshold=None,
     ) -> None:
     ) -> None:
         """Open an object store.
         """Open an object store.
 
 
@@ -923,10 +945,22 @@ class DiskObjectStore(PackBasedObjectStore):
           loose_compression_level: zlib compression level for loose objects
           loose_compression_level: zlib compression level for loose objects
           pack_compression_level: zlib compression level for pack objects
           pack_compression_level: zlib compression level for pack objects
           pack_index_version: pack index version to use (1, 2, or 3)
           pack_index_version: pack index version to use (1, 2, or 3)
+          pack_delta_window_size: sliding window size for delta compression
+          pack_window_memory: memory limit for delta window operations
+          pack_delta_cache_size: size of cache for delta operations
+          pack_depth: maximum delta chain depth
+          pack_threads: number of threads for pack operations
+          pack_big_file_threshold: threshold for treating files as big
         """
         """
         super().__init__(
         super().__init__(
             pack_compression_level=pack_compression_level,
             pack_compression_level=pack_compression_level,
             pack_index_version=pack_index_version,
             pack_index_version=pack_index_version,
+            pack_delta_window_size=pack_delta_window_size,
+            pack_window_memory=pack_window_memory,
+            pack_delta_cache_size=pack_delta_cache_size,
+            pack_depth=pack_depth,
+            pack_threads=pack_threads,
+            pack_big_file_threshold=pack_big_file_threshold,
         )
         )
         self.path = path
         self.path = path
         self.pack_dir = os.path.join(self.path, PACKDIR)
         self.pack_dir = os.path.join(self.path, PACKDIR)
@@ -967,11 +1001,52 @@ class DiskObjectStore(PackBasedObjectStore):
         except KeyError:
         except KeyError:
             pack_index_version = None
             pack_index_version = None
 
 
+        # Read pack configuration options
+        try:
+            pack_delta_window_size = int(
+                config.get((b"pack",), b"deltaWindowSize").decode()
+            )
+        except KeyError:
+            pack_delta_window_size = None
+        try:
+            pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
+        except KeyError:
+            pack_window_memory = None
+        try:
+            pack_delta_cache_size = int(
+                config.get((b"pack",), b"deltaCacheSize").decode()
+            )
+        except KeyError:
+            pack_delta_cache_size = None
+        try:
+            pack_depth = int(config.get((b"pack",), b"depth").decode())
+        except KeyError:
+            pack_depth = None
+        try:
+            pack_threads = int(config.get((b"pack",), b"threads").decode())
+        except KeyError:
+            pack_threads = None
+        try:
+            pack_big_file_threshold = int(
+                config.get((b"pack",), b"bigFileThreshold").decode()
+            )
+        except KeyError:
+            pack_big_file_threshold = None
+
         # Read core.commitGraph setting
         # Read core.commitGraph setting
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
 
 
         instance = cls(
         instance = cls(
-            path, loose_compression_level, pack_compression_level, pack_index_version
+            path,
+            loose_compression_level,
+            pack_compression_level,
+            pack_index_version,
+            pack_delta_window_size,
+            pack_window_memory,
+            pack_delta_cache_size,
+            pack_depth,
+            pack_threads,
+            pack_big_file_threshold,
         )
         )
         instance._use_commit_graph = use_commit_graph
         instance._use_commit_graph = use_commit_graph
         return instance
         return instance
@@ -1042,7 +1117,15 @@ class DiskObjectStore(PackBasedObjectStore):
         new_packs = []
         new_packs = []
         for f in pack_files:
         for f in pack_files:
             if f not in self._pack_cache:
             if f not in self._pack_cache:
-                pack = Pack(os.path.join(self.pack_dir, f))
+                pack = Pack(
+                    os.path.join(self.pack_dir, f),
+                    delta_window_size=self.pack_delta_window_size,
+                    window_memory=self.pack_window_memory,
+                    delta_cache_size=self.pack_delta_cache_size,
+                    depth=self.pack_depth,
+                    threads=self.pack_threads,
+                    big_file_threshold=self.pack_big_file_threshold,
+                )
                 new_packs.append(pack)
                 new_packs.append(pack)
                 self._pack_cache[f] = pack
                 self._pack_cache[f] = pack
         # Remove disappeared pack files
         # Remove disappeared pack files
@@ -1209,7 +1292,15 @@ class DiskObjectStore(PackBasedObjectStore):
             )
             )
 
 
         # Add the pack to the store and return it.
         # Add the pack to the store and return it.
-        final_pack = Pack(pack_base_name)
+        final_pack = Pack(
+            pack_base_name,
+            delta_window_size=self.pack_delta_window_size,
+            window_memory=self.pack_window_memory,
+            delta_cache_size=self.pack_delta_cache_size,
+            depth=self.pack_depth,
+            threads=self.pack_threads,
+            big_file_threshold=self.pack_big_file_threshold,
+        )
         final_pack.check_length_and_checksum()
         final_pack.check_length_and_checksum()
         self._add_cached_pack(pack_base_name, final_pack)
         self._add_cached_pack(pack_base_name, final_pack)
         return final_pack
         return final_pack

+ 49 - 9
dulwich/pack.py

@@ -1274,7 +1274,19 @@ class PackData:
     position.  It will all just throw a zlib or KeyError.
     position.  It will all just throw a zlib or KeyError.
     """
     """
 
 
-    def __init__(self, filename: Union[str, os.PathLike], file=None, size=None) -> None:
+    def __init__(
+        self,
+        filename: Union[str, os.PathLike],
+        file=None,
+        size=None,
+        *,
+        delta_window_size=None,
+        window_memory=None,
+        delta_cache_size=None,
+        depth=None,
+        threads=None,
+        big_file_threshold=None,
+    ) -> None:
         """Create a PackData object representing the pack in the given filename.
         """Create a PackData object representing the pack in the given filename.
 
 
         The file must exist and stay readable until the object is disposed of.
         The file must exist and stay readable until the object is disposed of.
@@ -1286,13 +1298,23 @@ class PackData:
         self._filename = filename
         self._filename = filename
         self._size = size
         self._size = size
         self._header_size = 12
         self._header_size = 12
+        self.delta_window_size = delta_window_size
+        self.window_memory = window_memory
+        self.delta_cache_size = delta_cache_size
+        self.depth = depth
+        self.threads = threads
+        self.big_file_threshold = big_file_threshold
+
         if file is None:
         if file is None:
             self._file = GitFile(self._filename, "rb")
             self._file = GitFile(self._filename, "rb")
         else:
         else:
             self._file = file
             self._file = file
         (version, self._num_objects) = read_pack_header(self._file.read)
         (version, self._num_objects) = read_pack_header(self._file.read)
+
+        # Use delta_cache_size config if available, otherwise default
+        cache_size = delta_cache_size or (1024 * 1024 * 20)
         self._offset_cache = LRUSizeCache[int, tuple[int, OldUnpackedObject]](
         self._offset_cache = LRUSizeCache[int, tuple[int, OldUnpackedObject]](
-            1024 * 1024 * 20, compute_size=_compute_object_size
+            cache_size, compute_size=_compute_object_size
         )
         )
 
 
     @property
     @property
@@ -2724,14 +2746,37 @@ class Pack:
     _idx: Optional[PackIndex]
     _idx: Optional[PackIndex]
 
 
     def __init__(
     def __init__(
-        self, basename, resolve_ext_ref: Optional[ResolveExtRefFn] = None
+        self,
+        basename,
+        resolve_ext_ref: Optional[ResolveExtRefFn] = None,
+        *,
+        delta_window_size=None,
+        window_memory=None,
+        delta_cache_size=None,
+        depth=None,
+        threads=None,
+        big_file_threshold=None,
     ) -> None:
     ) -> None:
         self._basename = basename
         self._basename = basename
         self._data = None
         self._data = None
         self._idx = None
         self._idx = None
         self._idx_path = self._basename + ".idx"
         self._idx_path = self._basename + ".idx"
         self._data_path = self._basename + ".pack"
         self._data_path = self._basename + ".pack"
-        self._data_load = lambda: PackData(self._data_path)
+        self.delta_window_size = delta_window_size
+        self.window_memory = window_memory
+        self.delta_cache_size = delta_cache_size
+        self.depth = depth
+        self.threads = threads
+        self.big_file_threshold = big_file_threshold
+        self._data_load = lambda: PackData(
+            self._data_path,
+            delta_window_size=delta_window_size,
+            window_memory=window_memory,
+            delta_cache_size=delta_cache_size,
+            depth=depth,
+            threads=threads,
+            big_file_threshold=big_file_threshold,
+        )
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self._idx_load = lambda: load_pack_index(self._idx_path)
         self.resolve_ext_ref = resolve_ext_ref
         self.resolve_ext_ref = resolve_ext_ref
 
 
@@ -2989,11 +3034,6 @@ class Pack:
         chunks = base_obj
         chunks = base_obj
         for prev_offset, _delta_type, delta in reversed(delta_stack):
         for prev_offset, _delta_type, delta in reversed(delta_stack):
             chunks = apply_delta(chunks, delta)
             chunks = apply_delta(chunks, delta)
-            # TODO(dborowitz): This can result in poor performance if
-            # large base objects are separated from deltas in the pack.
-            # We should reorganize so that we apply deltas to all
-            # objects in a chain one after the other to optimize cache
-            # performance.
             if prev_offset is not None:
             if prev_offset is not None:
                 self.data._offset_cache[prev_offset] = base_type, chunks
                 self.data._offset_cache[prev_offset] = base_type, chunks
         return base_type, chunks
         return base_type, chunks