
Add support for core.fsyncObjectFiles configuration option

Fixes #1817
Jelmer Vernooij · 2 months ago
Parent commit: a78a416ae6
4 changed files with 119 additions and 10 deletions
  1. NEWS (+3 -0)
  2. dulwich/file.py (+16 -2)
  3. dulwich/object_store.py (+18 -8)
  4. tests/test_object_store.py (+82 -0)
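
core.fsyncObjectFiles mirrors the option Git itself exposes: when enabled, dulwich calls os.fsync() on object files before they are renamed into place, trading write throughput for durability. The setting is read in DiskObjectStore.from_config() and defaults to off. A minimal sketch of enabling it programmatically (the objects directory path is a placeholder and the store is assumed to already exist on disk):

    from dulwich.config import ConfigFile
    from dulwich.object_store import DiskObjectStore

    config = ConfigFile()
    # Equivalent to `git config core.fsyncObjectFiles true`.
    config.set((b"core",), b"fsyncObjectFiles", b"true")

    store = DiskObjectStore.from_config("/path/to/repo/.git/objects", config)
    print(store.fsync_object_files)  # True
    store.close()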

+ 3 - 0
NEWS

@@ -5,6 +5,9 @@
    collapse operations, and the 'sdir' extension.
    (Jelmer Vernooij, #1797)
 
+ * Add support for core.fsyncObjectFiles configuration option.
+   (Jelmer Vernooij, #1817)
+
  * Work around typing module bug in Python 3.9.0 and 3.9.1 by using string
    annotation for Callable type in reflog.py. (Jelmer Vernooij, #1948)
 

+ 16 - 2
dulwich/file.py

@@ -72,6 +72,7 @@ def GitFile(
     mode: Literal["wb"],
     bufsize: int = -1,
     mask: int = 0o644,
+    fsync: bool = True,
 ) -> "_GitFile": ...
 
 
@@ -81,6 +82,7 @@ def GitFile(
     mode: Literal["rb"] = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
+    fsync: bool = True,
 ) -> IO[bytes]: ...
 
 
@@ -90,6 +92,7 @@ def GitFile(
     mode: str = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
+    fsync: bool = True,
 ) -> Union[IO[bytes], "_GitFile"]: ...
 
 
@@ -98,6 +101,7 @@ def GitFile(
     mode: str = "rb",
     bufsize: int = -1,
     mask: int = 0o644,
+    fsync: bool = True,
 ) -> Union[IO[bytes], "_GitFile"]:
     """Create a file object that obeys the git file locking protocol.
 
@@ -113,6 +117,13 @@ def GitFile(
     The default file mask makes any created files user-writable and
     world-readable.
 
+    Args:
+      filename: Path to the file
+      mode: File mode (only 'rb' and 'wb' are supported)
+      bufsize: Buffer size for file operations
+      mask: File mask for created files
+      fsync: Whether to call fsync() before closing (default: True)
+
     """
     if "a" in mode:
         raise OSError("append mode not supported for Git files")
@@ -121,7 +132,7 @@ def GitFile(
     if "b" not in mode:
         raise OSError("text mode not supported for Git files")
     if "w" in mode:
-        return _GitFile(filename, mode, bufsize, mask)
+        return _GitFile(filename, mode, bufsize, mask, fsync)
     else:
         return open(filename, mode, bufsize)
 
@@ -194,9 +205,11 @@ class _GitFile(IO[bytes]):
         mode: str,
         bufsize: int,
         mask: int,
+        fsync: bool = True,
     ) -> None:
         # Convert PathLike to str/bytes for our internal use
         self._filename: Union[str, bytes] = os.fspath(filename)
+        self._fsync = fsync
         if isinstance(self._filename, bytes):
             self._lockfilename: Union[str, bytes] = self._filename + b".lock"
         else:
@@ -247,7 +260,8 @@ class _GitFile(IO[bytes]):
         if self._closed:
             return
         self._file.flush()
-        os.fsync(self._file.fileno())
+        if self._fsync:
+            os.fsync(self._file.fileno())
         self._file.close()
         try:
             if getattr(os, "replace", None) is not None:
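
The new fsync parameter defaults to True, so existing GitFile callers keep the flush-and-fsync-before-rename behaviour; passing fsync=False skips only the os.fsync() call. An illustrative sketch with a placeholder path:

    from dulwich.file import GitFile

    # 'wb' writes go to "<name>.lock" and are renamed over the target when
    # the file is closed; with fsync=False the data is flushed but not synced.
    with GitFile("/tmp/example-git-file", "wb", mask=0o644, fsync=False) as f:
        f.write(b"contents written without an fsync before the rename\n")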

+ 18 - 8
dulwich/object_store.py

@@ -1202,6 +1202,7 @@ class DiskObjectStore(PackBasedObjectStore):
         pack_depth: Optional[int] = None,
         pack_threads: Optional[int] = None,
         pack_big_file_threshold: Optional[int] = None,
+        fsync_object_files: bool = False,
     ) -> None:
         """Open an object store.
 
@@ -1216,6 +1217,7 @@ class DiskObjectStore(PackBasedObjectStore):
           pack_depth: maximum delta chain depth
           pack_threads: number of threads for pack operations
           pack_big_file_threshold: threshold for treating files as big
+          fsync_object_files: whether to fsync object files for durability
         """
         super().__init__(
             pack_compression_level=pack_compression_level,
@@ -1233,6 +1235,7 @@ class DiskObjectStore(PackBasedObjectStore):
         self.loose_compression_level = loose_compression_level
         self.pack_compression_level = pack_compression_level
         self.pack_index_version = pack_index_version
+        self.fsync_object_files = fsync_object_files
 
         # Commit graph support - lazy loaded
         self._commit_graph = None
@@ -1317,6 +1320,9 @@ class DiskObjectStore(PackBasedObjectStore):
         # Read core.commitGraph setting
         use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
 
+        # Read core.fsyncObjectFiles setting
+        fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
+
         instance = cls(
             path,
             loose_compression_level,
@@ -1328,6 +1334,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pack_depth,
             pack_threads,
             pack_big_file_threshold,
+            fsync_object_files,
         )
         instance._use_commit_graph = use_commit_graph
         return instance
@@ -1566,12 +1573,13 @@ class DiskObjectStore(PackBasedObjectStore):
             progress=progress,
         )
         f.flush()
-        try:
-            fileno = f.fileno()
-        except AttributeError:
-            pass
-        else:
-            os.fsync(fileno)
+        if self.fsync_object_files:
+            try:
+                fileno = f.fileno()
+            except AttributeError as e:
+                raise OSError("fsync requested but file has no fileno()") from e
+            else:
+                os.fsync(fileno)
         f.close()
 
         entries.extend(extra_entries)
@@ -1594,7 +1602,9 @@ class DiskObjectStore(PackBasedObjectStore):
         os.rename(path, target_pack_path)
 
         # Write the index.
-        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
+        with GitFile(
+            target_index_path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files
+        ) as index_file:
             write_pack_index(
                 index_file, entries, pack_sha, version=self.pack_index_version
             )
@@ -1694,7 +1704,7 @@ class DiskObjectStore(PackBasedObjectStore):
             pass
         if os.path.exists(path):
             return  # Already there, no need to write again
-        with GitFile(path, "wb", mask=PACK_MODE) as f:
+        with GitFile(path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files) as f:
             f.write(
                 obj.as_legacy_object(compression_level=self.loose_compression_level)
             )
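
Since fsync_object_files is also a plain constructor argument, code that builds a DiskObjectStore directly rather than via from_config() can opt in too. A sketch, assuming the objects directory already exists at the placeholder path:

    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob

    store = DiskObjectStore("/path/to/repo/.git/objects", fsync_object_files=True)
    blob = Blob.from_string(b"example data")
    # The loose object is written via GitFile, which now fsyncs the temporary
    # file before the atomic rename into place.
    store.add_object(blob)
    store.close()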

+ 82 - 0
tests/test_object_store.py

@@ -766,6 +766,88 @@ class DiskObjectStoreTests(PackBasedObjectStoreTests, TestCase):
         depth_disabled = get_depth(self.store, c4.id)
         self.assertEqual(3, depth_disabled)
 
+    def test_fsync_object_files_disabled_by_default(self) -> None:
+        """Test that fsync is disabled by default for object files."""
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be disabled by default
+        self.assertFalse(store.fsync_object_files)
+
+    def test_fsync_object_files_enabled_by_config(self) -> None:
+        """Test that fsync can be enabled via core.fsyncObjectFiles config."""
+        from unittest.mock import patch
+
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        config[(b"core",)] = {b"fsyncObjectFiles": b"true"}
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be enabled
+        self.assertTrue(store.fsync_object_files)
+
+        # Test that fsync is actually called when adding objects
+        blob = make_object(Blob, data=b"test fsync data")
+        with patch("os.fsync") as mock_fsync:
+            store.add_object(blob)
+            # fsync should have been called
+            mock_fsync.assert_called_once()
+
+    def test_fsync_object_files_disabled_by_config(self) -> None:
+        """Test that fsync can be explicitly disabled via config."""
+        from unittest.mock import patch
+
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        config[(b"core",)] = {b"fsyncObjectFiles": b"false"}
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        # Should be disabled
+        self.assertFalse(store.fsync_object_files)
+
+        # Test that fsync is NOT called when adding objects
+        blob = make_object(Blob, data=b"test no fsync data")
+        with patch("os.fsync") as mock_fsync:
+            store.add_object(blob)
+            # fsync should NOT have been called
+            mock_fsync.assert_not_called()
+
+    def test_fsync_object_files_for_pack_files(self) -> None:
+        """Test that fsync config applies to pack files."""
+        from unittest.mock import patch
+
+        from dulwich.config import ConfigDict
+        from dulwich.pack import write_pack_objects
+
+        # Test with fsync enabled
+        config = ConfigDict()
+        config[(b"core",)] = {b"fsyncObjectFiles": b"true"}
+        store = DiskObjectStore.from_config(self.store_dir, config)
+        self.addCleanup(store.close)
+
+        self.assertTrue(store.fsync_object_files)
+
+        # Add some objects via pack
+        blob = make_object(Blob, data=b"pack test data")
+        with patch("os.fsync") as mock_fsync:
+            f, commit, abort = store.add_pack()
+            try:
+                write_pack_objects(f.write, [(blob, None)])
+            except BaseException:
+                abort()
+                raise
+            else:
+                commit()
+            # fsync should have been called at least once (for pack file and index)
+            self.assertGreater(mock_fsync.call_count, 0)
+
 
 class TreeLookupPathTests(TestCase):
     def setUp(self) -> None: