Переглянути джерело

refs: optimise writing unchanged refs (#1120)

This is inspired by [an issue filed in
hg-git](https://foss.heptapod.net/mercurial/hg-git/-/issues/401);
essentially, writing a _lot_ of unchanged references is quite slow, due
to the `fsync()` implied in re-writing the file. Although we could fix
this within hg-git, it seems to me that the general optimisation might
apply to other users of Dulwich, so this PR makes it so that it avoids
acquiring the lock to a ref if that ref already matches the expected
update.
Jelmer Vernooij 1 місяць тому
батько
коміт
1a5f850af6
3 змінених файлів з 71 додано та 1 видалено
  1. 5 0
      NEWS
  2. 13 1
      dulwich/refs.py
  3. 53 0
      tests/test_refs.py

+ 5 - 0
NEWS

@@ -2,6 +2,11 @@
 
  * Add ``reflog`` command in porcelain. (Jelmer Vernooij)
 
+ * Optimize writing unchanged refs by avoiding unnecessary fsync
+   when ref already has the desired value. File locking behavior
+   is preserved to ensure proper concurrency control.
+   (Dan Villiom Podlaski Christiansen, Jelmer Vernooij, #1120)
+
 0.23.2	2025-07-07
 
  * Print deprecations on usage, not import.

+ 13 - 1
dulwich/refs.py

@@ -931,7 +931,7 @@ class DiskRefsContainer(RefsContainer):
         with GitFile(filename, "wb") as f:
             if old_ref is not None:
                 try:
-                    # read again while holding the lock
+                    # read again while holding the lock to handle race conditions
                     orig_ref = self.read_loose_ref(realname)
                     if orig_ref is None:
                         orig_ref = self.get_packed_refs().get(realname, ZERO_SHA)
@@ -941,6 +941,18 @@ class DiskRefsContainer(RefsContainer):
                 except OSError:
                     f.abort()
                     raise
+
+            # Check if ref already has the desired value while holding the lock
+            # This avoids fsync when ref is unchanged but still detects lock conflicts
+            current_ref = self.read_loose_ref(realname)
+            if current_ref is None:
+                current_ref = packed_refs.get(realname, None)
+
+            if current_ref is not None and current_ref == new_ref:
+                # Ref already has desired value, abort write to avoid fsync
+                f.abort()
+                return True
+
             try:
                 f.write(new_ref + b"\n")
             except OSError:

+ 53 - 0
tests/test_refs.py

@@ -784,6 +784,59 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         expected_refs.add(b"HEAD")
         self.assertEqual(expected_refs, set(self._repo.get_refs().keys()))
 
+    def test_write_unchanged_ref_optimization(self):
+        # Test that writing unchanged ref avoids fsync but still checks locks
+        ref_name = b"refs/heads/unchanged"
+        ref_value = b"a" * 40
+
+        # Set initial ref value
+        self._refs[ref_name] = ref_value
+
+        # Test 1: Writing same value should succeed without changes
+        result = self._refs.set_if_equals(ref_name, ref_value, ref_value)
+        self.assertTrue(result)
+
+        # Test 2: Writing same value with wrong old_ref should fail
+        wrong_old = b"b" * 40
+        result = self._refs.set_if_equals(ref_name, wrong_old, ref_value)
+        self.assertFalse(result)
+
+        # Test 3: Writing different value should update normally
+        new_value = b"c" * 40
+        result = self._refs.set_if_equals(ref_name, ref_value, new_value)
+        self.assertTrue(result)
+        self.assertEqual(new_value, self._refs[ref_name])
+
+    def test_write_unchanged_ref_with_lock(self):
+        # Test that file locking is still detected when ref unchanged
+
+        from dulwich.file import FileLocked
+
+        ref_name = b"refs/heads/locktest"
+        ref_value = b"d" * 40
+
+        # Set initial ref value
+        self._refs[ref_name] = ref_value
+
+        # Get the actual file path
+        ref_file = os.path.join(os.fsencode(self._refs.path), ref_name)
+        lock_file = ref_file + b".lock"
+
+        # Create lock file to simulate another process holding lock
+        with open(lock_file, "wb") as f:
+            f.write(b"locked by another process")
+
+            # Try to write same value - should raise FileLocked
+            with self.assertRaises(FileLocked):
+                self._refs[ref_name] = ref_value
+
+        # Clean up lock file
+        if os.path.exists(lock_file):
+            os.unlink(lock_file)
+
+        # Now it should work
+        self._refs[ref_name] = ref_value
+
 
 _TEST_REFS_SERIALIZED = (
     b"42d06bd4b77fed026b154d16493e5deab78f02ec\t"