Browse Source

Fix ref optimization to preserve file locking behavior

The optimization to avoid fsync for unchanged refs was skipping lock
acquisition entirely, which could mask FileLocked errors when another
process holds the lock. This fix ensures we always acquire the lock
first, then check if the ref needs updating while holding it.

This preserves the performance benefit (avoiding fsync) while maintaining
proper concurrency control and error reporting.
Jelmer Vernooij 1 tháng trước cách đây
mục cha
commit
d5a59c85bc
3 tập tin đã thay đổi với 84 bổ sung19 xóa
  1. 5 0
      NEWS
  2. 26 19
      dulwich/refs.py
  3. 53 0
      tests/test_refs.py

+ 5 - 0
NEWS

@@ -2,6 +2,11 @@
 
  * Add ``reflog`` command in porcelain. (Jelmer Vernooij)
 
+ * Optimize writing unchanged refs by avoiding unnecessary fsync
+   when ref already has the desired value. File locking behavior
+   is preserved to ensure proper concurrency control.
+   (Dan Villiom Podlaski Christiansen, Jelmer Vernooij, #1120)
+
 0.23.2	2025-07-07
 
  * Print deprecations on usage, not import.

+ 26 - 19
dulwich/refs.py

@@ -928,29 +928,36 @@ class DiskRefsContainer(RefsContainer):
             probe_ref = os.path.dirname(probe_ref)
 
         ensure_dir_exists(os.path.dirname(filename))
-
-        # first, check if we need to write anything at all without
-        # acquiring the lock, which triggers a fs write, close and
-        # thus fsync()
-        if self.read_loose_ref(realname) != new_ref:
-            with GitFile(filename, "wb") as f:
-                if old_ref is not None:
-                    try:
-                        # read again while holding the lock
-                        orig_ref = self.read_loose_ref(realname)
-                        if orig_ref is None:
-                            orig_ref = self.get_packed_refs().get(realname, ZERO_SHA)
-                        if orig_ref != old_ref:
-                            f.abort()
-                            return False
-                    except OSError:
-                        f.abort()
-                        raise
+        with GitFile(filename, "wb") as f:
+            if old_ref is not None:
                 try:
-                    f.write(new_ref + b"\n")
+                    # read again while holding the lock to handle race conditions
+                    orig_ref = self.read_loose_ref(realname)
+                    if orig_ref is None:
+                        orig_ref = self.get_packed_refs().get(realname, ZERO_SHA)
+                    if orig_ref != old_ref:
+                        f.abort()
+                        return False
                 except OSError:
                     f.abort()
                     raise
+
+            # Check if ref already has the desired value while holding the lock
+            # This avoids fsync when ref is unchanged but still detects lock conflicts
+            current_ref = self.read_loose_ref(realname)
+            if current_ref is None:
+                current_ref = packed_refs.get(realname, None)
+
+            if current_ref is not None and current_ref == new_ref:
+                # Ref already has desired value, abort write to avoid fsync
+                f.abort()
+                return True
+
+            try:
+                f.write(new_ref + b"\n")
+            except OSError:
+                f.abort()
+                raise
             self._log(
                 realname,
                 old_ref,

+ 53 - 0
tests/test_refs.py

@@ -784,6 +784,59 @@ class DiskRefsContainerTests(RefsContainerTests, TestCase):
         expected_refs.add(b"HEAD")
         self.assertEqual(expected_refs, set(self._repo.get_refs().keys()))
 
+    def test_write_unchanged_ref_optimization(self):
+        # Test that writing unchanged ref avoids fsync but still checks locks
+        ref_name = b"refs/heads/unchanged"
+        ref_value = b"a" * 40
+
+        # Set initial ref value
+        self._refs[ref_name] = ref_value
+
+        # Test 1: Writing same value should succeed without changes
+        result = self._refs.set_if_equals(ref_name, ref_value, ref_value)
+        self.assertTrue(result)
+
+        # Test 2: Writing same value with wrong old_ref should fail
+        wrong_old = b"b" * 40
+        result = self._refs.set_if_equals(ref_name, wrong_old, ref_value)
+        self.assertFalse(result)
+
+        # Test 3: Writing different value should update normally
+        new_value = b"c" * 40
+        result = self._refs.set_if_equals(ref_name, ref_value, new_value)
+        self.assertTrue(result)
+        self.assertEqual(new_value, self._refs[ref_name])
+
+    def test_write_unchanged_ref_with_lock(self):
+        # Test that file locking is still detected when ref unchanged
+
+        from dulwich.file import FileLocked
+
+        ref_name = b"refs/heads/locktest"
+        ref_value = b"d" * 40
+
+        # Set initial ref value
+        self._refs[ref_name] = ref_value
+
+        # Get the actual file path
+        ref_file = os.path.join(os.fsencode(self._refs.path), ref_name)
+        lock_file = ref_file + b".lock"
+
+        # Create lock file to simulate another process holding lock
+        with open(lock_file, "wb") as f:
+            f.write(b"locked by another process")
+
+            # Try to write same value - should raise FileLocked
+            with self.assertRaises(FileLocked):
+                self._refs[ref_name] = ref_value
+
+        # Clean up lock file
+        if os.path.exists(lock_file):
+            os.unlink(lock_file)
+
+        # Now it should work
+        self._refs[ref_name] = ref_value
+
 
 _TEST_REFS_SERIALIZED = (
     b"42d06bd4b77fed026b154d16493e5deab78f02ec\t"