Ver código fonte

Handle trailing backslashes in config files appropriately

Fixes #1088
Jelmer Vernooij 2 meses atrás
pai
commit
fa9036bb79
3 arquivos alterados com 121 adições e 22 exclusões
  1. 3 0
      NEWS
  2. 68 22
      dulwich/config.py
  3. 50 0
      tests/test_config.py

+ 3 - 0
NEWS

@@ -46,6 +46,9 @@
 
  * Use ``dissolve`` to manage deprecations. (Jelmer Vernooij)
 
+ * Handle trailing backslashes in config files appropriately.
+   (Jelmer Vernooij, #1088)
+
 0.22.8	2025-03-02
 
  * Allow passing in plain strings to ``dulwich.porcelain.tag_create``

+ 68 - 22
dulwich/config.py

@@ -419,20 +419,26 @@ def _parse_string(value: bytes) -> bytes:
         c = value[i]
         if c == ord(b"\\"):
             i += 1
-            try:
-                v = _ESCAPE_TABLE[value[i]]
-            except IndexError as exc:
-                raise ValueError(
-                    f"escape character in {value!r} at {i} before end of string"
-                ) from exc
-            except KeyError as exc:
-                raise ValueError(
-                    f"escape character followed by unknown character {value[i]!r} at {i} in {value!r}"
-                ) from exc
-            if whitespace:
-                ret.extend(whitespace)
-                whitespace = bytearray()
-            ret.append(v)
+            if i >= len(value):
+                # Backslash at end of string - treat as literal backslash
+                if whitespace:
+                    ret.extend(whitespace)
+                    whitespace = bytearray()
+                ret.append(ord(b"\\"))
+            else:
+                try:
+                    v = _ESCAPE_TABLE[value[i]]
+                    if whitespace:
+                        ret.extend(whitespace)
+                        whitespace = bytearray()
+                    ret.append(v)
+                except KeyError:
+                    # Unknown escape sequence - treat backslash as literal and process next char normally
+                    if whitespace:
+                        ret.extend(whitespace)
+                        whitespace = bytearray()
+                    ret.append(ord(b"\\"))
+                    i -= 1  # Reprocess the character after the backslash
         elif c == ord(b'"'):
             in_quotes = not in_quotes
         elif c in _COMMENT_CHARS and not in_quotes:
@@ -493,6 +499,44 @@ def _strip_comments(line: bytes) -> bytes:
     return line
 
 
+def _is_line_continuation(value: bytes) -> bool:
+    """Check if a value ends with a line continuation backslash.
+
+    A line continuation occurs when a line ends with a backslash that is:
+    1. Not escaped (not preceded by another backslash)
+    2. Not within quotes
+
+    Args:
+        value: The value to check
+
+    Returns:
+        True if the value ends with a line continuation backslash
+    """
+    if not value.endswith((b"\\\n", b"\\\r\n")):
+        return False
+
+    # Remove only the newline characters, keep the content including the backslash
+    if value.endswith(b"\\\r\n"):
+        content = value[:-2]  # Remove \r\n, keep the \
+    else:
+        content = value[:-1]  # Remove \n, keep the \
+
+    if not content.endswith(b"\\"):
+        return False
+
+    # Count consecutive backslashes at the end
+    backslash_count = 0
+    for i in range(len(content) - 1, -1, -1):
+        if content[i : i + 1] == b"\\":
+            backslash_count += 1
+        else:
+            break
+
+    # If we have an odd number of backslashes, the last one is a line continuation
+    # If we have an even number, they are all escaped and there's no continuation
+    return backslash_count % 2 == 1
+
+
 def _parse_section_header_line(line: bytes) -> tuple[Section, bytes]:
     # Parse section header ("[bla]")
     line = _strip_comments(line).rstrip()
@@ -573,10 +617,11 @@ class ConfigFile(ConfigDict):
                 setting = setting.strip()
                 if not _check_variable_name(setting):
                     raise ValueError(f"invalid variable name {setting!r}")
-                if value.endswith(b"\\\n"):
-                    continuation = value[:-2]
-                elif value.endswith(b"\\\r\n"):
-                    continuation = value[:-3]
+                if _is_line_continuation(value):
+                    if value.endswith(b"\\\r\n"):
+                        continuation = value[:-3]
+                    else:
+                        continuation = value[:-2]
                 else:
                     continuation = None
                     value = _parse_string(value)
@@ -584,10 +629,11 @@ class ConfigFile(ConfigDict):
                     setting = None
             else:  # continuation line
                 assert continuation is not None
-                if line.endswith(b"\\\n"):
-                    continuation += line[:-2]
-                elif line.endswith(b"\\\r\n"):
-                    continuation += line[:-3]
+                if _is_line_continuation(line):
+                    if line.endswith(b"\\\r\n"):
+                        continuation += line[:-3]
+                    else:
+                        continuation += line[:-2]
                 else:
                     continuation += line
                     value = _parse_string(continuation)

+ 50 - 0
tests/test_config.py

@@ -266,6 +266,56 @@ who\"
         c.write_to_file(f)
         self.assertEqual(b'[xandikos]\n\tcolor = "#665544"\n', f.getvalue())
 
+    def test_windows_path_with_trailing_backslash_unquoted(self) -> None:
+        """Test that Windows paths ending with escaped backslash are handled correctly."""
+        # This reproduces the issue from https://github.com/jelmer/dulwich/issues/1088
+        # A single backslash at the end should actually be a line continuation in strict Git config
+        # But we want to be more tolerant like Git itself
+        cf = self.from_file(
+            b'[core]\n\trepositoryformatversion = 0\n[remote "origin"]\n\turl = C:/Users/test\\\\\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n'
+        )
+        self.assertEqual(b"C:/Users/test\\", cf.get((b"remote", b"origin"), b"url"))
+        self.assertEqual(
+            b"+refs/heads/*:refs/remotes/origin/*",
+            cf.get((b"remote", b"origin"), b"fetch"),
+        )
+
+    def test_windows_path_with_trailing_backslash_quoted(self) -> None:
+        """Test that quoted Windows paths with escaped backslashes work correctly."""
+        cf = self.from_file(
+            b'[core]\n\trepositoryformatversion = 0\n[remote "origin"]\n\turl = "C:\\\\Users\\\\test\\\\"\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n'
+        )
+        self.assertEqual(b"C:\\Users\\test\\", cf.get((b"remote", b"origin"), b"url"))
+        self.assertEqual(
+            b"+refs/heads/*:refs/remotes/origin/*",
+            cf.get((b"remote", b"origin"), b"fetch"),
+        )
+
+    def test_single_backslash_at_line_end_shows_proper_escaping_needed(self) -> None:
+        """Test that demonstrates proper escaping is needed for single backslashes."""
+        # This test documents the current behavior: a single backslash at the end of a line
+        # is treated as a line continuation per Git config spec. Users should escape backslashes.
+
+        # This reproduces the original issue - single backslash causes line continuation
+        cf = self.from_file(
+            b'[remote "origin"]\n\turl = C:/Users/test\\\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n'
+        )
+        # The result shows that line continuation occurred
+        self.assertEqual(
+            b"C:/Users/testfetch = +refs/heads/*:refs/remotes/origin/*",
+            cf.get((b"remote", b"origin"), b"url"),
+        )
+
+        # The proper way to include a literal backslash is to escape it
+        cf2 = self.from_file(
+            b'[remote "origin"]\n\turl = C:/Users/test\\\\\n\tfetch = +refs/heads/*:refs/remotes/origin/*\n'
+        )
+        self.assertEqual(b"C:/Users/test\\", cf2.get((b"remote", b"origin"), b"url"))
+        self.assertEqual(
+            b"+refs/heads/*:refs/remotes/origin/*",
+            cf2.get((b"remote", b"origin"), b"fetch"),
+        )
+
 
 class ConfigDictTests(TestCase):
     def test_get_set(self) -> None: