Sfoglia il codice sorgente

config: disregard UTF-8 BOM when reading file

Previously, reading a config file that started with a UTF-8 BOM would
die with an error. This was originally reported to hg-git, where we
use Config.from_file() for reading `.gitmodules`.

https://foss.heptapod.net/mercurial/hg-git/-/issues/354
Dan Villiom Podlaski Christiansen 3 anni fa
parent
commit
8fd845e05c
2 file modificati con 7 aggiunte e 0 eliminazioni
  1. 2 0
      dulwich/config.py
  2. 5 0
      dulwich/tests/test_config.py

+ 2 - 0
dulwich/config.py

@@ -395,6 +395,8 @@ class ConfigFile(ConfigDict):
         setting = None
         continuation = None
         for lineno, line in enumerate(f.readlines()):
+            if lineno == 0 and line.startswith(b'\xef\xbb\xbf'):
+                line = line[3:]
             line = line.lstrip()
             if setting is None:
                 # Parse section header ("[bla]")

+ 5 - 0
dulwich/tests/test_config.py

@@ -108,6 +108,11 @@ class ConfigFileTests(TestCase):
         self.assertEqual(b"bar", cf.get((b"core",), b"foo"))
         self.assertEqual(b"bar", cf.get((b"core", b"foo"), b"foo"))
 
+    def test_from_file_utf8_bom(self):
+        text = "[core]\nfoo = b\u00e4r\n".encode("utf-8-sig")
+        cf = self.from_file(text)
+        self.assertEqual(b"b\xc3\xa4r", cf.get((b"core",), b"foo"))
+
     def test_from_file_section_case_insensitive_lower(self):
         cf = self.from_file(b"[cOre]\nfOo = bar\n")
         self.assertEqual(b"bar", cf.get((b"core",), b"foo"))