Browse Source

Add per-URL http.extraHeader configuration support (#1969)

Fixes #882
Jelmer Vernooij 2 months ago
parent
commit
12a5f0c460
3 changed files with 243 additions and 36 deletions
  1. 6 0
      NEWS
  2. 117 36
      dulwich/client.py
  3. 120 0
      tests/test_client.py

+ 6 - 0
NEWS

@@ -4,6 +4,12 @@
    implementation uses the similar crate for efficient diff computation.
    (Jelmer Vernooij)
 
+ * Extend ``http.extraHeader`` configuration to support per-URL settings.
+   Allows configuring different HTTP headers for specific URLs using
+   ``http.<url>.extraHeader`` syntax, enabling authentication in CI/CD
+   environments like GitHub Actions. More specific URL configurations
+   override less specific ones.  (Jelmer Vernooij, #882)
+
  * Add support for namespace isolation via ``NamespacedRefsContainer``.
    Implements Git's namespace feature for isolating refs within a single
    repository using the ``refs/namespaces/`` prefix. (Jelmer Vernooij, #1809)

+ 117 - 36
dulwich/client.py

@@ -108,6 +108,7 @@ if TYPE_CHECKING:
 
 from .bundle import Bundle
 from .config import Config, apply_instead_of, get_xdg_config_home_path
+from .credentials import match_partial_url, match_urls
 from .errors import GitProtocolError, HangupException, NotGitRepository, SendPackError
 from .object_store import GraphWalker
 from .pack import (
@@ -3149,6 +3150,58 @@ def default_user_agent_string() -> str:
     return "git/dulwich/{}".format(".".join([str(x) for x in dulwich.__version__]))
 
 
+def _urlmatch_http_sections(
+    config: Config, url: Optional[str]
+) -> Iterator[tuple[bytes, ...]]:
+    """Yield http config sections matching the given URL, ordered by specificity.
+
+    Yields sections from least specific to most specific, so callers can
+    apply settings in order with more specific settings overriding less specific ones.
+
+    Args:
+      config: Git configuration object
+      url: URL to match against config sections (if None, only yields global http section)
+
+    Yields:
+      Config section tuples that match the URL, ordered by specificity
+    """
+    encoding = getattr(config, "encoding", None) or sys.getdefaultencoding()
+    parsed_url = urlparse(url) if url else None
+
+    # Collect all matching sections with their specificity
+    # (specificity is based on URL path length - longer = more specific)
+    matching_sections: list[tuple[int, tuple[bytes, ...]]] = []
+
+    for config_section in config.sections():
+        if config_section[0] != b"http":
+            continue
+
+        if len(config_section) < 2:
+            # Global http section (least specific)
+            matching_sections.append((0, config_section))
+        elif parsed_url is not None:
+            # URL-specific http section - only match if we have a URL
+            config_url = config_section[1].decode(encoding)
+            parsed_config_url = urlparse(config_url)
+
+            is_match = False
+            if parsed_config_url.scheme and parsed_config_url.netloc:
+                is_match = match_urls(parsed_url, parsed_config_url)
+            else:
+                is_match = match_partial_url(parsed_url, config_url)
+
+            if is_match:
+                # Calculate specificity based on URL path length
+                specificity = len(parsed_config_url.path.rstrip("/"))
+                matching_sections.append((specificity, config_section))
+
+    # Sort by specificity (least specific first)
+    matching_sections.sort(key=lambda x: x[0])
+
+    for _, section in matching_sections:
+        yield section
+
+
 def default_urllib3_manager(
     config: Optional[Config],
     pool_manager_cls: Optional[type] = None,
@@ -3191,59 +3244,87 @@ def default_urllib3_manager(
             proxy_server = None
 
     if config is not None:
-        if proxy_server is None:
+        # Iterate through all matching http sections from least to most specific
+        # More specific settings will override less specific ones
+        for section in _urlmatch_http_sections(config, base_url):
+            if proxy_server is None:
+                try:
+                    proxy_server_bytes = config.get(section, b"proxy")
+                except KeyError:
+                    pass
+                else:
+                    if proxy_server_bytes is not None:
+                        proxy_server = proxy_server_bytes.decode("utf-8")
+
             try:
-                proxy_server_bytes = config.get(b"http", b"proxy")
-                if proxy_server_bytes is not None:
-                    proxy_server = proxy_server_bytes.decode("utf-8")
+                user_agent_bytes = config.get(section, b"useragent")
             except KeyError:
                 pass
-        try:
-            user_agent_bytes = config.get(b"http", b"useragent")
-            if user_agent_bytes is not None:
-                user_agent = user_agent_bytes.decode("utf-8")
-        except KeyError:
-            pass
+            else:
+                if user_agent_bytes is not None:
+                    user_agent = user_agent_bytes.decode("utf-8")
 
-        # TODO(jelmer): Support per-host settings
-        try:
-            ssl_verify = config.get_boolean(b"http", b"sslVerify")
-        except KeyError:
-            ssl_verify = True
+            try:
+                ssl_verify_value = config.get_boolean(section, b"sslVerify")
+            except KeyError:
+                pass
+            else:
+                if ssl_verify_value is not None:
+                    ssl_verify = ssl_verify_value
 
-        try:
-            ca_certs_bytes = config.get(b"http", b"sslCAInfo")
-            if ca_certs_bytes is not None:
-                ca_certs = ca_certs_bytes.decode("utf-8")
-        except KeyError:
-            ca_certs = None
-
-        # Check for timeout configuration
-        if timeout is None:
             try:
-                timeout_bytes = config.get(b"http", b"timeout")
-                if timeout_bytes is not None:
-                    timeout = float(timeout_bytes.decode("utf-8"))
+                ca_certs_bytes = config.get(section, b"sslCAInfo")
             except KeyError:
                 pass
+            else:
+                if ca_certs_bytes is not None:
+                    ca_certs = ca_certs_bytes.decode("utf-8")
+
+            if timeout is None:
+                try:
+                    timeout_bytes = config.get(section, b"timeout")
+                except KeyError:
+                    pass
+                else:
+                    if timeout_bytes is not None:
+                        timeout = float(timeout_bytes.decode("utf-8"))
+
+        # Default ssl_verify to True if not set
+        if ssl_verify is None:
+            ssl_verify = True
 
     if user_agent is None:
         user_agent = default_user_agent_string()
 
     headers = {"User-agent": user_agent}
 
-    # Check for extra headers in config
+    # Check for extra headers in config with URL matching
     if config is not None:
-        try:
-            # Git allows multiple http.extraHeader entries
-            extra_headers = config.get_multivar(b"http", b"extraHeader")
+        # Apply extra headers from least specific to most specific
+        for section in _urlmatch_http_sections(config, base_url):
+            try:
+                extra_headers = config.get_multivar(section, b"extraHeader")
+            except KeyError:
+                continue
+
             for extra_header in extra_headers:
-                if extra_header and b": " in extra_header:
-                    # Parse the header (format: "Header-Name: value")
-                    header_name, header_value = extra_header.split(b": ", 1)
+                if not extra_header:
+                    logger.warning("Ignoring empty http.extraHeader value")
+                    continue
+                if b": " not in extra_header:
+                    logger.warning(
+                        "Ignoring invalid http.extraHeader value %r (missing ': ' separator)",
+                        extra_header,
+                    )
+                    continue
+                # Parse the header (format: "Header-Name: value")
+                header_name, header_value = extra_header.split(b": ", 1)
+                try:
                     headers[header_name.decode("utf-8")] = header_value.decode("utf-8")
-        except KeyError:
-            pass
+                except UnicodeDecodeError as e:
+                    logger.warning(
+                        "Ignoring http.extraHeader with invalid UTF-8: %s", e
+                    )
 
     kwargs: dict[str, Union[str, float, None]] = {
         "ca_certs": ca_certs,

+ 120 - 0
tests/test_client.py

@@ -1797,6 +1797,126 @@ class HttpGitClientTests(TestCase):
         self.assertIn("Authorization", c.pool_manager.headers)
         self.assertEqual(c.pool_manager.headers["Authorization"], "Bearer token123")
 
+    def test_http_extra_headers_per_url_config(self) -> None:
+        """Test that per-URL http.extraHeader config values are applied (issue #882)."""
+        from dulwich.config import ConfigDict
+
+        url = "https://github.com/jelmer/dulwich"
+        config = ConfigDict()
+        # Set URL-specific extra header
+        config.set(
+            (b"http", b"https://github.com/"),
+            b"extraHeader",
+            b"Authorization: basic token123",
+        )
+
+        c = HttpGitClient(url, config=config)
+        # Check that the header was added to the pool manager
+        self.assertIn("Authorization", c.pool_manager.headers)
+        self.assertEqual(c.pool_manager.headers["Authorization"], "basic token123")
+
+    def test_http_extra_headers_url_specificity(self) -> None:
+        """Test that more specific URL configs override less specific ones."""
+        from dulwich.config import ConfigDict
+
+        url = "https://github.com/jelmer/dulwich"
+        config = ConfigDict()
+        # Set global header
+        config.set((b"http",), b"extraHeader", b"X-Global: global-value")
+        # Set host-specific header (overrides global)
+        config.set(
+            (b"http", b"https://github.com/"), b"extraHeader", b"X-Global: github-value"
+        )
+        config.add(
+            (b"http", b"https://github.com/"),
+            b"extraHeader",
+            b"Authorization: Bearer token123",
+        )
+
+        c = HttpGitClient(url, config=config)
+        # More specific setting should win
+        self.assertEqual(c.pool_manager.headers["X-Global"], "github-value")
+        self.assertEqual(c.pool_manager.headers["Authorization"], "Bearer token123")
+
+    def test_http_extra_headers_multiple_url_configs(self) -> None:
+        """Test that different URLs can have different extra headers."""
+        from dulwich.config import ConfigDict
+
+        config = ConfigDict()
+        # Set different headers for different URLs
+        config.set(
+            (b"http", b"https://github.com/"),
+            b"extraHeader",
+            b"Authorization: Bearer github-token",
+        )
+        config.set(
+            (b"http", b"https://gitlab.com/"),
+            b"extraHeader",
+            b"Authorization: Bearer gitlab-token",
+        )
+
+        # Test GitHub URL
+        c1 = HttpGitClient("https://github.com/user/repo", config=config)
+        self.assertEqual(
+            c1.pool_manager.headers["Authorization"], "Bearer github-token"
+        )
+
+        # Test GitLab URL
+        c2 = HttpGitClient("https://gitlab.com/user/repo", config=config)
+        self.assertEqual(
+            c2.pool_manager.headers["Authorization"], "Bearer gitlab-token"
+        )
+
+    def test_http_extra_headers_no_match(self) -> None:
+        """Test that non-matching URL configs don't apply."""
+        from dulwich.config import ConfigDict
+
+        url = "https://example.com/repo"
+        config = ConfigDict()
+        # Set header only for GitHub
+        config.set(
+            (b"http", b"https://github.com/"),
+            b"extraHeader",
+            b"Authorization: Bearer token123",
+        )
+
+        c = HttpGitClient(url, config=config)
+        # Authorization header should not be present for example.com
+        self.assertNotIn("Authorization", c.pool_manager.headers)
+
+    def test_http_extra_headers_invalid_format(self) -> None:
+        """Test that invalid extra headers trigger warnings."""
+        import logging
+
+        from dulwich.config import ConfigDict
+
+        url = "https://github.com/jelmer/dulwich"
+        config = ConfigDict()
+        # Set valid header
+        config.set((b"http",), b"extraHeader", b"X-Valid: valid-value")
+        # Set invalid headers (no colon-space separator)
+        config.add((b"http",), b"extraHeader", b"X-Invalid-No-Separator")
+        # Set empty header
+        config.add((b"http",), b"extraHeader", b"")
+        # Set another valid header to verify we continue processing
+        config.add((b"http",), b"extraHeader", b"X-Another-Valid: another-value")
+
+        with self.assertLogs("dulwich.client", level=logging.WARNING) as cm:
+            c = HttpGitClient(url, config=config)
+
+        # Check that warnings were logged
+        self.assertEqual(len(cm.output), 2)
+        self.assertIn("missing ': ' separator", cm.output[0])
+        self.assertIn("empty http.extraHeader", cm.output[1])
+
+        # Valid headers should still be applied
+        self.assertIn("X-Valid", c.pool_manager.headers)
+        self.assertEqual(c.pool_manager.headers["X-Valid"], "valid-value")
+        self.assertIn("X-Another-Valid", c.pool_manager.headers)
+        self.assertEqual(c.pool_manager.headers["X-Another-Valid"], "another-value")
+        # Invalid header should not be present
+        self.assertNotIn("X-Invalid-No-Separator", c.pool_manager.headers)
+
     def test_get_url_preserves_credentials_from_url(self) -> None:
         """Test that credentials from URL are preserved in get_url() (issue #1925)."""
         # When credentials come from the URL (not passed explicitly),