Quellcode durchsuchen

Fix HTTP authentication with credentials in URLs (#1926)

When credentials are embedded in a URL (like https://token@github.com),
preserve them when storing the remote URL in git config. This ensures
that subsequent operations like push can authenticate successfully.

Fixes #1925
Jelmer Vernooij vor 3 Monaten
Ursprung
Commit
a8acf4cdc2
4 geänderte Dateien mit 141 neuen und 13 gelöschten Zeilen
  1. 8 0
      NEWS
  2. 50 6
      dulwich/client.py
  3. 7 7
      dulwich/merge.py
  4. 76 0
      tests/test_client.py

+ 8 - 0
NEWS

@@ -1,3 +1,11 @@
+0.24.5	UNRELEASED
+
+ * Fix HTTP authentication to preserve credentials from URLs when storing
+   remote configuration. URLs with embedded credentials (like
+   ``https://token@github.com/user/repo.git``) now correctly save those
+   credentials to git config, allowing subsequent push operations to succeed.
+   (Jelmer Vernooij, #1925)
+
 0.24.4	2025-10-14
 
  * Add compatibility for Python 3.14.  (Jelmer Vernooij)

+ 50 - 6
dulwich/client.py

@@ -3315,9 +3315,15 @@ class AbstractHttpGitClient(GitClient):
         report_activity: Optional[Callable[[int, str], None]] = None,
         quiet: bool = False,
         include_tags: bool = False,
+        username: Optional[str] = None,
+        password: Optional[str] = None,
     ) -> None:
         """Initialize AbstractHttpGitClient."""
         self._base_url = base_url.rstrip("/") + "/"
+        self._username = username
+        self._password = password
+        # Track original URL with credentials (set by from_parsedurl when credentials come from URL)
+        self._url_with_auth: Optional[str] = None
         self.dumb = dumb
         GitClient.__init__(
             self,
@@ -3788,7 +3794,37 @@ class AbstractHttpGitClient(GitClient):
 
     def get_url(self, path: str) -> str:
         """Get the HTTP URL for a path."""
-        return self._get_url(path).rstrip("/")
+        url = self._get_url(path).rstrip("/")
+
+        # Include credentials in the URL only if they came from a URL (not passed explicitly)
+        # This preserves credentials that were in the original URL for git config storage
+        if self._url_with_auth is not None:
+            from urllib.parse import quote, urlparse, urlunparse
+
+            assert self._username is not None
+            parsed = urlparse(url)
+            # Construct netloc with credentials
+            if self._password is not None:
+                netloc = f"{quote(self._username, safe='')}:{quote(self._password, safe='')}@{parsed.hostname}"
+            else:
+                netloc = f"{quote(self._username, safe='')}@{parsed.hostname}"
+
+            if parsed.port:
+                netloc += f":{parsed.port}"
+
+            # Reconstruct URL with credentials
+            url = urlunparse(
+                (
+                    parsed.scheme,
+                    netloc,
+                    parsed.path,
+                    parsed.params,
+                    parsed.query,
+                    parsed.fragment,
+                )
+            )
+
+        return url
 
     def _get_url(self, path: Union[str, bytes]) -> str:
         path_str = path if isinstance(path, str) else path.decode("utf-8")
@@ -3842,7 +3878,7 @@ class AbstractHttpGitClient(GitClient):
 
         # Pass credentials to constructor if it's a subclass that supports them
         if cls is Urllib3HttpGitClient:
-            return cls(  # type: ignore[call-arg]
+            client = cls(  # type: ignore[call-arg]
                 urlunparse(base_parsed),
                 dumb=dumb,
                 thin_packs=thin_packs,
@@ -3854,16 +3890,24 @@ class AbstractHttpGitClient(GitClient):
                 config=config,
             )
         else:
-            # Base class doesn't support credentials in constructor
-            return cls(
+            # Base class now supports credentials in constructor
+            client = cls(
                 urlunparse(base_parsed),
                 dumb=dumb,
                 thin_packs=thin_packs,
                 report_activity=report_activity,
                 quiet=quiet,
                 include_tags=include_tags,
+                username=final_username,
+                password=final_password,
             )
 
+        # Mark that credentials came from URL (not passed explicitly) if URL had credentials
+        if url_username is not None or url_password is not None:
+            client._url_with_auth = urlunparse(parsedurl)
+
+        return client
+
     def __repr__(self) -> str:
         """Return string representation of this client."""
         return f"{type(self).__name__}({self._base_url!r}, dumb={self.dumb!r})"
@@ -3902,8 +3946,6 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
         include_tags: bool = False,
     ) -> None:
         """Initialize Urllib3HttpGitClient."""
-        self._username = username
-        self._password = password
         self._timeout = timeout
         self._extra_headers = extra_headers or {}
 
@@ -3932,6 +3974,8 @@ class Urllib3HttpGitClient(AbstractHttpGitClient):
             report_activity=report_activity,
             quiet=quiet,
             include_tags=include_tags,
+            username=username,
+            password=password,
         )
 
     def _get_url(self, path: Union[str, bytes]) -> str:

+ 7 - 7
dulwich/merge.py

@@ -1,11 +1,11 @@
 """Git merge implementation."""
 
 from collections.abc import Sequence
-from difflib import SequenceMatcher
 from typing import TYPE_CHECKING, Optional
 
 if TYPE_CHECKING:
     import merge3
+    from merge3 import SequenceMatcherProtocol
 else:
     try:
         import merge3
@@ -24,8 +24,8 @@ def make_merge3(
     a: Sequence[bytes],
     b: Sequence[bytes],
     is_cherrypick: bool = False,
-    sequence_matcher: Optional[type[SequenceMatcher[bytes]]] = None,
-) -> "merge3.Merge3":
+    sequence_matcher: Optional[type["SequenceMatcherProtocol[bytes]"]] = None,
+) -> "merge3.Merge3[bytes]":
     """Return a Merge3 object, or raise ImportError if merge3 is not installed."""
     if merge3 is None:
         raise ImportError(
@@ -66,7 +66,7 @@ def _can_merge_lines(
 
 if merge3 is not None:
 
-    def _merge3_to_bytes(m: "merge3.Merge3") -> bytes:
+    def _merge3_to_bytes(m: "merge3.Merge3[bytes]") -> bytes:
         """Convert merge3 result to bytes with conflict markers.
 
         Args:
@@ -75,7 +75,7 @@ if merge3 is not None:
         Returns:
             Merged content as bytes
         """
-        result = []
+        result: list[bytes] = []
         for group in m.merge_groups():  # type: ignore[no-untyped-call,unused-ignore]
             if group[0] == "unchanged":
                 result.extend(group[1])
@@ -105,8 +105,8 @@ if merge3 is not None:
 
 
 def _merge_lines(
-    base_lines: list[bytes], a_lines: list[bytes], b_lines: list[bytes]
-) -> list[bytes]:
+    base_lines: Sequence[bytes], a_lines: Sequence[bytes], b_lines: Sequence[bytes]
+) -> Sequence[bytes]:
     """Merge lines when possible."""
     if base_lines == a_lines:
         return b_lines

+ 76 - 0
tests/test_client.py

@@ -1641,6 +1641,82 @@ class HttpGitClientTests(TestCase):
         self.assertIn("Authorization", c.pool_manager.headers)
         self.assertEqual(c.pool_manager.headers["Authorization"], "Bearer token123")
 
+    def test_get_url_preserves_credentials_from_url(self) -> None:
+        """Test that credentials from URL are preserved in get_url() (issue #1925)."""
+        # When credentials come from the URL (not passed explicitly),
+        # they should be included in get_url() so they're saved to git config
+        username = "ghp_token123"
+        url = f"https://{username}@github.com/jelmer/dulwich"
+        path = "/jelmer/dulwich"
+
+        c = HttpGitClient.from_parsedurl(urlparse(url))
+        reconstructed_url = c.get_url(path)
+
+        # Credentials should be preserved in the URL
+        self.assertIn(username, reconstructed_url)
+        self.assertEqual(
+            f"https://{username}@github.com/jelmer/dulwich", reconstructed_url
+        )
+
+    def test_get_url_preserves_credentials_with_password_from_url(self) -> None:
+        """Test that username:password from URL are preserved in get_url()."""
+        username = "user"
+        password = "pass"
+        url = f"https://{username}:{password}@github.com/jelmer/dulwich"
+        path = "/jelmer/dulwich"
+
+        c = HttpGitClient.from_parsedurl(urlparse(url))
+        reconstructed_url = c.get_url(path)
+
+        # Both username and password should be preserved
+        self.assertIn(username, reconstructed_url)
+        self.assertIn(password, reconstructed_url)
+        self.assertEqual(
+            f"https://{username}:{password}@github.com/jelmer/dulwich",
+            reconstructed_url,
+        )
+
+    def test_get_url_preserves_special_chars_in_credentials(self) -> None:
+        """Test that special characters in credentials are properly escaped."""
+        # URL-encoded credentials with special characters
+        original_username = "user@domain"
+        original_password = "p@ss:word"
+        quoted_username = urlquote(original_username, safe="")
+        quoted_password = urlquote(original_password, safe="")
+
+        url = f"https://{quoted_username}:{quoted_password}@github.com/jelmer/dulwich"
+        path = "/jelmer/dulwich"
+
+        c = HttpGitClient.from_parsedurl(urlparse(url))
+        reconstructed_url = c.get_url(path)
+
+        # The reconstructed URL should have properly escaped credentials
+        self.assertIn(quoted_username, reconstructed_url)
+        self.assertIn(quoted_password, reconstructed_url)
+        # Verify the URL is valid by parsing it back
+        parsed = urlparse(reconstructed_url)
+        from urllib.parse import unquote
+
+        self.assertEqual(unquote(parsed.username), original_username)
+        self.assertEqual(unquote(parsed.password), original_password)
+
+    def test_get_url_explicit_credentials_not_in_url(self) -> None:
+        """Test that explicitly passed credentials are NOT included in get_url()."""
+        # When credentials are passed explicitly (not from URL),
+        # they should NOT appear in get_url() for security
+        base_url = "https://github.com/jelmer/dulwich"
+        path = "/jelmer/dulwich"
+        username = "explicit_user"
+        password = "explicit_pass"
+
+        c = HttpGitClient(base_url, username=username, password=password)
+        url = c.get_url(path)
+
+        # Credentials should NOT be in the URL
+        self.assertNotIn(username, url)
+        self.assertNotIn(password, url)
+        self.assertEqual("https://github.com/jelmer/dulwich", url)
+
 
 class TCPGitClientTests(TestCase):
     def test_get_url(self) -> None: