فهرست منبع

refactor(misnomer): rename `match_gitignore_patterns` to `match_sparse_patterns`; clarify docstring

Louis Maddox 8 ماه پیش
والد
کامیت
9f08544ab0
2 فایلهای تغییر یافته به همراه 46 افزوده شده و 43 حذف شده
  1. 17 12
      dulwich/sparse_patterns.py
  2. 29 31
      tests/test_sparse_patterns.py

+ 17 - 12
dulwich/sparse_patterns.py

@@ -86,8 +86,8 @@ def compute_included_paths_full(index: Index, lines: Sequence[str]) -> set[str]:
     included = set()
     included = set()
     for path_bytes, entry in index.items():
     for path_bytes, entry in index.items():
         path_str = path_bytes.decode("utf-8")
         path_str = path_bytes.decode("utf-8")
-        # For .gitignore logic, match_gitignore_patterns returns True if 'included'
-        if match_gitignore_patterns(path_str, parsed, path_is_dir=False):
+        # For .gitignore logic, match_sparse_patterns returns True if 'included'
+        if match_sparse_patterns(path_str, parsed, path_is_dir=False):
             included.add(path_str)
             included.add(path_str)
     return included
     return included
 
 
@@ -287,7 +287,7 @@ def parse_sparse_patterns(lines: Sequence[str]) -> list[tuple[str, bool, bool, b
     return results
     return results
 
 
 
 
-def match_gitignore_patterns(
+def match_sparse_patterns(
     path_str: str,
     path_str: str,
     parsed_patterns: Sequence[tuple[str, bool, bool, bool]],
     parsed_patterns: Sequence[tuple[str, bool, bool, bool]],
     path_is_dir: bool = False,
     path_is_dir: bool = False,
@@ -299,16 +299,22 @@ def match_gitignore_patterns(
       2. If a pattern matches, we set the "include" state depending on negation.
       2. If a pattern matches, we set the "include" state depending on negation.
       3. Later matches override earlier ones.
       3. Later matches override earlier ones.
 
 
-    In a .gitignore sense, lines that do not start with '!' are "ignore" patterns,
-    lines that start with '!' are "unignore" (re-include). But in sparse checkout,
-    it's effectively reversed: a non-negation line is "include," negation is "exclude."
-    However, many flows still rely on the same final logic: the last matching pattern
+    In a sparse checkout, lines that do not start with '!' are positive patterns,
+    indicating files/directories to check out (include in the index), and those that
+    start with '!' are negative ('negated'), meaning they indicate files not to check
+    out (not included in the index). This is fairly straightforward.
+
+    In a .gitignore, it's the same syntax but with a reverse effect: positive means
+    "ignore" (exclude from the index) and negative means "unignore" (re-include in the
+    index).
+
+    Many routines still rely on the same final logic: the last matching pattern
     decides "excluded" vs. "included."
     decides "excluded" vs. "included."
 
 
     We'll interpret "include" as returning True, "exclude" as returning False.
     We'll interpret "include" as returning True, "exclude" as returning False.
 
 
-    Each pattern can include negation (!), directory-only markers, or be anchored
-    to the start of the path. The last matching pattern determines whether the
+    Each pattern can include negation ('!'), directory-only markers ('/' as suffix), or
+    be anchored ('/' as prefix). The last matching pattern determines whether the
     path is ultimately included or excluded.
     path is ultimately included or excluded.
 
 
     Args:
     Args:
@@ -320,9 +326,8 @@ def match_gitignore_patterns(
     Returns:
     Returns:
       True if the path is included by the last matching pattern, False otherwise.
       True if the path is included by the last matching pattern, False otherwise.
     """
     """
-    # Start by assuming "excluded" (like a .gitignore starts by including everything
-    # until matched, but for sparse-checkout we often treat unmatched as "excluded").
-    # We will flip if we match an "include" pattern.
+    # Start by assuming "excluded". Like how .gitignore initially includes everything
+    # until matched, but reversed: sparse-checkout initially excludes everything.
     is_included = False
     is_included = False
 
 
     for pattern, negation, dir_only, anchored in parsed_patterns:
     for pattern, negation, dir_only, anchored in parsed_patterns:

+ 29 - 31
tests/test_sparse_patterns.py

@@ -37,7 +37,7 @@ from dulwich.sparse_patterns import (
     compute_included_paths_cone,
     compute_included_paths_cone,
     compute_included_paths_full,
     compute_included_paths_full,
     determine_included_paths,
     determine_included_paths,
-    match_gitignore_patterns,
+    match_sparse_patterns,
     parse_sparse_patterns,
     parse_sparse_patterns,
 )
 )
 
 
@@ -69,25 +69,25 @@ class ParseSparsePatternsTests(TestCase):
             "!data/",  # no data dirs anywhere
             "!data/",  # no data dirs anywhere
         ]
         ]
         parsed = parse_sparse_patterns(lines)
         parsed = parse_sparse_patterns(lines)
-        self.assertEqual(len(parsed), 6)
+        self.assertEqual(len(parsed), 8)
 
 
         # Returns a 4-tuple of: (pattern, negation, dir_only, anchored)
         # Returns a 4-tuple of: (pattern, negation, dir_only, anchored)
         self.assertEqual(parsed[0], ("*.py", False, False, False))  # _,_,_
         self.assertEqual(parsed[0], ("*.py", False, False, False))  # _,_,_
         self.assertEqual(parsed[1], ("*.md", True, False, False))  # N,_,_
         self.assertEqual(parsed[1], ("*.md", True, False, False))  # N,_,_
         self.assertEqual(parsed[2], ("docs", False, True, True))  # _,D,A
         self.assertEqual(parsed[2], ("docs", False, True, True))  # _,D,A
         self.assertEqual(parsed[3], ("docs/images", True, True, True))  # N,D,A
         self.assertEqual(parsed[3], ("docs/images", True, True, True))  # N,D,A
-        self.assertEqual(parsed[4], [("src", False, True, False)])  # _,D,_
-        self.assertEqual(parsed[5], [("*.toml", False, False, True)])  # _,_,A
-        self.assertEqual(parsed[6], [("*.bak", True, False, True)])  # N,_,A
-        self.assertEqual(parsed[7], [("data", True, True, False)])  # N,D,_
+        self.assertEqual(parsed[4], ("src", False, True, False))  # _,D,_
+        self.assertEqual(parsed[5], ("*.toml", False, False, True))  # _,_,A
+        self.assertEqual(parsed[6], ("*.bak", True, False, True))  # N,_,A
+        self.assertEqual(parsed[7], ("data", True, True, False))  # N,D,_
 
 
 
 
-class MatchGitignorePatternsTests(TestCase):
-    """Test the match_gitignore_patterns function."""
+class MatchSparsePatternsTests(TestCase):
+    """Test the match_sparse_patterns function."""
 
 
     def test_no_patterns_returns_excluded(self):
     def test_no_patterns_returns_excluded(self):
         """If no patterns are provided, by default we treat the path as excluded."""
         """If no patterns are provided, by default we treat the path as excluded."""
-        self.assertFalse(match_gitignore_patterns("anyfile.py", []))
+        self.assertFalse(match_sparse_patterns("anyfile.py", []))
 
 
     def test_last_match_wins(self):
     def test_last_match_wins(self):
         """Checks that the last pattern to match determines included vs excluded."""
         """Checks that the last pattern to match determines included vs excluded."""
@@ -99,78 +99,76 @@ class MatchGitignorePatternsTests(TestCase):
         )
         )
         # "foo.py" matches first pattern => included
         # "foo.py" matches first pattern => included
         # then matches second pattern => excluded
         # then matches second pattern => excluded
-        self.assertFalse(match_gitignore_patterns("foo.py", parsed))
+        self.assertFalse(match_sparse_patterns("foo.py", parsed))
 
 
     def test_dir_only(self):
     def test_dir_only(self):
         """A pattern with a trailing slash should only match directories and subdirectories."""
         """A pattern with a trailing slash should only match directories and subdirectories."""
         parsed = parse_sparse_patterns(["docs/"])
         parsed = parse_sparse_patterns(["docs/"])
         # Because we set path_is_dir=False, it won't match
         # Because we set path_is_dir=False, it won't match
         self.assertTrue(
         self.assertTrue(
-            match_gitignore_patterns("docs/readme.md", parsed, path_is_dir=False)
+            match_sparse_patterns("docs/readme.md", parsed, path_is_dir=False)
         )
         )
-        self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs", parsed, path_is_dir=True))
         # Even if the path name is "docs", if it's a file, won't match:
         # Even if the path name is "docs", if it's a file, won't match:
-        self.assertFalse(match_gitignore_patterns("docs", parsed, path_is_dir=False))
+        self.assertFalse(match_sparse_patterns("docs", parsed, path_is_dir=False))
 
 
     def test_anchored(self):
     def test_anchored(self):
         """Anchored patterns match from the start of the path only."""
         """Anchored patterns match from the start of the path only."""
         parsed = parse_sparse_patterns(["/foo"])
         parsed = parse_sparse_patterns(["/foo"])
-        self.assertTrue(match_gitignore_patterns("foo", parsed))
+        self.assertTrue(match_sparse_patterns("foo", parsed))
         # But "some/foo" doesn't match because anchored requires start
         # But "some/foo" doesn't match because anchored requires start
-        self.assertFalse(match_gitignore_patterns("some/foo", parsed))
+        self.assertFalse(match_sparse_patterns("some/foo", parsed))
 
 
     def test_unanchored_uses_fnmatch(self):
     def test_unanchored_uses_fnmatch(self):
         parsed = parse_sparse_patterns(["foo"])
         parsed = parse_sparse_patterns(["foo"])
-        self.assertTrue(match_gitignore_patterns("some/foo", parsed))
-        self.assertFalse(match_gitignore_patterns("some/bar", parsed))
+        self.assertTrue(match_sparse_patterns("some/foo", parsed))
+        self.assertFalse(match_sparse_patterns("some/bar", parsed))
 
 
     def test_anchored_empty_pattern(self):
     def test_anchored_empty_pattern(self):
         """Test handling of empty pattern with anchoring (e.g., '/')."""
         """Test handling of empty pattern with anchoring (e.g., '/')."""
         parsed = parse_sparse_patterns(["/"])
         parsed = parse_sparse_patterns(["/"])
         # Check the structure of the parsed empty pattern first
         # Check the structure of the parsed empty pattern first
         self.assertEqual(parsed, [("", False, False, True)])
         self.assertEqual(parsed, [("", False, False, True)])
-        # When the pattern is empty with anchoring, it's continued (skipped) in match_gitignore_patterns
+        # When the pattern is empty with anchoring, it's continued (skipped) in match_sparse_patterns
         # for non-empty paths but for empty string it might match due to empty string comparisons
         # for non-empty paths but for empty string it might match due to empty string comparisons
-        self.assertFalse(match_gitignore_patterns("foo", parsed))
+        self.assertFalse(match_sparse_patterns("foo", parsed))
         # An empty string with empty pattern will match (implementation detail)
         # An empty string with empty pattern will match (implementation detail)
-        self.assertTrue(match_gitignore_patterns("", parsed))
+        self.assertTrue(match_sparse_patterns("", parsed))
 
 
     def test_anchored_dir_only_exact_match(self):
     def test_anchored_dir_only_exact_match(self):
         """Test anchored directory-only patterns with exact matching."""
         """Test anchored directory-only patterns with exact matching."""
         parsed = parse_sparse_patterns(["/docs/"])
         parsed = parse_sparse_patterns(["/docs/"])
         # Test with exact match "docs" and path_is_dir=True
         # Test with exact match "docs" and path_is_dir=True
-        self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs", parsed, path_is_dir=True))
         # Test with "docs/" (exact match + trailing slash)
         # Test with "docs/" (exact match + trailing slash)
-        self.assertTrue(match_gitignore_patterns("docs/", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs/", parsed, path_is_dir=True))
 
 
     def test_complex_anchored_patterns(self):
     def test_complex_anchored_patterns(self):
         """Test more complex anchored pattern matching."""
         """Test more complex anchored pattern matching."""
         parsed = parse_sparse_patterns(["/dir/subdir"])
         parsed = parse_sparse_patterns(["/dir/subdir"])
         # Test exact match
         # Test exact match
-        self.assertTrue(match_gitignore_patterns("dir/subdir", parsed))
+        self.assertTrue(match_sparse_patterns("dir/subdir", parsed))
         # Test subdirectory path
         # Test subdirectory path
-        self.assertTrue(match_gitignore_patterns("dir/subdir/file.txt", parsed))
+        self.assertTrue(match_sparse_patterns("dir/subdir/file.txt", parsed))
         # Test non-matching path
         # Test non-matching path
-        self.assertFalse(match_gitignore_patterns("otherdir/subdir", parsed))
+        self.assertFalse(match_sparse_patterns("otherdir/subdir", parsed))
 
 
     def test_pattern_matching_edge_cases(self):
     def test_pattern_matching_edge_cases(self):
         """Test various edge cases in pattern matching."""
         """Test various edge cases in pattern matching."""
         # Test exact equality with an anchored pattern
         # Test exact equality with an anchored pattern
         parsed = parse_sparse_patterns(["/foo"])
         parsed = parse_sparse_patterns(["/foo"])
-        self.assertTrue(match_gitignore_patterns("foo", parsed))
+        self.assertTrue(match_sparse_patterns("foo", parsed))
 
 
         # Test with path_is_dir=True
         # Test with path_is_dir=True
-        self.assertTrue(match_gitignore_patterns("foo", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("foo", parsed, path_is_dir=True))
 
 
         # Test exact match with pattern with dir_only=True
         # Test exact match with pattern with dir_only=True
         parsed = parse_sparse_patterns(["/bar/"])
         parsed = parse_sparse_patterns(["/bar/"])
-        self.assertTrue(match_gitignore_patterns("bar", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("bar", parsed, path_is_dir=True))
 
 
         # Test startswith match for anchored pattern
         # Test startswith match for anchored pattern
         parsed = parse_sparse_patterns(["/prefix"])
         parsed = parse_sparse_patterns(["/prefix"])
-        self.assertTrue(
-            match_gitignore_patterns("prefix/subdirectory/file.txt", parsed)
-        )
+        self.assertTrue(match_sparse_patterns("prefix/subdirectory/file.txt", parsed))
 
 
 
 
 class ComputeIncludedPathsFullTests(TestCase):
 class ComputeIncludedPathsFullTests(TestCase):