Просмотр исходного кода

refactor(misnomer): rename `match_gitignore_patterns` to `match_sparse_patterns`; clarify docstring

Louis Maddox 8 месяцев назад
Родитель
Коммит
9f08544ab0
2 изменённых файла: 46 добавлено и 43 удалено
  1. 17 12
      dulwich/sparse_patterns.py
  2. 29 31
      tests/test_sparse_patterns.py

+ 17 - 12
dulwich/sparse_patterns.py

@@ -86,8 +86,8 @@ def compute_included_paths_full(index: Index, lines: Sequence[str]) -> set[str]:
     included = set()
     for path_bytes, entry in index.items():
         path_str = path_bytes.decode("utf-8")
-        # For .gitignore logic, match_gitignore_patterns returns True if 'included'
-        if match_gitignore_patterns(path_str, parsed, path_is_dir=False):
+        # match_sparse_patterns returns True when the last matching pattern includes the path
+        if match_sparse_patterns(path_str, parsed, path_is_dir=False):
             included.add(path_str)
     return included
 
@@ -287,7 +287,7 @@ def parse_sparse_patterns(lines: Sequence[str]) -> list[tuple[str, bool, bool, b
     return results
 
 
-def match_gitignore_patterns(
+def match_sparse_patterns(
     path_str: str,
     parsed_patterns: Sequence[tuple[str, bool, bool, bool]],
     path_is_dir: bool = False,
@@ -299,16 +299,22 @@ def match_gitignore_patterns(
       2. If a pattern matches, we set the "include" state depending on negation.
       3. Later matches override earlier ones.
 
-    In a .gitignore sense, lines that do not start with '!' are "ignore" patterns,
-    lines that start with '!' are "unignore" (re-include). But in sparse checkout,
-    it's effectively reversed: a non-negation line is "include," negation is "exclude."
-    However, many flows still rely on the same final logic: the last matching pattern
+    In a sparse checkout, lines that do not start with '!' are positive patterns,
+    indicating files/directories to check out (include in the index), and those that
+    start with '!' are negative ('negated'), meaning they indicate files not to check
+    out (not included in the index). This is fairly straightforward.
+
+    In a .gitignore, it's the same syntax but with a reverse effect: positive means
+    "ignore" (exclude from the index) and negative means "unignore" (re-include in the
+    index).
+
+    Many routines still rely on the same final logic: the last matching pattern
     decides "excluded" vs. "included."
 
     We'll interpret "include" as returning True, "exclude" as returning False.
 
-    Each pattern can include negation (!), directory-only markers, or be anchored
-    to the start of the path. The last matching pattern determines whether the
+    Each pattern can include negation ('!'), directory-only markers ('/' as suffix), or
+    be anchored ('/' as prefix). The last matching pattern determines whether the
     path is ultimately included or excluded.
 
     Args:
@@ -320,9 +326,8 @@ def match_gitignore_patterns(
     Returns:
       True if the path is included by the last matching pattern, False otherwise.
     """
-    # Start by assuming "excluded" (like a .gitignore starts by including everything
-    # until matched, but for sparse-checkout we often treat unmatched as "excluded").
-    # We will flip if we match an "include" pattern.
+    # Start by assuming "excluded". Like how .gitignore initially includes everything
+    # until matched, but reversed: sparse-checkout initially excludes everything.
     is_included = False
 
     for pattern, negation, dir_only, anchored in parsed_patterns:

+ 29 - 31
tests/test_sparse_patterns.py

@@ -37,7 +37,7 @@ from dulwich.sparse_patterns import (
     compute_included_paths_cone,
     compute_included_paths_full,
     determine_included_paths,
-    match_gitignore_patterns,
+    match_sparse_patterns,
     parse_sparse_patterns,
 )
 
@@ -69,25 +69,25 @@ class ParseSparsePatternsTests(TestCase):
             "!data/",  # no data dirs anywhere
         ]
         parsed = parse_sparse_patterns(lines)
-        self.assertEqual(len(parsed), 6)
+        self.assertEqual(len(parsed), 8)
 
         # Returns a 4-tuple of: (pattern, negation, dir_only, anchored)
         self.assertEqual(parsed[0], ("*.py", False, False, False))  # _,_,_
         self.assertEqual(parsed[1], ("*.md", True, False, False))  # N,_,_
         self.assertEqual(parsed[2], ("docs", False, True, True))  # _,D,A
         self.assertEqual(parsed[3], ("docs/images", True, True, True))  # N,D,A
-        self.assertEqual(parsed[4], [("src", False, True, False)])  # _,D,_
-        self.assertEqual(parsed[5], [("*.toml", False, False, True)])  # _,_,A
-        self.assertEqual(parsed[6], [("*.bak", True, False, True)])  # N,_,A
-        self.assertEqual(parsed[7], [("data", True, True, False)])  # N,D,_
+        self.assertEqual(parsed[4], ("src", False, True, False))  # _,D,_
+        self.assertEqual(parsed[5], ("*.toml", False, False, True))  # _,_,A
+        self.assertEqual(parsed[6], ("*.bak", True, False, True))  # N,_,A
+        self.assertEqual(parsed[7], ("data", True, True, False))  # N,D,_
 
 
-class MatchGitignorePatternsTests(TestCase):
-    """Test the match_gitignore_patterns function."""
+class MatchSparsePatternsTests(TestCase):
+    """Test the match_sparse_patterns function."""
 
     def test_no_patterns_returns_excluded(self):
         """If no patterns are provided, by default we treat the path as excluded."""
-        self.assertFalse(match_gitignore_patterns("anyfile.py", []))
+        self.assertFalse(match_sparse_patterns("anyfile.py", []))
 
     def test_last_match_wins(self):
         """Checks that the last pattern to match determines included vs excluded."""
@@ -99,78 +99,76 @@ class MatchGitignorePatternsTests(TestCase):
         )
         # "foo.py" matches first pattern => included
         # then matches second pattern => excluded
-        self.assertFalse(match_gitignore_patterns("foo.py", parsed))
+        self.assertFalse(match_sparse_patterns("foo.py", parsed))
 
     def test_dir_only(self):
         """A pattern with a trailing slash should only match directories and subdirectories."""
         parsed = parse_sparse_patterns(["docs/"])
         # A file inside docs/ is included even with path_is_dir=False (its parent directory matches)
         self.assertTrue(
-            match_gitignore_patterns("docs/readme.md", parsed, path_is_dir=False)
+            match_sparse_patterns("docs/readme.md", parsed, path_is_dir=False)
         )
-        self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs", parsed, path_is_dir=True))
         # Even if the path name is "docs", if it's a file, won't match:
-        self.assertFalse(match_gitignore_patterns("docs", parsed, path_is_dir=False))
+        self.assertFalse(match_sparse_patterns("docs", parsed, path_is_dir=False))
 
     def test_anchored(self):
         """Anchored patterns match from the start of the path only."""
         parsed = parse_sparse_patterns(["/foo"])
-        self.assertTrue(match_gitignore_patterns("foo", parsed))
+        self.assertTrue(match_sparse_patterns("foo", parsed))
         # But "some/foo" doesn't match because anchored requires start
-        self.assertFalse(match_gitignore_patterns("some/foo", parsed))
+        self.assertFalse(match_sparse_patterns("some/foo", parsed))
 
     def test_unanchored_uses_fnmatch(self):
         parsed = parse_sparse_patterns(["foo"])
-        self.assertTrue(match_gitignore_patterns("some/foo", parsed))
-        self.assertFalse(match_gitignore_patterns("some/bar", parsed))
+        self.assertTrue(match_sparse_patterns("some/foo", parsed))
+        self.assertFalse(match_sparse_patterns("some/bar", parsed))
 
     def test_anchored_empty_pattern(self):
         """Test handling of empty pattern with anchoring (e.g., '/')."""
         parsed = parse_sparse_patterns(["/"])
         # Check the structure of the parsed empty pattern first
         self.assertEqual(parsed, [("", False, False, True)])
-        # When the pattern is empty with anchoring, it's continued (skipped) in match_gitignore_patterns
+        # When the pattern is empty with anchoring, it's continued (skipped) in match_sparse_patterns
         # for non-empty paths but for empty string it might match due to empty string comparisons
-        self.assertFalse(match_gitignore_patterns("foo", parsed))
+        self.assertFalse(match_sparse_patterns("foo", parsed))
         # An empty string with empty pattern will match (implementation detail)
-        self.assertTrue(match_gitignore_patterns("", parsed))
+        self.assertTrue(match_sparse_patterns("", parsed))
 
     def test_anchored_dir_only_exact_match(self):
         """Test anchored directory-only patterns with exact matching."""
         parsed = parse_sparse_patterns(["/docs/"])
         # Test with exact match "docs" and path_is_dir=True
-        self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs", parsed, path_is_dir=True))
         # Test with "docs/" (exact match + trailing slash)
-        self.assertTrue(match_gitignore_patterns("docs/", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("docs/", parsed, path_is_dir=True))
 
     def test_complex_anchored_patterns(self):
         """Test more complex anchored pattern matching."""
         parsed = parse_sparse_patterns(["/dir/subdir"])
         # Test exact match
-        self.assertTrue(match_gitignore_patterns("dir/subdir", parsed))
+        self.assertTrue(match_sparse_patterns("dir/subdir", parsed))
         # Test subdirectory path
-        self.assertTrue(match_gitignore_patterns("dir/subdir/file.txt", parsed))
+        self.assertTrue(match_sparse_patterns("dir/subdir/file.txt", parsed))
         # Test non-matching path
-        self.assertFalse(match_gitignore_patterns("otherdir/subdir", parsed))
+        self.assertFalse(match_sparse_patterns("otherdir/subdir", parsed))
 
     def test_pattern_matching_edge_cases(self):
         """Test various edge cases in pattern matching."""
         # Test exact equality with an anchored pattern
         parsed = parse_sparse_patterns(["/foo"])
-        self.assertTrue(match_gitignore_patterns("foo", parsed))
+        self.assertTrue(match_sparse_patterns("foo", parsed))
 
         # Test with path_is_dir=True
-        self.assertTrue(match_gitignore_patterns("foo", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("foo", parsed, path_is_dir=True))
 
         # Test exact match with pattern with dir_only=True
         parsed = parse_sparse_patterns(["/bar/"])
-        self.assertTrue(match_gitignore_patterns("bar", parsed, path_is_dir=True))
+        self.assertTrue(match_sparse_patterns("bar", parsed, path_is_dir=True))
 
         # Test startswith match for anchored pattern
         parsed = parse_sparse_patterns(["/prefix"])
-        self.assertTrue(
-            match_gitignore_patterns("prefix/subdirectory/file.txt", parsed)
-        )
+        self.assertTrue(match_sparse_patterns("prefix/subdirectory/file.txt", parsed))
 
 
 class ComputeIncludedPathsFullTests(TestCase):