Pārlūkot izejas kodu

Simplify gitignore parent exclusion logic (#1728)

Reduce complexity while maintaining exact Git-compatible behavior:
- Streamline _check_parent_exclusion function by combining conditions
- Consolidate _pattern_excludes_parent cases into cleaner flow
- Simplify Pattern.match method with more concise conditionals
- Remove redundant comments and intermediate variables

Fixes #1721
Jelmer Vernooij 2 nedēļas atpakaļ
vecāks
revīzija
6d02f7e35e
3 mainītis faili ar 365 papildinājumiem un 55 dzēšanām
  1. 67 54
      dulwich/ignore.py
  2. 58 1
      tests/compat/test_check_ignore.py
  3. 240 0
      tests/test_ignore.py

+ 67 - 54
dulwich/ignore.py

@@ -40,12 +40,11 @@ from .config import Config, get_xdg_config_home_path
 
 def _pattern_to_str(pattern: Union["Pattern", bytes, str]) -> str:
     """Convert a pattern to string, handling both Pattern objects and raw patterns."""
-    if hasattr(pattern, "pattern"):
-        pattern_bytes = pattern.pattern
+    if isinstance(pattern, Pattern):
+        pattern_data: Union[bytes, str] = pattern.pattern
     else:
-        pattern_bytes = pattern
-
-    return pattern_bytes.decode() if isinstance(pattern_bytes, bytes) else pattern_bytes
+        pattern_data = pattern
+    return pattern_data.decode() if isinstance(pattern_data, bytes) else pattern_data
 
 
 def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
@@ -58,55 +57,65 @@ def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
     Returns:
         True if parent exclusion applies (negation should be ineffective), False otherwise
     """
-    # Find the final negation pattern that would include this file
-    final_negation_pattern = None
-    for pattern in reversed(matching_patterns):
-        if not pattern.is_exclude:  # is_exclude=False means negation/inclusion
-            final_negation_pattern = pattern
-            break
-
-    if not final_negation_pattern:
-        return False  # No negation to check
-
-    final_pattern_str = _pattern_to_str(final_negation_pattern)
-
-    # Check each exclusion pattern to see if it excludes a parent directory
-    for pattern in matching_patterns:
-        if not pattern.is_exclude:  # Skip negations
-            continue
-
-        pattern_str = _pattern_to_str(pattern)
+    # Find the last negation pattern
+    final_negation = next(
+        (p for p in reversed(matching_patterns) if not p.is_exclude), None
+    )
+    if not final_negation:
+        return False
 
-        if _pattern_excludes_parent(pattern_str, path, final_pattern_str):
-            return True
+    final_pattern_str = _pattern_to_str(final_negation)
 
-    return False  # No parent exclusion applies
+    # Check if any exclusion pattern excludes a parent directory
+    return any(
+        pattern.is_exclude
+        and _pattern_excludes_parent(_pattern_to_str(pattern), path, final_pattern_str)
+        for pattern in matching_patterns
+    )
 
 
 def _pattern_excludes_parent(
     pattern_str: str, path: str, final_pattern_str: str
 ) -> bool:
     """Check if a pattern excludes a parent directory of the given path."""
-    # Case 1: Direct directory exclusion (pattern ending with /)
-    if pattern_str.endswith("/"):
-        excluded_dir = pattern_str[:-1]  # Remove trailing /
-        return "/" in path and path.startswith(excluded_dir + "/")
-
-    # Case 2: Recursive exclusion patterns (**/dir/**)
+    # Handle **/middle/** patterns
     if pattern_str.startswith("**/") and pattern_str.endswith("/**"):
-        dir_name = pattern_str[3:-3]  # Remove **/ and /**
-        return dir_name != "" and ("/" + dir_name + "/") in ("/" + path)
+        middle = pattern_str[3:-3]
+        return f"/{middle}/" in f"/{path}" or path.startswith(f"{middle}/")
 
-    # Case 3: Directory glob patterns (dir/**)
+    # Handle dir/** patterns
     if pattern_str.endswith("/**") and not pattern_str.startswith("**/"):
-        dir_prefix = pattern_str[:-3]  # Remove /**
-        if path.startswith(dir_prefix + "/"):
-            # Check if this is a nested path (more than one level under dir_prefix)
-            remaining_path = path[len(dir_prefix + "/") :]
-            if "/" in remaining_path:
-                # This is a nested path - parent directory exclusion applies
-                # BUT only for directory negations, not file negations
-                return final_pattern_str.endswith("/")
+        base_dir = pattern_str[:-3]
+        if not path.startswith(base_dir + "/"):
+            return False
+
+        remaining = path[len(base_dir) + 1 :]
+
+        # Special case: dir/** allows immediate child file negations
+        if (
+            not path.endswith("/")
+            and final_pattern_str.startswith("!")
+            and "/" not in remaining
+        ):
+            neg_pattern = final_pattern_str[1:]
+            if neg_pattern == path or ("*" in neg_pattern and "**" not in neg_pattern):
+                return False
+
+        # Nested files with ** negation patterns
+        if "**" in final_pattern_str and Pattern(final_pattern_str[1:].encode()).match(
+            path.encode()
+        ):
+            return False
+
+        return True
+
+    # Directory patterns (ending with /) can exclude parent directories
+    if pattern_str.endswith("/") and "/" in path:
+        p = Pattern(pattern_str.encode())
+        parts = path.split("/")
+        return any(
+            p.match(("/".join(parts[:i]) + "/").encode()) for i in range(1, len(parts))
+        )
 
     return False
 
@@ -347,17 +356,22 @@ class Pattern:
           path: Path to match (relative to ignore location)
         Returns: boolean
         """
+        # For negation directory patterns (e.g., !dir/), only match directories
+        if self.is_directory_only and not self.is_exclude and not path.endswith(b"/"):
+            return False
+
+        # Check if the regex matches
         if self._re.match(path):
             return True
 
-        # Special handling for directory patterns that exclude files under them
-        if self.is_directory_only and self.is_exclude:
-            # For exclusion directory patterns, also match files under the directory
-            if not path.endswith(b"/"):
-                # This is a file - check if it's under any directory that matches the pattern
-                path_dir = path.rsplit(b"/", 1)[0] + b"/"
-                if len(path.split(b"/")) > 1 and self._re.match(path_dir):
-                    return True
+        # For exclusion directory patterns, also match files under the directory
+        if (
+            self.is_directory_only
+            and self.is_exclude
+            and not path.endswith(b"/")
+            and b"/" in path
+        ):
+            return bool(self._re.match(path.rsplit(b"/", 1)[0] + b"/"))
 
         return False
 
@@ -462,12 +476,11 @@ class IgnoreFilterStack:
           None if the file is not mentioned, True if it is included,
           False if it is explicitly excluded.
         """
-        status = None
         for filter in self._filters:
             status = filter.is_ignored(path)
             if status is not None:
                 return status
-        return status
+        return None
 
 
 def default_user_ignore_filter_path(config: Config) -> str:
@@ -592,7 +605,7 @@ class IgnoreFilterManager:
         patterns for its subdirectories, then the directory itself should not
         be ignored (to allow traversal).
         """
-        # Get the last pattern that determined the result
+        # Original logic for traversal check
         last_excluding_pattern = None
         for match in matches:
             if match.is_exclude:

+ 58 - 1
tests/compat/test_check_ignore.py

@@ -1096,11 +1096,68 @@ class CheckIgnoreCompatTestCase(CompatTestCase):
             # Verify unquoted paths contain the expected files
             expected_unquoted = {"тест.txt", "файл.测试", "mixed_тест.log"}
             self.assertEqual(unquoted_ignored, expected_unquoted)
-
         except (UnicodeEncodeError, OSError):
             # Skip test if filesystem doesn't support unicode
             self.skipTest("Filesystem doesn't support unicode filenames")
 
+    def test_parent_exclusion_wildcard_directories(self) -> None:
+        """Test parent directory exclusion with wildcard patterns (issue #1721)."""
+        self._write_gitignore("*/build/\n!*/build/important.txt\n")
+        self._create_dir("src/build")
+        self._create_dir("test/build")
+        self._create_file("src/build/important.txt")
+        self._create_file("test/build/important.txt")
+        self._create_file("src/build/other.txt")
+
+        paths = [
+            "src/build/",
+            "src/build/important.txt",
+            "src/build/other.txt",
+            "test/build/",
+            "test/build/important.txt",
+        ]
+        self._assert_ignore_match(paths)
+
+    def test_parent_exclusion_double_asterisk(self) -> None:
+        """Test parent directory exclusion with ** patterns."""
+        self._write_gitignore("**/node_modules/\n!**/node_modules/keep.txt\n")
+        self._create_dir("node_modules")
+        self._create_dir("src/node_modules")
+        self._create_dir("deep/nested/node_modules")
+        self._create_file("node_modules/keep.txt")
+        self._create_file("src/node_modules/keep.txt")
+        self._create_file("deep/nested/node_modules/keep.txt")
+
+        paths = [
+            "node_modules/",
+            "node_modules/keep.txt",
+            "src/node_modules/",
+            "src/node_modules/keep.txt",
+            "deep/nested/node_modules/",
+            "deep/nested/node_modules/keep.txt",
+        ]
+        self._assert_ignore_match(paths)
+
+    def test_glob_pattern_base_directory(self) -> None:
+        """Test that dir/** doesn't match dir/ itself."""
+        self._write_gitignore("logs/**\n!logs/important.log\n!logs/keep/\n")
+        self._create_dir("logs")
+        self._create_dir("logs/keep")
+        self._create_dir("logs/subdir")
+        self._create_file("logs/important.log")
+        self._create_file("logs/other.log")
+        self._create_file("logs/subdir/file.txt")
+
+        paths = [
+            "logs/",
+            "logs/important.log",
+            "logs/other.log",
+            "logs/keep/",
+            "logs/subdir/",
+            "logs/subdir/file.txt",
+        ]
+        self._assert_ignore_match(paths)
+
     def _git_check_ignore_quoted(self, paths: list[str]) -> set[str]:
         """Run git check-ignore with default quoting and return set of ignored paths."""
         try:

+ 240 - 0
tests/test_ignore.py

@@ -145,6 +145,108 @@ class MatchPatternTests(TestCase):
             )
 
 
+class ParentExclusionTests(TestCase):
+    """Tests for parent directory exclusion helper functions."""
+
+    def test_check_parent_exclusion_direct_directory(self) -> None:
+        """Test _check_parent_exclusion with direct directory exclusion."""
+        from dulwich.ignore import Pattern, _check_parent_exclusion
+
+        # Pattern: dir/, !dir/file.txt
+        patterns = [Pattern(b"dir/"), Pattern(b"!dir/file.txt")]
+
+        # dir/file.txt has parent 'dir' excluded
+        self.assertTrue(_check_parent_exclusion("dir/file.txt", patterns))
+
+        # dir/subdir/file.txt also has parent 'dir' excluded
+        self.assertTrue(_check_parent_exclusion("dir/subdir/file.txt", patterns))
+
+        # other/file.txt has no parent excluded
+        self.assertFalse(_check_parent_exclusion("other/file.txt", patterns))
+
+    def test_check_parent_exclusion_no_negation(self) -> None:
+        """Test _check_parent_exclusion when there's no negation pattern."""
+        from dulwich.ignore import Pattern, _check_parent_exclusion
+
+        # Only exclusion patterns
+        patterns = [Pattern(b"*.log"), Pattern(b"build/")]
+
+        # No negation pattern, so no parent exclusion check needed
+        self.assertFalse(_check_parent_exclusion("build/file.txt", patterns))
+
+    def test_pattern_excludes_parent_directory_slash(self) -> None:
+        """Test _pattern_excludes_parent for patterns ending with /."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: parent/
+        self.assertTrue(
+            _pattern_excludes_parent("parent/", "parent/file.txt", "!parent/file.txt")
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "parent/", "parent/sub/file.txt", "!parent/sub/file.txt"
+            )
+        )
+        self.assertFalse(
+            _pattern_excludes_parent("parent/", "other/file.txt", "!other/file.txt")
+        )
+        self.assertFalse(
+            _pattern_excludes_parent("parent/", "parent", "!parent")
+        )  # No / in path
+
+    def test_pattern_excludes_parent_double_asterisk(self) -> None:
+        """Test _pattern_excludes_parent for **/ patterns."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: **/node_modules/**
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "**/node_modules/**",
+                "foo/node_modules/bar/file.txt",
+                "!foo/node_modules/bar/file.txt",
+            )
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "**/node_modules/**", "node_modules/file.txt", "!node_modules/file.txt"
+            )
+        )
+        self.assertFalse(
+            _pattern_excludes_parent(
+                "**/node_modules/**", "foo/bar/file.txt", "!foo/bar/file.txt"
+            )
+        )
+
+    def test_pattern_excludes_parent_glob(self) -> None:
+        """Test _pattern_excludes_parent for dir/** patterns."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: logs/** - allows exact file negations for immediate children
+        self.assertFalse(
+            _pattern_excludes_parent("logs/**", "logs/file.txt", "!logs/file.txt")
+        )
+
+        # Directory negations still have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/keep/", "!logs/keep/")
+        )
+
+        # Non-exact negations have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/keep/", "!logs/keep/file.txt")
+        )
+
+        # Nested paths have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/sub/file.txt", "!logs/sub/")
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "logs/**", "logs/sub/file.txt", "!logs/sub/file.txt"
+            )
+        )
+
+
 class IgnoreFilterTests(TestCase):
     def test_included(self) -> None:
         filter = IgnoreFilter([b"a.c", b"b.c"])
@@ -328,6 +430,144 @@ class IgnoreFilterManagerTests(TestCase):
             m.is_ignored("data/subdir/")
         )  # Subdirectory should be ignored (matches Git behavior)
 
+    def test_issue_1721_directory_negation_with_double_asterisk(self) -> None:
+        """Test for issue #1721: regression with negated subdirectory patterns using **."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Create .gitignore with the patterns from issue #1721
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"data/**\n")
+            f.write(b"!data/**/\n")
+            f.write(b"!data/**/*.csv\n")
+
+        # Create directory structure
+        os.makedirs(os.path.join(repo.path, "data", "subdir"))
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # Test the expected behavior - issue #1721 was that data/myfile was not ignored
+        self.assertTrue(
+            m.is_ignored("data/myfile")
+        )  # File should be ignored (fixes issue #1721)
+        self.assertFalse(m.is_ignored("data/"))  # data/ is matched by !data/**/
+        self.assertFalse(
+            m.is_ignored("data/subdir/")
+        )  # Subdirectory is matched by !data/**/
+        # With data/** pattern, Git allows CSV files to be re-included via !data/**/*.csv
+        self.assertFalse(m.is_ignored("data/test.csv"))  # CSV files are not ignored
+        self.assertFalse(
+            m.is_ignored("data/subdir/test.csv")
+        )  # CSV files in subdirs are not ignored
+        self.assertTrue(
+            m.is_ignored("data/subdir/other.txt")
+        )  # Non-CSV files in subdirs are ignored
+
+    def test_parent_directory_exclusion(self) -> None:
+        """Test Git's parent directory exclusion rule.
+
+        Git rule: "It is not possible to re-include a file if a parent directory of that file is excluded."
+        """
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 1: Direct parent directory exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"parent/\n")
+            f.write(b"!parent/file.txt\n")
+            f.write(b"!parent/child/\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # parent/ is excluded, so files inside cannot be re-included
+        self.assertTrue(m.is_ignored("parent/"))
+        self.assertTrue(m.is_ignored("parent/file.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("parent/child/"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("parent/child/file.txt"))
+
+    def test_parent_exclusion_with_wildcards(self) -> None:
+        """Test parent directory exclusion with wildcard patterns."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 2: Parent excluded by wildcard
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"*/build/\n")
+            f.write(b"!*/build/important.txt\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("src/build/"))
+        self.assertTrue(m.is_ignored("src/build/important.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("test/build/"))
+        self.assertTrue(m.is_ignored("test/build/important.txt"))  # Cannot re-include
+
+    def test_parent_exclusion_with_double_asterisk(self) -> None:
+        """Test parent directory exclusion with ** patterns."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 3: Complex ** pattern with parent exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"**/node_modules/\n")
+            f.write(b"!**/node_modules/keep.txt\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("node_modules/"))
+        self.assertTrue(m.is_ignored("node_modules/keep.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("src/node_modules/"))
+        self.assertTrue(m.is_ignored("src/node_modules/keep.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("deep/nested/node_modules/"))
+        self.assertTrue(
+            m.is_ignored("deep/nested/node_modules/keep.txt")
+        )  # Cannot re-include
+
+    def test_no_parent_exclusion_with_glob_contents(self) -> None:
+        """Test that dir/** allows specific file negations for immediate children."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test: dir/** allows specific file negations (unlike dir/ which doesn't)
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"logs/**\n")
+            f.write(b"!logs/important.log\n")
+            f.write(b"!logs/keep/\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # logs/ itself is excluded by logs/**
+        self.assertTrue(m.is_ignored("logs/"))
+        # Specific file negation works with dir/** patterns
+        self.assertFalse(m.is_ignored("logs/important.log"))
+        # Directory negations still don't work (parent exclusion)
+        self.assertTrue(m.is_ignored("logs/keep/"))
+        # Nested paths are ignored
+        self.assertTrue(m.is_ignored("logs/subdir/"))
+        self.assertTrue(m.is_ignored("logs/subdir/file.txt"))
+
+    def test_parent_exclusion_ordering(self) -> None:
+        """Test that parent exclusion depends on pattern ordering."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 5: Order matters for parent exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"!data/important/\n")  # This comes first but won't work
+            f.write(b"data/\n")  # This excludes the parent
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("data/"))
+        self.assertTrue(m.is_ignored("data/important/"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("data/important/file.txt"))
+
 
 class QuotePathTests(TestCase):
     """Tests for _quote_path function."""