Browse Source

Simplify gitignore parent exclusion logic (#1728)

Reduce complexity while maintaining exact Git-compatible behavior:
- Streamline _check_parent_exclusion function by combining conditions
- Consolidate _pattern_excludes_parent cases into cleaner flow
- Simplify Pattern.match method with more concise conditionals
- Remove redundant comments and intermediate variables

Fixes #1721
Jelmer Vernooij 2 weeks ago
parent
commit
6d02f7e35e
3 changed files with 365 additions and 55 deletions
  1. 67 54
      dulwich/ignore.py
  2. 58 1
      tests/compat/test_check_ignore.py
  3. 240 0
      tests/test_ignore.py

+ 67 - 54
dulwich/ignore.py

@@ -40,12 +40,11 @@ from .config import Config, get_xdg_config_home_path
 
 
 def _pattern_to_str(pattern: Union["Pattern", bytes, str]) -> str:
 def _pattern_to_str(pattern: Union["Pattern", bytes, str]) -> str:
     """Convert a pattern to string, handling both Pattern objects and raw patterns."""
     """Convert a pattern to string, handling both Pattern objects and raw patterns."""
-    if hasattr(pattern, "pattern"):
-        pattern_bytes = pattern.pattern
+    if isinstance(pattern, Pattern):
+        pattern_data: Union[bytes, str] = pattern.pattern
     else:
     else:
-        pattern_bytes = pattern
-
-    return pattern_bytes.decode() if isinstance(pattern_bytes, bytes) else pattern_bytes
+        pattern_data = pattern
+    return pattern_data.decode() if isinstance(pattern_data, bytes) else pattern_data
 
 
 
 
 def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
 def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
@@ -58,55 +57,65 @@ def _check_parent_exclusion(path: str, matching_patterns: list) -> bool:
     Returns:
     Returns:
         True if parent exclusion applies (negation should be ineffective), False otherwise
         True if parent exclusion applies (negation should be ineffective), False otherwise
     """
     """
-    # Find the final negation pattern that would include this file
-    final_negation_pattern = None
-    for pattern in reversed(matching_patterns):
-        if not pattern.is_exclude:  # is_exclude=False means negation/inclusion
-            final_negation_pattern = pattern
-            break
-
-    if not final_negation_pattern:
-        return False  # No negation to check
-
-    final_pattern_str = _pattern_to_str(final_negation_pattern)
-
-    # Check each exclusion pattern to see if it excludes a parent directory
-    for pattern in matching_patterns:
-        if not pattern.is_exclude:  # Skip negations
-            continue
-
-        pattern_str = _pattern_to_str(pattern)
+    # Find the last negation pattern
+    final_negation = next(
+        (p for p in reversed(matching_patterns) if not p.is_exclude), None
+    )
+    if not final_negation:
+        return False
 
 
-        if _pattern_excludes_parent(pattern_str, path, final_pattern_str):
-            return True
+    final_pattern_str = _pattern_to_str(final_negation)
 
 
-    return False  # No parent exclusion applies
+    # Check if any exclusion pattern excludes a parent directory
+    return any(
+        pattern.is_exclude
+        and _pattern_excludes_parent(_pattern_to_str(pattern), path, final_pattern_str)
+        for pattern in matching_patterns
+    )
 
 
 
 
 def _pattern_excludes_parent(
 def _pattern_excludes_parent(
     pattern_str: str, path: str, final_pattern_str: str
     pattern_str: str, path: str, final_pattern_str: str
 ) -> bool:
 ) -> bool:
     """Check if a pattern excludes a parent directory of the given path."""
     """Check if a pattern excludes a parent directory of the given path."""
-    # Case 1: Direct directory exclusion (pattern ending with /)
-    if pattern_str.endswith("/"):
-        excluded_dir = pattern_str[:-1]  # Remove trailing /
-        return "/" in path and path.startswith(excluded_dir + "/")
-
-    # Case 2: Recursive exclusion patterns (**/dir/**)
+    # Handle **/middle/** patterns
     if pattern_str.startswith("**/") and pattern_str.endswith("/**"):
     if pattern_str.startswith("**/") and pattern_str.endswith("/**"):
-        dir_name = pattern_str[3:-3]  # Remove **/ and /**
-        return dir_name != "" and ("/" + dir_name + "/") in ("/" + path)
+        middle = pattern_str[3:-3]
+        return f"/{middle}/" in f"/{path}" or path.startswith(f"{middle}/")
 
 
-    # Case 3: Directory glob patterns (dir/**)
+    # Handle dir/** patterns
     if pattern_str.endswith("/**") and not pattern_str.startswith("**/"):
     if pattern_str.endswith("/**") and not pattern_str.startswith("**/"):
-        dir_prefix = pattern_str[:-3]  # Remove /**
-        if path.startswith(dir_prefix + "/"):
-            # Check if this is a nested path (more than one level under dir_prefix)
-            remaining_path = path[len(dir_prefix + "/") :]
-            if "/" in remaining_path:
-                # This is a nested path - parent directory exclusion applies
-                # BUT only for directory negations, not file negations
-                return final_pattern_str.endswith("/")
+        base_dir = pattern_str[:-3]
+        if not path.startswith(base_dir + "/"):
+            return False
+
+        remaining = path[len(base_dir) + 1 :]
+
+        # Special case: dir/** allows immediate child file negations
+        if (
+            not path.endswith("/")
+            and final_pattern_str.startswith("!")
+            and "/" not in remaining
+        ):
+            neg_pattern = final_pattern_str[1:]
+            if neg_pattern == path or ("*" in neg_pattern and "**" not in neg_pattern):
+                return False
+
+        # Nested files with ** negation patterns
+        if "**" in final_pattern_str and Pattern(final_pattern_str[1:].encode()).match(
+            path.encode()
+        ):
+            return False
+
+        return True
+
+    # Directory patterns (ending with /) can exclude parent directories
+    if pattern_str.endswith("/") and "/" in path:
+        p = Pattern(pattern_str.encode())
+        parts = path.split("/")
+        return any(
+            p.match(("/".join(parts[:i]) + "/").encode()) for i in range(1, len(parts))
+        )
 
 
     return False
     return False
 
 
@@ -347,17 +356,22 @@ class Pattern:
           path: Path to match (relative to ignore location)
           path: Path to match (relative to ignore location)
         Returns: boolean
         Returns: boolean
         """
         """
+        # For negation directory patterns (e.g., !dir/), only match directories
+        if self.is_directory_only and not self.is_exclude and not path.endswith(b"/"):
+            return False
+
+        # Check if the regex matches
         if self._re.match(path):
         if self._re.match(path):
             return True
             return True
 
 
-        # Special handling for directory patterns that exclude files under them
-        if self.is_directory_only and self.is_exclude:
-            # For exclusion directory patterns, also match files under the directory
-            if not path.endswith(b"/"):
-                # This is a file - check if it's under any directory that matches the pattern
-                path_dir = path.rsplit(b"/", 1)[0] + b"/"
-                if len(path.split(b"/")) > 1 and self._re.match(path_dir):
-                    return True
+        # For exclusion directory patterns, also match files under the directory
+        if (
+            self.is_directory_only
+            and self.is_exclude
+            and not path.endswith(b"/")
+            and b"/" in path
+        ):
+            return bool(self._re.match(path.rsplit(b"/", 1)[0] + b"/"))
 
 
         return False
         return False
 
 
@@ -462,12 +476,11 @@ class IgnoreFilterStack:
           None if the file is not mentioned, True if it is included,
           None if the file is not mentioned, True if it is included,
           False if it is explicitly excluded.
           False if it is explicitly excluded.
         """
         """
-        status = None
         for filter in self._filters:
         for filter in self._filters:
             status = filter.is_ignored(path)
             status = filter.is_ignored(path)
             if status is not None:
             if status is not None:
                 return status
                 return status
-        return status
+        return None
 
 
 
 
 def default_user_ignore_filter_path(config: Config) -> str:
 def default_user_ignore_filter_path(config: Config) -> str:
@@ -592,7 +605,7 @@ class IgnoreFilterManager:
         patterns for its subdirectories, then the directory itself should not
         patterns for its subdirectories, then the directory itself should not
         be ignored (to allow traversal).
         be ignored (to allow traversal).
         """
         """
-        # Get the last pattern that determined the result
+        # Original logic for traversal check
         last_excluding_pattern = None
         last_excluding_pattern = None
         for match in matches:
         for match in matches:
             if match.is_exclude:
             if match.is_exclude:

+ 58 - 1
tests/compat/test_check_ignore.py

@@ -1096,11 +1096,68 @@ class CheckIgnoreCompatTestCase(CompatTestCase):
             # Verify unquoted paths contain the expected files
             # Verify unquoted paths contain the expected files
             expected_unquoted = {"тест.txt", "файл.测试", "mixed_тест.log"}
             expected_unquoted = {"тест.txt", "файл.测试", "mixed_тест.log"}
             self.assertEqual(unquoted_ignored, expected_unquoted)
             self.assertEqual(unquoted_ignored, expected_unquoted)
-
         except (UnicodeEncodeError, OSError):
         except (UnicodeEncodeError, OSError):
             # Skip test if filesystem doesn't support unicode
             # Skip test if filesystem doesn't support unicode
             self.skipTest("Filesystem doesn't support unicode filenames")
             self.skipTest("Filesystem doesn't support unicode filenames")
 
 
+    def test_parent_exclusion_wildcard_directories(self) -> None:
+        """Test parent directory exclusion with wildcard patterns (issue #1721)."""
+        self._write_gitignore("*/build/\n!*/build/important.txt\n")
+        self._create_dir("src/build")
+        self._create_dir("test/build")
+        self._create_file("src/build/important.txt")
+        self._create_file("test/build/important.txt")
+        self._create_file("src/build/other.txt")
+
+        paths = [
+            "src/build/",
+            "src/build/important.txt",
+            "src/build/other.txt",
+            "test/build/",
+            "test/build/important.txt",
+        ]
+        self._assert_ignore_match(paths)
+
+    def test_parent_exclusion_double_asterisk(self) -> None:
+        """Test parent directory exclusion with ** patterns."""
+        self._write_gitignore("**/node_modules/\n!**/node_modules/keep.txt\n")
+        self._create_dir("node_modules")
+        self._create_dir("src/node_modules")
+        self._create_dir("deep/nested/node_modules")
+        self._create_file("node_modules/keep.txt")
+        self._create_file("src/node_modules/keep.txt")
+        self._create_file("deep/nested/node_modules/keep.txt")
+
+        paths = [
+            "node_modules/",
+            "node_modules/keep.txt",
+            "src/node_modules/",
+            "src/node_modules/keep.txt",
+            "deep/nested/node_modules/",
+            "deep/nested/node_modules/keep.txt",
+        ]
+        self._assert_ignore_match(paths)
+
+    def test_glob_pattern_base_directory(self) -> None:
+        """Test that dir/** doesn't match dir/ itself."""
+        self._write_gitignore("logs/**\n!logs/important.log\n!logs/keep/\n")
+        self._create_dir("logs")
+        self._create_dir("logs/keep")
+        self._create_dir("logs/subdir")
+        self._create_file("logs/important.log")
+        self._create_file("logs/other.log")
+        self._create_file("logs/subdir/file.txt")
+
+        paths = [
+            "logs/",
+            "logs/important.log",
+            "logs/other.log",
+            "logs/keep/",
+            "logs/subdir/",
+            "logs/subdir/file.txt",
+        ]
+        self._assert_ignore_match(paths)
+
     def _git_check_ignore_quoted(self, paths: list[str]) -> set[str]:
     def _git_check_ignore_quoted(self, paths: list[str]) -> set[str]:
         """Run git check-ignore with default quoting and return set of ignored paths."""
         """Run git check-ignore with default quoting and return set of ignored paths."""
         try:
         try:

+ 240 - 0
tests/test_ignore.py

@@ -145,6 +145,108 @@ class MatchPatternTests(TestCase):
             )
             )
 
 
 
 
+class ParentExclusionTests(TestCase):
+    """Tests for parent directory exclusion helper functions."""
+
+    def test_check_parent_exclusion_direct_directory(self) -> None:
+        """Test _check_parent_exclusion with direct directory exclusion."""
+        from dulwich.ignore import Pattern, _check_parent_exclusion
+
+        # Pattern: dir/, !dir/file.txt
+        patterns = [Pattern(b"dir/"), Pattern(b"!dir/file.txt")]
+
+        # dir/file.txt has parent 'dir' excluded
+        self.assertTrue(_check_parent_exclusion("dir/file.txt", patterns))
+
+        # dir/subdir/file.txt also has parent 'dir' excluded
+        self.assertTrue(_check_parent_exclusion("dir/subdir/file.txt", patterns))
+
+        # other/file.txt has no parent excluded
+        self.assertFalse(_check_parent_exclusion("other/file.txt", patterns))
+
+    def test_check_parent_exclusion_no_negation(self) -> None:
+        """Test _check_parent_exclusion when there's no negation pattern."""
+        from dulwich.ignore import Pattern, _check_parent_exclusion
+
+        # Only exclusion patterns
+        patterns = [Pattern(b"*.log"), Pattern(b"build/")]
+
+        # No negation pattern, so no parent exclusion check needed
+        self.assertFalse(_check_parent_exclusion("build/file.txt", patterns))
+
+    def test_pattern_excludes_parent_directory_slash(self) -> None:
+        """Test _pattern_excludes_parent for patterns ending with /."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: parent/
+        self.assertTrue(
+            _pattern_excludes_parent("parent/", "parent/file.txt", "!parent/file.txt")
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "parent/", "parent/sub/file.txt", "!parent/sub/file.txt"
+            )
+        )
+        self.assertFalse(
+            _pattern_excludes_parent("parent/", "other/file.txt", "!other/file.txt")
+        )
+        self.assertFalse(
+            _pattern_excludes_parent("parent/", "parent", "!parent")
+        )  # No / in path
+
+    def test_pattern_excludes_parent_double_asterisk(self) -> None:
+        """Test _pattern_excludes_parent for **/ patterns."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: **/node_modules/**
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "**/node_modules/**",
+                "foo/node_modules/bar/file.txt",
+                "!foo/node_modules/bar/file.txt",
+            )
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "**/node_modules/**", "node_modules/file.txt", "!node_modules/file.txt"
+            )
+        )
+        self.assertFalse(
+            _pattern_excludes_parent(
+                "**/node_modules/**", "foo/bar/file.txt", "!foo/bar/file.txt"
+            )
+        )
+
+    def test_pattern_excludes_parent_glob(self) -> None:
+        """Test _pattern_excludes_parent for dir/** patterns."""
+        from dulwich.ignore import _pattern_excludes_parent
+
+        # Pattern: logs/** - allows exact file negations for immediate children
+        self.assertFalse(
+            _pattern_excludes_parent("logs/**", "logs/file.txt", "!logs/file.txt")
+        )
+
+        # Directory negations still have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/keep/", "!logs/keep/")
+        )
+
+        # Non-exact negations have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/keep/", "!logs/keep/file.txt")
+        )
+
+        # Nested paths have parent exclusion
+        self.assertTrue(
+            _pattern_excludes_parent("logs/**", "logs/sub/file.txt", "!logs/sub/")
+        )
+        self.assertTrue(
+            _pattern_excludes_parent(
+                "logs/**", "logs/sub/file.txt", "!logs/sub/file.txt"
+            )
+        )
+
+
 class IgnoreFilterTests(TestCase):
 class IgnoreFilterTests(TestCase):
     def test_included(self) -> None:
     def test_included(self) -> None:
         filter = IgnoreFilter([b"a.c", b"b.c"])
         filter = IgnoreFilter([b"a.c", b"b.c"])
@@ -328,6 +430,144 @@ class IgnoreFilterManagerTests(TestCase):
             m.is_ignored("data/subdir/")
             m.is_ignored("data/subdir/")
         )  # Subdirectory should be ignored (matches Git behavior)
         )  # Subdirectory should be ignored (matches Git behavior)
 
 
+    def test_issue_1721_directory_negation_with_double_asterisk(self) -> None:
+        """Test for issue #1721: regression with negated subdirectory patterns using **."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Create .gitignore with the patterns from issue #1721
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"data/**\n")
+            f.write(b"!data/**/\n")
+            f.write(b"!data/**/*.csv\n")
+
+        # Create directory structure
+        os.makedirs(os.path.join(repo.path, "data", "subdir"))
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # Test the expected behavior - issue #1721 was that data/myfile was not ignored
+        self.assertTrue(
+            m.is_ignored("data/myfile")
+        )  # File should be ignored (fixes issue #1721)
+        self.assertFalse(m.is_ignored("data/"))  # data/ is matched by !data/**/
+        self.assertFalse(
+            m.is_ignored("data/subdir/")
+        )  # Subdirectory is matched by !data/**/
+        # With data/** pattern, Git allows CSV files to be re-included via !data/**/*.csv
+        self.assertFalse(m.is_ignored("data/test.csv"))  # CSV files are not ignored
+        self.assertFalse(
+            m.is_ignored("data/subdir/test.csv")
+        )  # CSV files in subdirs are not ignored
+        self.assertTrue(
+            m.is_ignored("data/subdir/other.txt")
+        )  # Non-CSV files in subdirs are ignored
+
+    def test_parent_directory_exclusion(self) -> None:
+        """Test Git's parent directory exclusion rule.
+
+        Git rule: "It is not possible to re-include a file if a parent directory of that file is excluded."
+        """
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 1: Direct parent directory exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"parent/\n")
+            f.write(b"!parent/file.txt\n")
+            f.write(b"!parent/child/\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # parent/ is excluded, so files inside cannot be re-included
+        self.assertTrue(m.is_ignored("parent/"))
+        self.assertTrue(m.is_ignored("parent/file.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("parent/child/"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("parent/child/file.txt"))
+
+    def test_parent_exclusion_with_wildcards(self) -> None:
+        """Test parent directory exclusion with wildcard patterns."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 2: Parent excluded by wildcard
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"*/build/\n")
+            f.write(b"!*/build/important.txt\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("src/build/"))
+        self.assertTrue(m.is_ignored("src/build/important.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("test/build/"))
+        self.assertTrue(m.is_ignored("test/build/important.txt"))  # Cannot re-include
+
+    def test_parent_exclusion_with_double_asterisk(self) -> None:
+        """Test parent directory exclusion with ** patterns."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 3: Complex ** pattern with parent exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"**/node_modules/\n")
+            f.write(b"!**/node_modules/keep.txt\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("node_modules/"))
+        self.assertTrue(m.is_ignored("node_modules/keep.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("src/node_modules/"))
+        self.assertTrue(m.is_ignored("src/node_modules/keep.txt"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("deep/nested/node_modules/"))
+        self.assertTrue(
+            m.is_ignored("deep/nested/node_modules/keep.txt")
+        )  # Cannot re-include
+
+    def test_no_parent_exclusion_with_glob_contents(self) -> None:
+        """Test that dir/** allows specific file negations for immediate children."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test: dir/** allows specific file negations (unlike dir/ which doesn't)
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"logs/**\n")
+            f.write(b"!logs/important.log\n")
+            f.write(b"!logs/keep/\n")
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        # logs/ itself is excluded by logs/**
+        self.assertTrue(m.is_ignored("logs/"))
+        # Specific file negation works with dir/** patterns
+        self.assertFalse(m.is_ignored("logs/important.log"))
+        # Directory negations still don't work (parent exclusion)
+        self.assertTrue(m.is_ignored("logs/keep/"))
+        # Nested paths are ignored
+        self.assertTrue(m.is_ignored("logs/subdir/"))
+        self.assertTrue(m.is_ignored("logs/subdir/file.txt"))
+
+    def test_parent_exclusion_ordering(self) -> None:
+        """Test that parent exclusion depends on pattern ordering."""
+        tmp_dir = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, tmp_dir)
+        repo = Repo.init(tmp_dir)
+
+        # Test case 5: Order matters for parent exclusion
+        with open(os.path.join(repo.path, ".gitignore"), "wb") as f:
+            f.write(b"!data/important/\n")  # This comes first but won't work
+            f.write(b"data/\n")  # This excludes the parent
+
+        m = IgnoreFilterManager.from_repo(repo)
+
+        self.assertTrue(m.is_ignored("data/"))
+        self.assertTrue(m.is_ignored("data/important/"))  # Cannot re-include
+        self.assertTrue(m.is_ignored("data/important/file.txt"))
+
 
 
 class QuotePathTests(TestCase):
 class QuotePathTests(TestCase):
     """Tests for _quote_path function."""
     """Tests for _quote_path function."""