test_sparse_patterns.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. # test_sparse_patterns.py -- Sparse checkout (full and cone mode) pattern handling
  2. # Copyright (C) 2013 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for dulwich.sparse_patterns."""
  22. import os
  23. import shutil
  24. import tempfile
  25. import time
  26. from dulwich.index import IndexEntry
  27. from dulwich.repo import Repo
  28. from dulwich.sparse_patterns import (
  29. BlobNotFoundError,
  30. SparseCheckoutConflictError,
  31. apply_included_paths,
  32. compute_included_paths_cone,
  33. compute_included_paths_full,
  34. determine_included_paths,
  35. match_gitignore_patterns,
  36. parse_sparse_patterns,
  37. )
  38. from . import TestCase
  39. class ParseSparsePatternsTests(TestCase):
  40. """Test parse_sparse_patterns function."""
  41. def test_empty_and_comment_lines(self):
  42. lines = [
  43. "",
  44. "# comment here",
  45. " ",
  46. "# another comment",
  47. ]
  48. parsed = parse_sparse_patterns(lines)
  49. self.assertEqual(parsed, [])
  50. def test_simple_patterns(self):
  51. lines = [
  52. "*.py",
  53. "!*.md",
  54. "/docs/",
  55. "!/docs/images/",
  56. ]
  57. parsed = parse_sparse_patterns(lines)
  58. self.assertEqual(len(parsed), 4)
  59. self.assertEqual(parsed[0], ("*.py", False, False, False)) # include *.py
  60. self.assertEqual(parsed[1], ("*.md", True, False, False)) # exclude *.md
  61. self.assertEqual(parsed[2], ("docs", False, True, True)) # anchored, dir_only
  62. self.assertEqual(parsed[3], ("docs/images", True, True, True))
  63. def test_trailing_slash_dir(self):
  64. lines = [
  65. "src/",
  66. ]
  67. parsed = parse_sparse_patterns(lines)
  68. # "src/" => (pattern="src", negation=False, dir_only=True, anchored=False)
  69. self.assertEqual(parsed, [("src", False, True, False)])
  70. def test_negation_anchor(self):
  71. lines = [
  72. "!/foo.txt",
  73. ]
  74. parsed = parse_sparse_patterns(lines)
  75. # => (pattern="foo.txt", negation=True, dir_only=False, anchored=True)
  76. self.assertEqual(parsed, [("foo.txt", True, False, True)])
  77. class MatchGitignorePatternsTests(TestCase):
  78. """Test the match_gitignore_patterns function."""
  79. def test_no_patterns_returns_excluded(self):
  80. """If no patterns are provided, by default we treat the path as excluded."""
  81. self.assertFalse(match_gitignore_patterns("anyfile.py", []))
  82. def test_last_match_wins(self):
  83. """Checks that the last pattern to match determines included vs excluded."""
  84. parsed = parse_sparse_patterns(
  85. [
  86. "*.py", # include
  87. "!foo.py", # exclude
  88. ]
  89. )
  90. # "foo.py" matches first pattern => included
  91. # then matches second pattern => excluded
  92. self.assertFalse(match_gitignore_patterns("foo.py", parsed))
  93. def test_dir_only(self):
  94. """A pattern with a trailing slash should only match directories and subdirectories."""
  95. parsed = parse_sparse_patterns(["docs/"])
  96. # Because we set path_is_dir=False, it won't match
  97. self.assertTrue(
  98. match_gitignore_patterns("docs/readme.md", parsed, path_is_dir=False)
  99. )
  100. self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
  101. # Even if the path name is "docs", if it's a file, won't match:
  102. self.assertFalse(match_gitignore_patterns("docs", parsed, path_is_dir=False))
  103. def test_anchored(self):
  104. """Anchored patterns match from the start of the path only."""
  105. parsed = parse_sparse_patterns(["/foo"])
  106. self.assertTrue(match_gitignore_patterns("foo", parsed))
  107. # But "some/foo" doesn't match because anchored requires start
  108. self.assertFalse(match_gitignore_patterns("some/foo", parsed))
  109. def test_unanchored_uses_fnmatch(self):
  110. parsed = parse_sparse_patterns(["foo"])
  111. self.assertTrue(match_gitignore_patterns("some/foo", parsed))
  112. self.assertFalse(match_gitignore_patterns("some/bar", parsed))
  113. def test_anchored_empty_pattern(self):
  114. """Test handling of empty pattern with anchoring (e.g., '/')."""
  115. parsed = parse_sparse_patterns(["/"])
  116. # Check the structure of the parsed empty pattern first
  117. self.assertEqual(parsed, [("", False, False, True)])
  118. # When the pattern is empty with anchoring, it's continued (skipped) in match_gitignore_patterns
  119. # for non-empty paths but for empty string it might match due to empty string comparisons
  120. self.assertFalse(match_gitignore_patterns("foo", parsed))
  121. # An empty string with empty pattern will match (implementation detail)
  122. self.assertTrue(match_gitignore_patterns("", parsed))
  123. def test_anchored_dir_only_exact_match(self):
  124. """Test anchored directory-only patterns with exact matching."""
  125. parsed = parse_sparse_patterns(["/docs/"])
  126. # Test with exact match "docs" and path_is_dir=True
  127. self.assertTrue(match_gitignore_patterns("docs", parsed, path_is_dir=True))
  128. # Test with "docs/" (exact match + trailing slash)
  129. self.assertTrue(match_gitignore_patterns("docs/", parsed, path_is_dir=True))
  130. def test_complex_anchored_patterns(self):
  131. """Test more complex anchored pattern matching."""
  132. parsed = parse_sparse_patterns(["/dir/subdir"])
  133. # Test exact match
  134. self.assertTrue(match_gitignore_patterns("dir/subdir", parsed))
  135. # Test subdirectory path
  136. self.assertTrue(match_gitignore_patterns("dir/subdir/file.txt", parsed))
  137. # Test non-matching path
  138. self.assertFalse(match_gitignore_patterns("otherdir/subdir", parsed))
  139. def test_pattern_matching_edge_cases(self):
  140. """Test various edge cases in pattern matching."""
  141. # Test exact equality with an anchored pattern
  142. parsed = parse_sparse_patterns(["/foo"])
  143. self.assertTrue(match_gitignore_patterns("foo", parsed))
  144. # Test with path_is_dir=True
  145. self.assertTrue(match_gitignore_patterns("foo", parsed, path_is_dir=True))
  146. # Test exact match with pattern with dir_only=True
  147. parsed = parse_sparse_patterns(["/bar/"])
  148. self.assertTrue(match_gitignore_patterns("bar", parsed, path_is_dir=True))
  149. # Test startswith match for anchored pattern
  150. parsed = parse_sparse_patterns(["/prefix"])
  151. self.assertTrue(
  152. match_gitignore_patterns("prefix/subdirectory/file.txt", parsed)
  153. )
  154. class ComputeIncludedPathsFullTests(TestCase):
  155. """Test compute_included_paths_full using a real ephemeral repo index."""
  156. def setUp(self):
  157. super().setUp()
  158. self.temp_dir = tempfile.mkdtemp()
  159. self.addCleanup(shutil.rmtree, self.temp_dir)
  160. self.repo = Repo.init(self.temp_dir)
  161. def _add_file_to_index(self, relpath, content=b"test"):
  162. full = os.path.join(self.temp_dir, relpath)
  163. os.makedirs(os.path.dirname(full), exist_ok=True)
  164. with open(full, "wb") as f:
  165. f.write(content)
  166. # Stage in the index
  167. self.repo.stage([relpath])
  168. def test_basic_inclusion_exclusion(self):
  169. """Given patterns, check correct set of included paths."""
  170. self._add_file_to_index("foo.py", b"print(1)")
  171. self._add_file_to_index("bar.md", b"markdown")
  172. self._add_file_to_index("docs/readme", b"# docs")
  173. lines = [
  174. "*.py", # include all .py
  175. "!bar.*", # exclude bar.md
  176. "docs/", # include docs dir
  177. ]
  178. included = compute_included_paths_full(self.repo, lines)
  179. self.assertEqual(included, {"foo.py", "docs/readme"})
  180. def test_full_with_utf8_paths(self):
  181. """Test that UTF-8 encoded paths are handled correctly."""
  182. self._add_file_to_index("unicode/文件.txt", b"unicode content")
  183. self._add_file_to_index("unicode/другой.md", b"more unicode")
  184. # Include all text files
  185. lines = ["*.txt"]
  186. included = compute_included_paths_full(self.repo, lines)
  187. self.assertEqual(included, {"unicode/文件.txt"})
  188. class ComputeIncludedPathsConeTests(TestCase):
  189. """Test compute_included_paths_cone with ephemeral repo to see included vs excluded."""
  190. def setUp(self):
  191. super().setUp()
  192. self.temp_dir = tempfile.mkdtemp()
  193. self.addCleanup(shutil.rmtree, self.temp_dir)
  194. self.repo = Repo.init(self.temp_dir)
  195. def _add_file_to_index(self, relpath, content=b"test"):
  196. full = os.path.join(self.temp_dir, relpath)
  197. os.makedirs(os.path.dirname(full), exist_ok=True)
  198. with open(full, "wb") as f:
  199. f.write(content)
  200. self.repo.stage([relpath])
  201. def test_cone_mode_patterns(self):
  202. """Simpler pattern handling in cone mode.
  203. Lines in 'cone' style typically look like:
  204. - /* -> include top-level
  205. - !/*/ -> exclude all subdirs
  206. - /docs/ -> reinclude 'docs' directory
  207. """
  208. self._add_file_to_index("topfile", b"hi")
  209. self._add_file_to_index("docs/readme.md", b"stuff")
  210. self._add_file_to_index("lib/code.py", b"stuff")
  211. lines = [
  212. "/*",
  213. "!/*/",
  214. "/docs/",
  215. ]
  216. included = compute_included_paths_cone(self.repo, lines)
  217. # top-level => includes 'topfile'
  218. # subdirs => excluded, except docs/
  219. self.assertEqual(included, {"topfile", "docs/readme.md"})
  220. def test_cone_mode_with_empty_pattern(self):
  221. """Test cone mode with an empty reinclude directory."""
  222. self._add_file_to_index("topfile", b"hi")
  223. self._add_file_to_index("docs/readme.md", b"stuff")
  224. # Include an empty pattern that should be skipped
  225. lines = [
  226. "/*",
  227. "!/*/",
  228. "/", # This empty pattern should be skipped
  229. ]
  230. included = compute_included_paths_cone(self.repo, lines)
  231. # Only topfile should be included since the empty pattern is skipped
  232. self.assertEqual(included, {"topfile"})
  233. def test_no_exclude_subdirs(self):
  234. """If lines never specify '!/*/', we include everything by default."""
  235. self._add_file_to_index("topfile", b"hi")
  236. self._add_file_to_index("docs/readme.md", b"stuff")
  237. self._add_file_to_index("lib/code.py", b"stuff")
  238. lines = [
  239. "/*", # top-level
  240. "/docs/", # re-include docs?
  241. ]
  242. included = compute_included_paths_cone(self.repo, lines)
  243. # Because exclude_subdirs was never set, everything is included:
  244. self.assertEqual(
  245. included,
  246. {"topfile", "docs/readme.md", "lib/code.py"},
  247. )
  248. def test_only_reinclude_dirs(self):
  249. """Test cone mode when only reinclude directories are specified."""
  250. self._add_file_to_index("topfile", b"hi")
  251. self._add_file_to_index("docs/readme.md", b"stuff")
  252. self._add_file_to_index("lib/code.py", b"stuff")
  253. # Only specify reinclude_dirs, need to explicitly exclude subdirs
  254. lines = ["!/*/", "/docs/"]
  255. included = compute_included_paths_cone(self.repo, lines)
  256. # Only docs/* should be included, not topfile or lib/*
  257. self.assertEqual(included, {"docs/readme.md"})
  258. def test_exclude_subdirs_no_toplevel(self):
  259. """Test with exclude_subdirs but without toplevel files."""
  260. self._add_file_to_index("topfile", b"hi")
  261. self._add_file_to_index("docs/readme.md", b"stuff")
  262. self._add_file_to_index("lib/code.py", b"stuff")
  263. # Only exclude subdirs and reinclude docs
  264. lines = ["!/*/", "/docs/"]
  265. included = compute_included_paths_cone(self.repo, lines)
  266. # Only docs/* should be included since we didn't include top level
  267. self.assertEqual(included, {"docs/readme.md"})
  268. class DetermineIncludedPathsTests(TestCase):
  269. """Test the top-level determine_included_paths function."""
  270. def setUp(self):
  271. super().setUp()
  272. self.temp_dir = tempfile.mkdtemp()
  273. self.addCleanup(shutil.rmtree, self.temp_dir)
  274. self.repo = Repo.init(self.temp_dir)
  275. def _add_file_to_index(self, relpath):
  276. path = os.path.join(self.temp_dir, relpath)
  277. os.makedirs(os.path.dirname(path), exist_ok=True)
  278. with open(path, "wb") as f:
  279. f.write(b"data")
  280. self.repo.stage([relpath])
  281. def test_full_mode(self):
  282. self._add_file_to_index("foo.py")
  283. self._add_file_to_index("bar.md")
  284. lines = ["*.py", "!bar.*"]
  285. included = determine_included_paths(self.repo, lines, cone=False)
  286. self.assertEqual(included, {"foo.py"})
  287. def test_cone_mode(self):
  288. self._add_file_to_index("topfile")
  289. self._add_file_to_index("subdir/anotherfile")
  290. lines = ["/*", "!/*/"]
  291. included = determine_included_paths(self.repo, lines, cone=True)
  292. self.assertEqual(included, {"topfile"})
  293. class ApplyIncludedPathsTests(TestCase):
  294. """Integration tests for apply_included_paths, verifying skip-worktree bits and file removal."""
  295. def setUp(self):
  296. super().setUp()
  297. self.temp_dir = tempfile.mkdtemp()
  298. self.addCleanup(shutil.rmtree, self.temp_dir)
  299. self.repo = Repo.init(self.temp_dir)
  300. # For testing local_modifications_exist logic, we'll need the normalizer
  301. # plus some real content in the object store.
  302. def _commit_blob(self, relpath, content=b"hello"):
  303. """Create a blob object in object_store, stage an index entry for it."""
  304. full = os.path.join(self.temp_dir, relpath)
  305. os.makedirs(os.path.dirname(full), exist_ok=True)
  306. with open(full, "wb") as f:
  307. f.write(content)
  308. self.repo.stage([relpath])
  309. # Actually commit so the object is in the store
  310. self.repo.do_commit(message=b"Commit " + relpath.encode())
  311. def test_set_skip_worktree_bits(self):
  312. """If a path is not in included_paths, skip_worktree bit is set."""
  313. self._commit_blob("keep.py", b"print('keep')")
  314. self._commit_blob("exclude.md", b"# exclude")
  315. included = {"keep.py"}
  316. apply_included_paths(self.repo, included_paths=included, force=False)
  317. idx = self.repo.open_index()
  318. self.assertIn(b"keep.py", idx)
  319. self.assertFalse(idx[b"keep.py"].skip_worktree)
  320. self.assertIn(b"exclude.md", idx)
  321. self.assertTrue(idx[b"exclude.md"].skip_worktree)
  322. # Also check that the exclude.md file was removed from the working tree
  323. exclude_path = os.path.join(self.temp_dir, "exclude.md")
  324. self.assertFalse(os.path.exists(exclude_path))
  325. def test_conflict_with_local_modifications_no_force(self):
  326. """If local modifications exist for an excluded path, raise SparseCheckoutConflictError."""
  327. self._commit_blob("foo.txt", b"original")
  328. # Modify foo.txt on disk
  329. with open(os.path.join(self.temp_dir, "foo.txt"), "ab") as f:
  330. f.write(b" local changes")
  331. with self.assertRaises(SparseCheckoutConflictError):
  332. apply_included_paths(self.repo, included_paths=set(), force=False)
  333. def test_conflict_with_local_modifications_forced_removal(self):
  334. """With force=True, we remove local modifications and skip_worktree the file."""
  335. self._commit_blob("foo.txt", b"original")
  336. with open(os.path.join(self.temp_dir, "foo.txt"), "ab") as f:
  337. f.write(b" local changes")
  338. # This time, pass force=True => file is removed
  339. apply_included_paths(self.repo, included_paths=set(), force=True)
  340. # Check skip-worktree in index
  341. idx = self.repo.open_index()
  342. self.assertTrue(idx[b"foo.txt"].skip_worktree)
  343. # Working tree file removed
  344. self.assertFalse(os.path.exists(os.path.join(self.temp_dir, "foo.txt")))
  345. def test_materialize_included_file_if_missing(self):
  346. """If a path is included but missing from disk, we restore it from the blob in the store."""
  347. self._commit_blob("restored.txt", b"some content")
  348. # Manually remove the file from the working tree
  349. os.remove(os.path.join(self.temp_dir, "restored.txt"))
  350. apply_included_paths(self.repo, included_paths={"restored.txt"}, force=False)
  351. # Should have re-created "restored.txt" from the blob
  352. self.assertTrue(os.path.exists(os.path.join(self.temp_dir, "restored.txt")))
  353. with open(os.path.join(self.temp_dir, "restored.txt"), "rb") as f:
  354. self.assertEqual(f.read(), b"some content")
  355. def test_blob_not_found_raises(self):
  356. """If the object store is missing the blob for an included path, raise BlobNotFoundError."""
  357. # We'll create an entry in the index that references a nonexistent sha
  358. idx = self.repo.open_index()
  359. fake_sha = b"ab" * 20
  360. e = IndexEntry(
  361. ctime=(int(time.time()), 0), # ctime (s, ns)
  362. mtime=(int(time.time()), 0), # mtime (s, ns)
  363. dev=0, # dev
  364. ino=0, # ino
  365. mode=0o100644, # mode
  366. uid=0, # uid
  367. gid=0, # gid
  368. size=0, # size
  369. sha=fake_sha, # sha
  370. flags=0, # flags
  371. extended_flags=0,
  372. )
  373. e.set_skip_worktree(False)
  374. e.sha = fake_sha
  375. idx[(b"missing_file")] = e
  376. idx.write()
  377. with self.assertRaises(BlobNotFoundError):
  378. apply_included_paths(
  379. self.repo, included_paths={"missing_file"}, force=False
  380. )
  381. def test_directory_removal(self):
  382. """Test handling of directories when removing excluded files."""
  383. # Create a directory with a file
  384. dir_path = os.path.join(self.temp_dir, "dir")
  385. os.makedirs(dir_path, exist_ok=True)
  386. self._commit_blob("dir/file.txt", b"content")
  387. # Make sure it exists before we proceed
  388. self.assertTrue(os.path.exists(os.path.join(dir_path, "file.txt")))
  389. # Exclude everything
  390. apply_included_paths(self.repo, included_paths=set(), force=True)
  391. # The file should be removed, but the directory might remain
  392. self.assertFalse(os.path.exists(os.path.join(dir_path, "file.txt")))
  393. # Test when file is actually a directory - should hit the IsADirectoryError case
  394. another_dir_path = os.path.join(self.temp_dir, "another_dir")
  395. os.makedirs(another_dir_path, exist_ok=True)
  396. self._commit_blob("another_dir/subfile.txt", b"content")
  397. # Create a path with the same name as the file but make it a dir to trigger IsADirectoryError
  398. subfile_dir_path = os.path.join(another_dir_path, "subfile.txt")
  399. if os.path.exists(subfile_dir_path):
  400. # Remove any existing file first
  401. os.remove(subfile_dir_path)
  402. os.makedirs(subfile_dir_path, exist_ok=True)
  403. # Attempt to apply sparse checkout, should trigger IsADirectoryError but not fail
  404. apply_included_paths(self.repo, included_paths=set(), force=True)
  405. def test_handling_removed_files(self):
  406. """Test that files already removed from disk are handled correctly during exclusion."""
  407. self._commit_blob("test_file.txt", b"test content")
  408. # Remove the file manually
  409. os.remove(os.path.join(self.temp_dir, "test_file.txt"))
  410. # Should not raise any errors when excluding this file
  411. apply_included_paths(self.repo, included_paths=set(), force=True)
  412. # Verify skip-worktree bit is set in index
  413. idx = self.repo.open_index()
  414. self.assertTrue(idx[b"test_file.txt"].skip_worktree)
  415. def test_local_modifications_ioerror(self):
  416. """Test handling of IOError when checking for local modifications."""
  417. self._commit_blob("special_file.txt", b"content")
  418. file_path = os.path.join(self.temp_dir, "special_file.txt")
  419. # Make the file unreadable
  420. os.chmod(file_path, 0)
  421. # Add a cleanup that checks if file exists first
  422. def safe_chmod_cleanup():
  423. if os.path.exists(file_path):
  424. try:
  425. os.chmod(file_path, 0o644)
  426. except (FileNotFoundError, PermissionError):
  427. pass
  428. self.addCleanup(safe_chmod_cleanup)
  429. # Should raise conflict error with unreadable file and force=False
  430. with self.assertRaises(SparseCheckoutConflictError):
  431. apply_included_paths(self.repo, included_paths=set(), force=False)
  432. # With force=True, should remove the file anyway
  433. apply_included_paths(self.repo, included_paths=set(), force=True)
  434. # Verify file is gone and skip-worktree bit is set
  435. self.assertFalse(os.path.exists(file_path))
  436. idx = self.repo.open_index()
  437. self.assertTrue(idx[b"special_file.txt"].skip_worktree)