test_source.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. # test_source.py -- Tests for scanning dulwich source code
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for scanning dulwich source code for compliance."""
  22. import ast
  23. import os
  24. import re
  25. import unittest
  26. from pathlib import Path
  27. # Files that are allowed to not have the standard preamble
  28. PREAMBLE_EXCEPTIONS = [
  29. "dulwich/diffstat.py", # MIT licensed file
  30. ]
  31. # Files that are allowed to use os.environ (beyond cli.py and porcelain/)
  32. OS_ENVIRON_EXCEPTIONS = [
  33. "dulwich/client.py", # Git protocol environment variables
  34. "dulwich/repo.py", # User identity environment variables
  35. "dulwich/log_utils.py", # GIT_TRACE environment variable
  36. "dulwich/config.py", # Git configuration environment variables
  37. "dulwich/gc.py", # GIT_AUTO_GC environment variable
  38. "dulwich/contrib/swift.py", # DULWICH_SWIFT_CFG environment variable
  39. "dulwich/hooks.py", # Git hooks environment setup
  40. ]
  41. # Standard license block that must appear in all files
  42. STANDARD_LICENSE_BLOCK = [
  43. "# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later\n",
  44. "# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU\n",
  45. "# General Public License as published by the Free Software Foundation; version 2.0\n",
  46. "# or (at your option) any later version. You can redistribute it and/or\n",
  47. "# modify it under the terms of either of these two licenses.\n",
  48. "#\n",
  49. "# Unless required by applicable law or agreed to in writing, software\n",
  50. '# distributed under the License is distributed on an "AS IS" BASIS,\n',
  51. "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
  52. "# See the License for the specific language governing permissions and\n",
  53. "# limitations under the License.\n",
  54. "#\n",
  55. "# You should have received a copy of the licenses; if not, see\n",
  56. "# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License\n",
  57. "# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache\n",
  58. "# License, Version 2.0.\n",
  59. "#\n",
  60. ]
  61. def _get_python_files(directory_name):
  62. """Get all Python files in a directory.
  63. Args:
  64. directory_name: Name of directory relative to project root (e.g., "dulwich", "tests")
  65. Returns:
  66. List of tuples of (Path object, relative path from project root)
  67. """
  68. project_root = Path(__file__).parent.parent
  69. target_dir = project_root / directory_name
  70. if not target_dir.exists():
  71. raise RuntimeError(f"{directory_name} directory not found at {target_dir}")
  72. python_files = []
  73. for root, dirs, files in os.walk(target_dir):
  74. # Skip build directories
  75. if root.endswith(("build", "__pycache__")):
  76. continue
  77. for file in files:
  78. if file.endswith(".py"):
  79. file_path = Path(root) / file
  80. rel_path = file_path.relative_to(project_root)
  81. python_files.append((file_path, rel_path))
  82. return python_files
  83. def _imports_module(file_path, module_name):
  84. """Check if a Python file imports a specific module or any submodules.
  85. Args:
  86. file_path: Path to the Python file
  87. module_name: Module name to check for (e.g., "dulwich.porcelain", "dulwich.cli")
  88. Returns:
  89. bool: True if the file imports the module or any submodule
  90. """
  91. with open(file_path, encoding="utf-8") as f:
  92. tree = ast.parse(f.read(), filename=str(file_path))
  93. for node in ast.walk(tree):
  94. # Check "import dulwich.porcelain" or "import dulwich.porcelain.lfs"
  95. if isinstance(node, ast.Import):
  96. for alias in node.names:
  97. if alias.name == module_name or alias.name.startswith(f"{module_name}."):
  98. return True
  99. # Check "from dulwich.porcelain import ..." or "from dulwich import porcelain"
  100. if isinstance(node, ast.ImportFrom):
  101. # "from dulwich.porcelain import something"
  102. # "from dulwich.porcelain.lfs import something"
  103. if node.module == module_name or (node.module and node.module.startswith(f"{module_name}.")):
  104. return True
  105. # Handle "from dulwich import porcelain"
  106. if node.module and module_name.startswith(f"{node.module}."):
  107. # e.g., module="dulwich", module_name="dulwich.porcelain"
  108. suffix = module_name[len(node.module) + 1:]
  109. for alias in node.names:
  110. if alias.name == suffix:
  111. return True
  112. return False
  113. class SourceCodeComplianceTests(unittest.TestCase):
  114. """Tests to ensure dulwich source code follows project standards."""
  115. @staticmethod
  116. def _get_dulwich_python_files():
  117. """Get all Python files in the dulwich package.
  118. Returns:
  119. List of tuples of (Path object, relative path from project root)
  120. """
  121. return _get_python_files("dulwich")
  122. @classmethod
  123. def _has_standard_preamble(cls, file_path: Path) -> tuple[bool, str]:
  124. """Check if a file has the standard dulwich preamble.
  125. The standard preamble consists of:
  126. - First line: # filename -- Description (or similar)
  127. - Copyright line(s): # Copyright (C) ...
  128. - Empty comment: #
  129. - Standard license block (exact match required)
  130. Args:
  131. file_path: Path to the Python file to check
  132. Returns:
  133. Tuple of (has_preamble, error_message)
  134. """
  135. with open(file_path, encoding="utf-8") as f:
  136. lines = f.readlines()
  137. if len(lines) < 21:
  138. return False, "File too short to contain standard preamble"
  139. # Check first line starts with #
  140. if not lines[0].startswith("#"):
  141. return False, "First line does not start with #"
  142. # Find the SPDX line (should be within first 10 lines)
  143. spdx_line_idx = None
  144. for i in range(min(10, len(lines))):
  145. if "SPDX-License-Identifier" in lines[i]:
  146. spdx_line_idx = i
  147. break
  148. if spdx_line_idx is None:
  149. return False, "SPDX-License-Identifier line not found in first 10 lines"
  150. # Check that we have enough lines after the SPDX line
  151. if len(lines) < spdx_line_idx + len(STANDARD_LICENSE_BLOCK):
  152. return (
  153. False,
  154. "File too short to contain complete license block after SPDX line",
  155. )
  156. # Extract the license block from the file
  157. file_license_block = lines[
  158. spdx_line_idx : spdx_line_idx + len(STANDARD_LICENSE_BLOCK)
  159. ]
  160. # Compare with standard license block
  161. for i, (expected, actual) in enumerate(
  162. zip(STANDARD_LICENSE_BLOCK, file_license_block)
  163. ):
  164. if expected != actual:
  165. return (
  166. False,
  167. f"License block mismatch at line {spdx_line_idx + i + 1}: expected {expected!r}, got {actual!r}",
  168. )
  169. return True, ""
  170. def test_all_files_have_preamble(self):
  171. """Test that all dulwich Python files have the standard preamble."""
  172. python_files = self._get_dulwich_python_files()
  173. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  174. files_without_preamble = []
  175. for file_path, rel_path in python_files:
  176. # Convert to forward slashes for consistency
  177. rel_path_str = str(rel_path).replace(os.sep, "/")
  178. # Skip exceptions
  179. if rel_path_str in PREAMBLE_EXCEPTIONS:
  180. continue
  181. has_preamble, error_msg = self._has_standard_preamble(file_path)
  182. if not has_preamble:
  183. files_without_preamble.append(f"{rel_path_str}: {error_msg}")
  184. if files_without_preamble:
  185. self.fail(
  186. "The following files are missing the standard preamble:\n"
  187. + "\n".join(f" - {f}" for f in files_without_preamble)
  188. )
  189. def test_os_environ_usage_restricted(self):
  190. """Test that os.environ is only used in allowed files."""
  191. python_files = self._get_dulwich_python_files()
  192. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  193. # Files allowed to use os.environ
  194. allowed_files = {
  195. "dulwich/cli.py",
  196. "dulwich/porcelain/",
  197. }
  198. # Add exception files
  199. allowed_files.update(OS_ENVIRON_EXCEPTIONS)
  200. files_with_violations = []
  201. # Pattern to match os.environ usage
  202. os_environ_pattern = re.compile(r"\bos\.environ\b")
  203. for file_path, rel_path in python_files:
  204. # Convert to forward slashes for consistency
  205. rel_path_str = str(rel_path).replace(os.sep, "/")
  206. # Skip allowed files
  207. if any(rel_path_str.startswith(f) for f in allowed_files):
  208. continue
  209. with open(file_path, encoding="utf-8") as f:
  210. content = f.read()
  211. matches = os_environ_pattern.findall(content)
  212. if matches:
  213. # Count occurrences
  214. line_numbers = []
  215. for line_num, line in enumerate(content.split("\n"), 1):
  216. if os_environ_pattern.search(line):
  217. line_numbers.append(line_num)
  218. files_with_violations.append(
  219. f"{rel_path_str}: os.environ used on line(s) {', '.join(map(str, line_numbers))}"
  220. )
  221. if files_with_violations:
  222. self.fail(
  223. "The following files use os.environ but are not in the allowed list:\n"
  224. + "\n".join(f" - {f}" for f in files_with_violations)
  225. + "\n\nFiles allowed to use os.environ:\n"
  226. + "\n".join(f" - {f}" for f in sorted(allowed_files))
  227. )
  228. def test_porcelain_usage_restricted_in_tests(self):
  229. """Test that dulwich.porcelain is only used in allowed test directories."""
  230. test_files = _get_python_files("tests")
  231. self.assertGreater(len(test_files), 0, "No Python files found in tests/")
  232. # Directories allowed to use porcelain
  233. allowed_dirs = {
  234. "tests/cli/",
  235. "tests/porcelain/",
  236. "tests/compat/",
  237. }
  238. # Individual test files allowed to use porcelain
  239. allowed_files = {
  240. "tests/test_bisect.py",
  241. "tests/test_filters.py",
  242. "tests/test_maintenance.py",
  243. "tests/test_mbox.py",
  244. "tests/test_rebase.py",
  245. "tests/test_rerere.py",
  246. }
  247. files_with_violations = []
  248. for file_path, rel_path in test_files:
  249. # Convert to forward slashes for consistency
  250. rel_path_str = str(rel_path).replace(os.sep, "/")
  251. # Skip allowed directories
  252. if any(rel_path_str.startswith(d) for d in allowed_dirs):
  253. continue
  254. # Skip allowed files
  255. if rel_path_str in allowed_files:
  256. continue
  257. if _imports_module(file_path, "dulwich.porcelain"):
  258. files_with_violations.append(rel_path_str)
  259. if files_with_violations:
  260. self.fail(
  261. "The following test files use dulwich.porcelain but are not in the allowed list:\n"
  262. + "\n".join(f" - {f}" for f in files_with_violations)
  263. + "\n\nLower-level tests should use dulwich APIs directly, not porcelain."
  264. + "\n\nAllowed directories:\n"
  265. + "\n".join(f" - {d}" for d in sorted(allowed_dirs))
  266. + "\nAllowed files:\n"
  267. + "\n".join(f" - {f}" for f in sorted(allowed_files))
  268. )
  269. def test_cli_usage_restricted_in_tests(self):
  270. """Test that dulwich.cli is only used in CLI test directory."""
  271. test_files = _get_python_files("tests")
  272. self.assertGreater(len(test_files), 0, "No Python files found in tests/")
  273. # Only CLI tests should import dulwich.cli
  274. allowed_dir = "tests/cli/"
  275. files_with_violations = []
  276. for file_path, rel_path in test_files:
  277. # Convert to forward slashes for consistency
  278. rel_path_str = str(rel_path).replace(os.sep, "/")
  279. # Skip allowed directory
  280. if rel_path_str.startswith(allowed_dir):
  281. continue
  282. if _imports_module(file_path, "dulwich.cli"):
  283. files_with_violations.append(rel_path_str)
  284. if files_with_violations:
  285. self.fail(
  286. "The following test files use dulwich.cli but are not in tests/cli/:\n"
  287. + "\n".join(f" - {f}" for f in files_with_violations)
  288. + "\n\nOnly CLI tests in tests/cli/ should import dulwich.cli."
  289. )