test_source.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. # test_source.py -- Tests for scanning dulwich source code
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for scanning dulwich source code for compliance."""
  22. import ast
  23. import os
  24. import re
  25. import unittest
  26. from pathlib import Path
# Files that are allowed to not have the standard preamble.
# Entries are paths relative to the project root, written with forward
# slashes; test_all_files_have_preamble skips a file only when its relative
# path is exactly equal to one of these entries.
PREAMBLE_EXCEPTIONS = [
    "dulwich/diffstat.py",  # MIT licensed file
]
# Files that are allowed to use os.environ (beyond cli.py and porcelain/).
# Entries are forward-slash paths relative to the project root.
# test_os_environ_usage_restricted merges them into its allowed set and
# compares with str.startswith, so an entry also covers any path that
# begins with it.
OS_ENVIRON_EXCEPTIONS = [
    "dulwich/client.py",  # Git protocol environment variables
    "dulwich/repo.py",  # User identity environment variables
    "dulwich/log_utils.py",  # GIT_TRACE environment variable
    "dulwich/config.py",  # Git configuration environment variables
    "dulwich/gc.py",  # GIT_AUTO_GC environment variable
    "dulwich/contrib/swift.py",  # DULWICH_SWIFT_CFG environment variable
    "dulwich/hooks.py",  # Git hooks environment setup
]
  41. # Standard license block that must appear in all files
  42. STANDARD_LICENSE_BLOCK = [
  43. "# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later\n",
  44. "# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU\n",
  45. "# General Public License as published by the Free Software Foundation; version 2.0\n",
  46. "# or (at your option) any later version. You can redistribute it and/or\n",
  47. "# modify it under the terms of either of these two licenses.\n",
  48. "#\n",
  49. "# Unless required by applicable law or agreed to in writing, software\n",
  50. '# distributed under the License is distributed on an "AS IS" BASIS,\n',
  51. "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
  52. "# See the License for the specific language governing permissions and\n",
  53. "# limitations under the License.\n",
  54. "#\n",
  55. "# You should have received a copy of the licenses; if not, see\n",
  56. "# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License\n",
  57. "# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache\n",
  58. "# License, Version 2.0.\n",
  59. "#\n",
  60. ]
  61. def _get_python_files(directory_name):
  62. """Get all Python files in a directory.
  63. Args:
  64. directory_name: Name of directory relative to project root (e.g., "dulwich", "tests")
  65. Returns:
  66. List of tuples of (Path object, relative path from project root)
  67. """
  68. project_root = Path(__file__).parent.parent
  69. target_dir = project_root / directory_name
  70. if not target_dir.exists():
  71. raise RuntimeError(f"{directory_name} directory not found at {target_dir}")
  72. python_files = []
  73. for root, dirs, files in os.walk(target_dir):
  74. # Skip build directories
  75. if root.endswith(("build", "__pycache__")):
  76. continue
  77. for file in files:
  78. if file.endswith(".py"):
  79. file_path = Path(root) / file
  80. rel_path = file_path.relative_to(project_root)
  81. python_files.append((file_path, rel_path))
  82. return python_files
  83. def _imports_module(file_path, module_name):
  84. """Check if a Python file imports a specific module or any submodules.
  85. Args:
  86. file_path: Path to the Python file
  87. module_name: Module name to check for (e.g., "dulwich.porcelain", "dulwich.cli")
  88. Returns:
  89. bool: True if the file imports the module or any submodule
  90. """
  91. with open(file_path, encoding="utf-8") as f:
  92. tree = ast.parse(f.read(), filename=str(file_path))
  93. for node in ast.walk(tree):
  94. # Check "import dulwich.porcelain" or "import dulwich.porcelain.lfs"
  95. if isinstance(node, ast.Import):
  96. for alias in node.names:
  97. if alias.name == module_name or alias.name.startswith(
  98. f"{module_name}."
  99. ):
  100. return True
  101. # Check "from dulwich.porcelain import ..." or "from dulwich import porcelain"
  102. if isinstance(node, ast.ImportFrom):
  103. # "from dulwich.porcelain import something"
  104. # "from dulwich.porcelain.lfs import something"
  105. if node.module == module_name or (
  106. node.module and node.module.startswith(f"{module_name}.")
  107. ):
  108. return True
  109. # Handle "from dulwich import porcelain"
  110. if node.module and module_name.startswith(f"{node.module}."):
  111. # e.g., module="dulwich", module_name="dulwich.porcelain"
  112. suffix = module_name[len(node.module) + 1 :]
  113. for alias in node.names:
  114. if alias.name == suffix:
  115. return True
  116. return False
  117. class SourceCodeComplianceTests(unittest.TestCase):
  118. """Tests to ensure dulwich source code follows project standards."""
  119. @staticmethod
  120. def _get_dulwich_python_files():
  121. """Get all Python files in the dulwich package.
  122. Returns:
  123. List of tuples of (Path object, relative path from project root)
  124. """
  125. return _get_python_files("dulwich")
  126. @classmethod
  127. def _has_standard_preamble(cls, file_path: Path) -> tuple[bool, str]:
  128. """Check if a file has the standard dulwich preamble.
  129. The standard preamble consists of:
  130. - First line: # filename -- Description (or similar)
  131. - Copyright line(s): # Copyright (C) ...
  132. - Empty comment: #
  133. - Standard license block (exact match required)
  134. Args:
  135. file_path: Path to the Python file to check
  136. Returns:
  137. Tuple of (has_preamble, error_message)
  138. """
  139. with open(file_path, encoding="utf-8") as f:
  140. lines = f.readlines()
  141. if len(lines) < 21:
  142. return False, "File too short to contain standard preamble"
  143. # Check first line starts with #
  144. if not lines[0].startswith("#"):
  145. return False, "First line does not start with #"
  146. # Find the SPDX line (should be within first 10 lines)
  147. spdx_line_idx = None
  148. for i in range(min(10, len(lines))):
  149. if "SPDX-License-Identifier" in lines[i]:
  150. spdx_line_idx = i
  151. break
  152. if spdx_line_idx is None:
  153. return False, "SPDX-License-Identifier line not found in first 10 lines"
  154. # Check that we have enough lines after the SPDX line
  155. if len(lines) < spdx_line_idx + len(STANDARD_LICENSE_BLOCK):
  156. return (
  157. False,
  158. "File too short to contain complete license block after SPDX line",
  159. )
  160. # Extract the license block from the file
  161. file_license_block = lines[
  162. spdx_line_idx : spdx_line_idx + len(STANDARD_LICENSE_BLOCK)
  163. ]
  164. # Compare with standard license block
  165. for i, (expected, actual) in enumerate(
  166. zip(STANDARD_LICENSE_BLOCK, file_license_block)
  167. ):
  168. if expected != actual:
  169. return (
  170. False,
  171. f"License block mismatch at line {spdx_line_idx + i + 1}: expected {expected!r}, got {actual!r}",
  172. )
  173. return True, ""
  174. def test_all_files_have_preamble(self):
  175. """Test that all dulwich Python files have the standard preamble."""
  176. python_files = self._get_dulwich_python_files()
  177. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  178. files_without_preamble = []
  179. for file_path, rel_path in python_files:
  180. # Convert to forward slashes for consistency
  181. rel_path_str = str(rel_path).replace(os.sep, "/")
  182. # Skip exceptions
  183. if rel_path_str in PREAMBLE_EXCEPTIONS:
  184. continue
  185. has_preamble, error_msg = self._has_standard_preamble(file_path)
  186. if not has_preamble:
  187. files_without_preamble.append(f"{rel_path_str}: {error_msg}")
  188. if files_without_preamble:
  189. self.fail(
  190. "The following files are missing the standard preamble:\n"
  191. + "\n".join(f" - {f}" for f in files_without_preamble)
  192. )
  193. def test_os_environ_usage_restricted(self):
  194. """Test that os.environ is only used in allowed files."""
  195. python_files = self._get_dulwich_python_files()
  196. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  197. # Files allowed to use os.environ
  198. allowed_files = {
  199. "dulwich/cli.py",
  200. "dulwich/porcelain/",
  201. }
  202. # Add exception files
  203. allowed_files.update(OS_ENVIRON_EXCEPTIONS)
  204. files_with_violations = []
  205. # Pattern to match os.environ usage
  206. os_environ_pattern = re.compile(r"\bos\.environ\b")
  207. for file_path, rel_path in python_files:
  208. # Convert to forward slashes for consistency
  209. rel_path_str = str(rel_path).replace(os.sep, "/")
  210. # Skip allowed files
  211. if any(rel_path_str.startswith(f) for f in allowed_files):
  212. continue
  213. with open(file_path, encoding="utf-8") as f:
  214. content = f.read()
  215. matches = os_environ_pattern.findall(content)
  216. if matches:
  217. # Count occurrences
  218. line_numbers = []
  219. for line_num, line in enumerate(content.split("\n"), 1):
  220. if os_environ_pattern.search(line):
  221. line_numbers.append(line_num)
  222. files_with_violations.append(
  223. f"{rel_path_str}: os.environ used on line(s) {', '.join(map(str, line_numbers))}"
  224. )
  225. if files_with_violations:
  226. self.fail(
  227. "The following files use os.environ but are not in the allowed list:\n"
  228. + "\n".join(f" - {f}" for f in files_with_violations)
  229. + "\n\nFiles allowed to use os.environ:\n"
  230. + "\n".join(f" - {f}" for f in sorted(allowed_files))
  231. )
  232. def test_porcelain_usage_restricted_in_tests(self):
  233. """Test that dulwich.porcelain is only used in allowed test directories."""
  234. test_files = _get_python_files("tests")
  235. self.assertGreater(len(test_files), 0, "No Python files found in tests/")
  236. # Directories allowed to use porcelain
  237. allowed_dirs = {
  238. "tests/cli/",
  239. "tests/porcelain/",
  240. "tests/compat/",
  241. }
  242. # Individual test files allowed to use porcelain
  243. allowed_files: set[str] = set()
  244. files_with_violations = []
  245. for file_path, rel_path in test_files:
  246. # Convert to forward slashes for consistency
  247. rel_path_str = str(rel_path).replace(os.sep, "/")
  248. # Skip allowed directories
  249. if any(rel_path_str.startswith(d) for d in allowed_dirs):
  250. continue
  251. # Skip allowed files
  252. if rel_path_str in allowed_files:
  253. continue
  254. if _imports_module(file_path, "dulwich.porcelain"):
  255. files_with_violations.append(rel_path_str)
  256. if files_with_violations:
  257. self.fail(
  258. "The following test files use dulwich.porcelain but are not in the allowed list:\n"
  259. + "\n".join(f" - {f}" for f in files_with_violations)
  260. + "\n\nLower-level tests should use dulwich APIs directly, not porcelain."
  261. + "\n\nAllowed directories:\n"
  262. + "\n".join(f" - {d}" for d in sorted(allowed_dirs))
  263. + "\nAllowed files:\n"
  264. + "\n".join(f" - {f}" for f in sorted(allowed_files))
  265. )
  266. def test_cli_usage_restricted_in_tests(self):
  267. """Test that dulwich.cli is only used in CLI test directory."""
  268. test_files = _get_python_files("tests")
  269. self.assertGreater(len(test_files), 0, "No Python files found in tests/")
  270. # Only CLI tests should import dulwich.cli
  271. allowed_dir = "tests/cli/"
  272. files_with_violations = []
  273. for file_path, rel_path in test_files:
  274. # Convert to forward slashes for consistency
  275. rel_path_str = str(rel_path).replace(os.sep, "/")
  276. # Skip allowed directory
  277. if rel_path_str.startswith(allowed_dir):
  278. continue
  279. if _imports_module(file_path, "dulwich.cli"):
  280. files_with_violations.append(rel_path_str)
  281. if files_with_violations:
  282. self.fail(
  283. "The following test files use dulwich.cli but are not in tests/cli/:\n"
  284. + "\n".join(f" - {f}" for f in files_with_violations)
  285. + "\n\nOnly CLI tests in tests/cli/ should import dulwich.cli."
  286. )