2
0

test_source.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  1. # test_source.py -- Tests for scanning dulwich source code
  2. # Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Tests for scanning dulwich source code for compliance."""
  22. import os
  23. import re
  24. import unittest
  25. from pathlib import Path
  26. # Files that are allowed to not have the standard preamble
  27. PREAMBLE_EXCEPTIONS = [
  28. "dulwich/diffstat.py", # MIT licensed file
  29. ]
  30. # Files that are allowed to use os.environ (beyond cli.py and porcelain/)
  31. OS_ENVIRON_EXCEPTIONS = [
  32. "dulwich/client.py", # Git protocol environment variables
  33. "dulwich/repo.py", # User identity environment variables
  34. "dulwich/log_utils.py", # GIT_TRACE environment variable
  35. "dulwich/config.py", # Git configuration environment variables
  36. "dulwich/gc.py", # GIT_AUTO_GC environment variable
  37. "dulwich/contrib/swift.py", # DULWICH_SWIFT_CFG environment variable
  38. "dulwich/hooks.py", # Git hooks environment setup
  39. ]
  40. # Standard license block that must appear in all files
  41. STANDARD_LICENSE_BLOCK = [
  42. "# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later\n",
  43. "# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU\n",
  44. "# General Public License as published by the Free Software Foundation; version 2.0\n",
  45. "# or (at your option) any later version. You can redistribute it and/or\n",
  46. "# modify it under the terms of either of these two licenses.\n",
  47. "#\n",
  48. "# Unless required by applicable law or agreed to in writing, software\n",
  49. '# distributed under the License is distributed on an "AS IS" BASIS,\n',
  50. "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
  51. "# See the License for the specific language governing permissions and\n",
  52. "# limitations under the License.\n",
  53. "#\n",
  54. "# You should have received a copy of the licenses; if not, see\n",
  55. "# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License\n",
  56. "# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache\n",
  57. "# License, Version 2.0.\n",
  58. "#\n",
  59. ]
  60. class SourceCodeComplianceTests(unittest.TestCase):
  61. """Tests to ensure dulwich source code follows project standards."""
  62. @staticmethod
  63. def _get_dulwich_python_files():
  64. """Get all Python files in the dulwich package.
  65. Returns:
  66. List of tuples of (Path object, relative path from project root)
  67. """
  68. project_root = Path(__file__).parent.parent
  69. dulwich_dir = project_root / "dulwich"
  70. if not dulwich_dir.exists():
  71. raise RuntimeError(f"dulwich directory not found at {dulwich_dir}")
  72. python_files = []
  73. for root, dirs, files in os.walk(dulwich_dir):
  74. # Skip build directories
  75. if root.endswith(("build", "__pycache__")):
  76. continue
  77. for file in files:
  78. if file.endswith(".py"):
  79. file_path = Path(root) / file
  80. rel_path = file_path.relative_to(project_root)
  81. python_files.append((file_path, rel_path))
  82. return python_files
  83. @classmethod
  84. def _has_standard_preamble(cls, file_path: Path) -> tuple[bool, str]:
  85. """Check if a file has the standard dulwich preamble.
  86. The standard preamble consists of:
  87. - First line: # filename -- Description (or similar)
  88. - Copyright line(s): # Copyright (C) ...
  89. - Empty comment: #
  90. - Standard license block (exact match required)
  91. Args:
  92. file_path: Path to the Python file to check
  93. Returns:
  94. Tuple of (has_preamble, error_message)
  95. """
  96. with open(file_path, encoding="utf-8") as f:
  97. lines = f.readlines()
  98. if len(lines) < 21:
  99. return False, "File too short to contain standard preamble"
  100. # Check first line starts with #
  101. if not lines[0].startswith("#"):
  102. return False, "First line does not start with #"
  103. # Find the SPDX line (should be within first 10 lines)
  104. spdx_line_idx = None
  105. for i in range(min(10, len(lines))):
  106. if "SPDX-License-Identifier" in lines[i]:
  107. spdx_line_idx = i
  108. break
  109. if spdx_line_idx is None:
  110. return False, "SPDX-License-Identifier line not found in first 10 lines"
  111. # Check that we have enough lines after the SPDX line
  112. if len(lines) < spdx_line_idx + len(STANDARD_LICENSE_BLOCK):
  113. return (
  114. False,
  115. "File too short to contain complete license block after SPDX line",
  116. )
  117. # Extract the license block from the file
  118. file_license_block = lines[
  119. spdx_line_idx : spdx_line_idx + len(STANDARD_LICENSE_BLOCK)
  120. ]
  121. # Compare with standard license block
  122. for i, (expected, actual) in enumerate(
  123. zip(STANDARD_LICENSE_BLOCK, file_license_block)
  124. ):
  125. if expected != actual:
  126. return (
  127. False,
  128. f"License block mismatch at line {spdx_line_idx + i + 1}: expected {expected!r}, got {actual!r}",
  129. )
  130. return True, ""
  131. def test_all_files_have_preamble(self):
  132. """Test that all dulwich Python files have the standard preamble."""
  133. python_files = self._get_dulwich_python_files()
  134. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  135. files_without_preamble = []
  136. for file_path, rel_path in python_files:
  137. # Convert to forward slashes for consistency
  138. rel_path_str = str(rel_path).replace(os.sep, "/")
  139. # Skip exceptions
  140. if rel_path_str in PREAMBLE_EXCEPTIONS:
  141. continue
  142. has_preamble, error_msg = self._has_standard_preamble(file_path)
  143. if not has_preamble:
  144. files_without_preamble.append(f"{rel_path_str}: {error_msg}")
  145. if files_without_preamble:
  146. self.fail(
  147. "The following files are missing the standard preamble:\n"
  148. + "\n".join(f" - {f}" for f in files_without_preamble)
  149. )
  150. def test_os_environ_usage_restricted(self):
  151. """Test that os.environ is only used in allowed files."""
  152. python_files = self._get_dulwich_python_files()
  153. self.assertGreater(len(python_files), 0, "No Python files found in dulwich/")
  154. # Files allowed to use os.environ
  155. allowed_files = {
  156. "dulwich/cli.py",
  157. "dulwich/porcelain/",
  158. }
  159. # Add exception files
  160. allowed_files.update(OS_ENVIRON_EXCEPTIONS)
  161. files_with_violations = []
  162. # Pattern to match os.environ usage
  163. os_environ_pattern = re.compile(r"\bos\.environ\b")
  164. for file_path, rel_path in python_files:
  165. # Convert to forward slashes for consistency
  166. rel_path_str = str(rel_path).replace(os.sep, "/")
  167. # Skip allowed files
  168. if any(rel_path_str.startswith(f) for f in allowed_files):
  169. continue
  170. with open(file_path, encoding="utf-8") as f:
  171. content = f.read()
  172. matches = os_environ_pattern.findall(content)
  173. if matches:
  174. # Count occurrences
  175. line_numbers = []
  176. for line_num, line in enumerate(content.split("\n"), 1):
  177. if os_environ_pattern.search(line):
  178. line_numbers.append(line_num)
  179. files_with_violations.append(
  180. f"{rel_path_str}: os.environ used on line(s) {', '.join(map(str, line_numbers))}"
  181. )
  182. if files_with_violations:
  183. self.fail(
  184. "The following files use os.environ but are not in the allowed list:\n"
  185. + "\n".join(f" - {f}" for f in files_with_violations)
  186. + "\n\nFiles allowed to use os.environ:\n"
  187. + "\n".join(f" - {f}" for f in sorted(allowed_files))
  188. )