test_partial_clone.py 17 KB


# test_partial_clone.py -- Compatibility tests for partial clone.
# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
"""Compatibility tests for partial clone support."""

import os
import shutil
import socket
import subprocess
import sys
import tempfile
import threading
import time

from dulwich.objects import Blob, Tree
from dulwich.repo import Repo
from dulwich.server import DictBackend, TCPGitServer
from dulwich.tests.utils import make_commit

from .. import skipIf
from .utils import CompatTestCase, require_git_version, run_git_or_fail


  33. @skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
  34. class PartialCloneServerTestCase(CompatTestCase):
  35. """Tests for partial clone server compatibility with git client."""
  36. protocol = "git"
  37. # Partial clone support was introduced in git 2.17.0
  38. min_git_version = (2, 17, 0)
  39. def setUp(self) -> None:
  40. super().setUp()
  41. require_git_version(self.min_git_version)
  42. def _start_server(self, repo):
  43. backend = DictBackend({b"/": repo})
  44. dul_server = TCPGitServer(backend, b"localhost", 0)
  45. # Start server in a thread
  46. server_thread = threading.Thread(target=dul_server.serve)
  47. server_thread.daemon = True
  48. server_thread.start()
  49. # Add cleanup
  50. def cleanup_server():
  51. dul_server.shutdown()
  52. dul_server.server_close()
  53. server_thread.join(timeout=1.0)
  54. self.addCleanup(cleanup_server)
  55. self._server = dul_server
  56. _, port = self._server.socket.getsockname()
  57. return port
  58. def url(self, port) -> str:
  59. return f"{self.protocol}://localhost:{port}/"
  60. def test_clone_with_blob_none_filter(self) -> None:
  61. """Test that git client can clone with blob:none filter."""
  62. # Create repository with dulwich
  63. repo_path = tempfile.mkdtemp()
  64. self.addCleanup(shutil.rmtree, repo_path)
  65. source_repo = Repo.init(repo_path, mkdir=False)
  66. # Create test content with multiple blobs
  67. blob1 = Blob.from_string(b"File 1 content - this is a test file")
  68. blob2 = Blob.from_string(b"File 2 content - another test file")
  69. blob3 = Blob.from_string(b"File 3 content - third test file")
  70. tree = Tree()
  71. tree.add(b"file1.txt", 0o100644, blob1.id)
  72. tree.add(b"file2.txt", 0o100644, blob2.id)
  73. tree.add(b"file3.txt", 0o100644, blob3.id)
  74. # Add objects to repo
  75. source_repo.object_store.add_object(blob1)
  76. source_repo.object_store.add_object(blob2)
  77. source_repo.object_store.add_object(blob3)
  78. source_repo.object_store.add_object(tree)
  79. commit = make_commit(tree=tree.id, message=b"Test commit with multiple files")
  80. source_repo.object_store.add_object(commit)
  81. source_repo.refs[b"refs/heads/master"] = commit.id
  82. # Start dulwich server
  83. port = self._start_server(source_repo)
  84. # Clone with blob:none filter
  85. clone_path = tempfile.mkdtemp()
  86. self.addCleanup(shutil.rmtree, clone_path)
  87. clone_dir = os.path.join(clone_path, "cloned_repo")
  88. run_git_or_fail(
  89. ["clone", "--filter=blob:none", "--no-checkout", self.url(port), clone_dir],
  90. cwd=clone_path,
  91. )
  92. # Verify cloned repo has commit and tree but no blobs
  93. cloned_repo = Repo(clone_dir)
  94. self.addCleanup(cloned_repo.close)
  95. # Commit should exist
  96. self.assertEqual(cloned_repo.refs[b"refs/heads/master"], commit.id)
  97. # Tree should exist
  98. self.assertIn(tree.id, cloned_repo.object_store)
  99. # Blobs should NOT be in object store (filtered out)
  100. # Note: git may still have the blobs if they're small enough to be inlined
  101. # or if it fetched them anyway, so we just verify the filter was accepted
  102. # Verify git recognizes this as a partial clone
  103. config_output = run_git_or_fail(
  104. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  105. )
  106. self.assertEqual(config_output.strip(), b"true")
  107. source_repo.close()
  108. def test_clone_with_blob_limit_filter(self) -> None:
  109. """Test that git client can clone with blob:limit filter."""
  110. # Create repository
  111. repo_path = tempfile.mkdtemp()
  112. self.addCleanup(shutil.rmtree, repo_path)
  113. source_repo = Repo.init(repo_path, mkdir=False)
  114. # Create blobs of different sizes
  115. small_blob = Blob.from_string(b"small") # 5 bytes
  116. large_blob = Blob.from_string(b"x" * 1000) # 1000 bytes
  117. tree = Tree()
  118. tree.add(b"small.txt", 0o100644, small_blob.id)
  119. tree.add(b"large.txt", 0o100644, large_blob.id)
  120. source_repo.object_store.add_object(small_blob)
  121. source_repo.object_store.add_object(large_blob)
  122. source_repo.object_store.add_object(tree)
  123. commit = make_commit(tree=tree.id, message=b"Test commit with mixed sizes")
  124. source_repo.object_store.add_object(commit)
  125. source_repo.refs[b"refs/heads/master"] = commit.id
  126. # Start server
  127. port = self._start_server(source_repo)
  128. # Clone with blob:limit=100 filter (should exclude large blob)
  129. clone_path = tempfile.mkdtemp()
  130. self.addCleanup(shutil.rmtree, clone_path)
  131. clone_dir = os.path.join(clone_path, "cloned_repo")
  132. run_git_or_fail(
  133. [
  134. "clone",
  135. "--filter=blob:limit=100",
  136. "--no-checkout",
  137. self.url(port),
  138. clone_dir,
  139. ],
  140. cwd=clone_path,
  141. )
  142. # Verify it's a partial clone
  143. cloned_repo = Repo(clone_dir)
  144. self.addCleanup(cloned_repo.close)
  145. config_output = run_git_or_fail(
  146. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  147. )
  148. self.assertEqual(config_output.strip(), b"true")
  149. source_repo.close()
  150. def test_clone_with_tree_depth_filter(self) -> None:
  151. """Test that git client can clone with tree:0 filter."""
  152. # Create repository with nested structure
  153. repo_path = tempfile.mkdtemp()
  154. self.addCleanup(shutil.rmtree, repo_path)
  155. source_repo = Repo.init(repo_path, mkdir=False)
  156. # Create nested tree structure
  157. blob1 = Blob.from_string(b"root file")
  158. blob2 = Blob.from_string(b"nested file")
  159. inner_tree = Tree()
  160. inner_tree.add(b"nested.txt", 0o100644, blob2.id)
  161. outer_tree = Tree()
  162. outer_tree.add(b"root.txt", 0o100644, blob1.id)
  163. outer_tree.add(b"subdir", 0o040000, inner_tree.id)
  164. source_repo.object_store.add_object(blob1)
  165. source_repo.object_store.add_object(blob2)
  166. source_repo.object_store.add_object(inner_tree)
  167. source_repo.object_store.add_object(outer_tree)
  168. commit = make_commit(tree=outer_tree.id, message=b"Test nested structure")
  169. source_repo.object_store.add_object(commit)
  170. source_repo.refs[b"refs/heads/master"] = commit.id
  171. # Start server
  172. port = self._start_server(source_repo)
  173. # Clone with tree:0 filter
  174. clone_path = tempfile.mkdtemp()
  175. self.addCleanup(shutil.rmtree, clone_path)
  176. clone_dir = os.path.join(clone_path, "cloned_repo")
  177. run_git_or_fail(
  178. ["clone", "--filter=tree:0", "--no-checkout", self.url(port), clone_dir],
  179. cwd=clone_path,
  180. )
  181. # Verify it's a partial clone
  182. cloned_repo = Repo(clone_dir)
  183. self.addCleanup(cloned_repo.close)
  184. config_output = run_git_or_fail(
  185. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  186. )
  187. self.assertEqual(config_output.strip(), b"true")
  188. source_repo.close()
  189. def test_clone_with_filter_protocol_v0(self) -> None:
  190. """Test that git client can clone with filter using protocol v0."""
  191. # Create repository with dulwich
  192. repo_path = tempfile.mkdtemp()
  193. self.addCleanup(shutil.rmtree, repo_path)
  194. source_repo = Repo.init(repo_path, mkdir=False)
  195. # Create test content
  196. blob = Blob.from_string(b"test content")
  197. tree = Tree()
  198. tree.add(b"file.txt", 0o100644, blob.id)
  199. source_repo.object_store.add_object(blob)
  200. source_repo.object_store.add_object(tree)
  201. commit = make_commit(tree=tree.id, message=b"Test commit")
  202. source_repo.object_store.add_object(commit)
  203. source_repo.refs[b"refs/heads/master"] = commit.id
  204. # Start server
  205. port = self._start_server(source_repo)
  206. # Clone with protocol v0 and blob:none filter
  207. clone_path = tempfile.mkdtemp()
  208. self.addCleanup(shutil.rmtree, clone_path)
  209. clone_dir = os.path.join(clone_path, "cloned_repo")
  210. run_git_or_fail(
  211. [
  212. "-c",
  213. "protocol.version=0",
  214. "clone",
  215. "--filter=blob:none",
  216. "--no-checkout",
  217. self.url(port),
  218. clone_dir,
  219. ],
  220. cwd=clone_path,
  221. )
  222. # Verify partial clone
  223. cloned_repo = Repo(clone_dir)
  224. self.addCleanup(cloned_repo.close)
  225. self.assertIn(commit.id, cloned_repo.object_store)
  226. self.assertIn(tree.id, cloned_repo.object_store)
  227. source_repo.close()
  228. def test_clone_with_filter_protocol_v2(self) -> None:
  229. """Test that git client can clone with filter using protocol v2."""
  230. # Create repository with dulwich
  231. repo_path = tempfile.mkdtemp()
  232. self.addCleanup(shutil.rmtree, repo_path)
  233. source_repo = Repo.init(repo_path, mkdir=False)
  234. # Create test content
  235. blob = Blob.from_string(b"test content")
  236. tree = Tree()
  237. tree.add(b"file.txt", 0o100644, blob.id)
  238. source_repo.object_store.add_object(blob)
  239. source_repo.object_store.add_object(tree)
  240. commit = make_commit(tree=tree.id, message=b"Test commit")
  241. source_repo.object_store.add_object(commit)
  242. source_repo.refs[b"refs/heads/master"] = commit.id
  243. # Start server
  244. port = self._start_server(source_repo)
  245. # Clone with protocol v2 and blob:none filter
  246. clone_path = tempfile.mkdtemp()
  247. self.addCleanup(shutil.rmtree, clone_path)
  248. clone_dir = os.path.join(clone_path, "cloned_repo")
  249. run_git_or_fail(
  250. [
  251. "-c",
  252. "protocol.version=2",
  253. "clone",
  254. "--filter=blob:none",
  255. "--no-checkout",
  256. self.url(port),
  257. clone_dir,
  258. ],
  259. cwd=clone_path,
  260. )
  261. # Verify partial clone
  262. cloned_repo = Repo(clone_dir)
  263. self.addCleanup(cloned_repo.close)
  264. self.assertIn(commit.id, cloned_repo.object_store)
  265. self.assertIn(tree.id, cloned_repo.object_store)
  266. source_repo.close()
  267. @skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
  268. class PartialCloneClientTestCase(CompatTestCase):
  269. """Tests for partial clone client compatibility with git server."""
  270. # Partial clone support was introduced in git 2.17.0
  271. min_git_version = (2, 17, 0)
  272. def setUp(self) -> None:
  273. super().setUp()
  274. require_git_version(self.min_git_version)
  275. def test_fetch_with_blob_none_filter(self) -> None:
  276. """Test that dulwich client can fetch with blob:none filter."""
  277. from dulwich.client import get_transport_and_path
  278. # Create a git repository using git itself
  279. repo_path = tempfile.mkdtemp()
  280. self.addCleanup(shutil.rmtree, repo_path)
  281. # Initialize with git
  282. run_git_or_fail(["init"], cwd=repo_path)
  283. run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
  284. run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
  285. # Create test files
  286. file1 = os.path.join(repo_path, "file1.txt")
  287. with open(file1, "wb") as f:
  288. f.write(b"Content of file 1")
  289. file2 = os.path.join(repo_path, "file2.txt")
  290. with open(file2, "wb") as f:
  291. f.write(b"Content of file 2")
  292. # Commit files
  293. run_git_or_fail(["add", "."], cwd=repo_path)
  294. run_git_or_fail(["commit", "-m", "Initial commit"], cwd=repo_path)
  295. # Start git daemon
  296. daemon_port = self._start_git_daemon(repo_path)
  297. # Create destination repo
  298. dest_path = tempfile.mkdtemp()
  299. self.addCleanup(shutil.rmtree, dest_path)
  300. dest_repo = Repo.init(dest_path, mkdir=False)
  301. self.addCleanup(dest_repo.close)
  302. # Fetch with blob:none filter using dulwich client
  303. client, path = get_transport_and_path(
  304. f"git://localhost:{daemon_port}/",
  305. thin_packs=False,
  306. )
  307. def determine_wants(refs, depth=None):
  308. # Get all refs
  309. return list(refs.values())
  310. # Fetch with filter
  311. result = client.fetch(
  312. path,
  313. dest_repo,
  314. determine_wants=determine_wants,
  315. progress=None,
  316. filter_spec=b"blob:none",
  317. )
  318. # The fetch should succeed with partial clone
  319. self.assertIsNotNone(result)
  320. def test_clone_with_filter(self) -> None:
  321. """Test that dulwich clone function works with filter."""
  322. from dulwich.client import get_transport_and_path
  323. # Create a git repository
  324. repo_path = tempfile.mkdtemp()
  325. self.addCleanup(shutil.rmtree, repo_path)
  326. run_git_or_fail(["init"], cwd=repo_path)
  327. run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
  328. run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
  329. # Create and commit a file
  330. test_file = os.path.join(repo_path, "test.txt")
  331. with open(test_file, "wb") as f:
  332. f.write(b"Test content for partial clone")
  333. run_git_or_fail(["add", "."], cwd=repo_path)
  334. run_git_or_fail(["commit", "-m", "Test commit"], cwd=repo_path)
  335. # Start git daemon
  336. daemon_port = self._start_git_daemon(repo_path)
  337. # Clone with dulwich using filter
  338. dest_path = tempfile.mkdtemp()
  339. self.addCleanup(shutil.rmtree, dest_path)
  340. client, path = get_transport_and_path(f"git://localhost:{daemon_port}/")
  341. # Clone with blob:limit filter
  342. cloned_repo = client.clone(
  343. path,
  344. dest_path,
  345. mkdir=False,
  346. filter_spec=b"blob:limit=100",
  347. )
  348. self.addCleanup(cloned_repo.close)
  349. # Verify clone succeeded
  350. self.assertTrue(os.path.exists(dest_path))
  351. self.assertTrue(os.path.exists(os.path.join(dest_path, ".git")))
  352. def _start_git_daemon(self, repo_path):
  353. """Start git daemon for testing."""
  354. import socket
  355. import subprocess
  356. import time
  357. # Find an available port
  358. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  359. sock.bind(("localhost", 0))
  360. _, port = sock.getsockname()
  361. sock.close()
  362. # Mark directory as git daemon export
  363. export_file = os.path.join(repo_path, "git-daemon-export-ok")
  364. with open(export_file, "w") as f:
  365. f.write("")
  366. # Start git daemon
  367. daemon_process = subprocess.Popen(
  368. [
  369. "git",
  370. "daemon",
  371. "--reuseaddr",
  372. f"--port={port}",
  373. "--base-path=.",
  374. "--export-all",
  375. "--enable=receive-pack",
  376. ".",
  377. ],
  378. cwd=repo_path,
  379. stdout=subprocess.PIPE,
  380. stderr=subprocess.PIPE,
  381. )
  382. # Give daemon time to start
  383. time.sleep(0.5)
  384. def cleanup_daemon():
  385. daemon_process.terminate()
  386. daemon_process.wait(timeout=2)
  387. self.addCleanup(cleanup_daemon)
  388. return port