test_partial_clone.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
# test_partial_clone.py -- Compatibility tests for partial clone.
# Copyright (C) 2024 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """Compatibility tests for partial clone support."""
  22. import os
  23. import shutil
  24. import sys
  25. import tempfile
  26. import threading
  27. from dulwich.objects import Blob, Tree
  28. from dulwich.repo import Repo
  29. from dulwich.server import DictBackend, TCPGitServer
  30. from dulwich.tests.utils import make_commit
  31. from .. import skipIf
  32. from .utils import CompatTestCase, require_git_version, run_git_or_fail
  33. @skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
  34. class PartialCloneServerTestCase(CompatTestCase):
  35. """Tests for partial clone server compatibility with git client."""
  36. protocol = "git"
  37. # Partial clone support was introduced in git 2.17.0
  38. min_git_version = (2, 17, 0)
  39. def setUp(self) -> None:
  40. super().setUp()
  41. require_git_version(self.min_git_version)
  42. def _start_server(self, repo):
  43. backend = DictBackend({b"/": repo})
  44. dul_server = TCPGitServer(backend, b"localhost", 0)
  45. # Start server in a thread
  46. server_thread = threading.Thread(target=dul_server.serve)
  47. server_thread.daemon = True
  48. server_thread.start()
  49. # Add cleanup
  50. def cleanup_server():
  51. dul_server.shutdown()
  52. dul_server.server_close()
  53. server_thread.join(timeout=1.0)
  54. self.addCleanup(cleanup_server)
  55. self._server = dul_server
  56. _, port = self._server.socket.getsockname()
  57. return port
  58. def url(self, port) -> str:
  59. return f"{self.protocol}://localhost:{port}/"
  60. def test_clone_with_blob_none_filter(self) -> None:
  61. """Test that git client can clone with blob:none filter."""
  62. # Create repository with dulwich
  63. repo_path = tempfile.mkdtemp()
  64. self.addCleanup(shutil.rmtree, repo_path)
  65. source_repo = Repo.init(repo_path, mkdir=False)
  66. # Create test content with multiple blobs
  67. blob1 = Blob.from_string(b"File 1 content - this is a test file")
  68. blob2 = Blob.from_string(b"File 2 content - another test file")
  69. blob3 = Blob.from_string(b"File 3 content - third test file")
  70. tree = Tree()
  71. tree.add(b"file1.txt", 0o100644, blob1.id)
  72. tree.add(b"file2.txt", 0o100644, blob2.id)
  73. tree.add(b"file3.txt", 0o100644, blob3.id)
  74. # Add objects to repo
  75. source_repo.object_store.add_object(blob1)
  76. source_repo.object_store.add_object(blob2)
  77. source_repo.object_store.add_object(blob3)
  78. source_repo.object_store.add_object(tree)
  79. commit = make_commit(tree=tree.id, message=b"Test commit with multiple files")
  80. source_repo.object_store.add_object(commit)
  81. source_repo.refs[b"refs/heads/master"] = commit.id
  82. # Start dulwich server
  83. port = self._start_server(source_repo)
  84. # Clone with blob:none filter
  85. clone_path = tempfile.mkdtemp()
  86. self.addCleanup(shutil.rmtree, clone_path)
  87. clone_dir = os.path.join(clone_path, "cloned_repo")
  88. run_git_or_fail(
  89. ["clone", "--filter=blob:none", self.url(port), clone_dir],
  90. cwd=clone_path,
  91. )
  92. # Verify cloned repo has commit and tree but no blobs
  93. cloned_repo = Repo(clone_dir)
  94. self.addCleanup(cloned_repo.close)
  95. # Commit should exist
  96. self.assertEqual(cloned_repo.refs[b"refs/heads/master"], commit.id)
  97. # Tree should exist
  98. self.assertIn(tree.id, cloned_repo.object_store)
  99. # Blobs should NOT be in object store (filtered out)
  100. # Note: git may still have the blobs if they're small enough to be inlined
  101. # or if it fetched them anyway, so we just verify the filter was accepted
  102. # Verify git recognizes this as a partial clone
  103. config_output = run_git_or_fail(
  104. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  105. )
  106. self.assertEqual(config_output.strip(), b"true")
  107. source_repo.close()
  108. def test_clone_with_blob_limit_filter(self) -> None:
  109. """Test that git client can clone with blob:limit filter."""
  110. # Create repository
  111. repo_path = tempfile.mkdtemp()
  112. self.addCleanup(shutil.rmtree, repo_path)
  113. source_repo = Repo.init(repo_path, mkdir=False)
  114. # Create blobs of different sizes
  115. small_blob = Blob.from_string(b"small") # 5 bytes
  116. large_blob = Blob.from_string(b"x" * 1000) # 1000 bytes
  117. tree = Tree()
  118. tree.add(b"small.txt", 0o100644, small_blob.id)
  119. tree.add(b"large.txt", 0o100644, large_blob.id)
  120. source_repo.object_store.add_object(small_blob)
  121. source_repo.object_store.add_object(large_blob)
  122. source_repo.object_store.add_object(tree)
  123. commit = make_commit(tree=tree.id, message=b"Test commit with mixed sizes")
  124. source_repo.object_store.add_object(commit)
  125. source_repo.refs[b"refs/heads/master"] = commit.id
  126. # Start server
  127. port = self._start_server(source_repo)
  128. # Clone with blob:limit=100 filter (should exclude large blob)
  129. clone_path = tempfile.mkdtemp()
  130. self.addCleanup(shutil.rmtree, clone_path)
  131. clone_dir = os.path.join(clone_path, "cloned_repo")
  132. run_git_or_fail(
  133. ["clone", "--filter=blob:limit=100", self.url(port), clone_dir],
  134. cwd=clone_path,
  135. )
  136. # Verify it's a partial clone
  137. cloned_repo = Repo(clone_dir)
  138. self.addCleanup(cloned_repo.close)
  139. config_output = run_git_or_fail(
  140. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  141. )
  142. self.assertEqual(config_output.strip(), b"true")
  143. source_repo.close()
  144. def test_clone_with_tree_depth_filter(self) -> None:
  145. """Test that git client can clone with tree:0 filter."""
  146. # Create repository with nested structure
  147. repo_path = tempfile.mkdtemp()
  148. self.addCleanup(shutil.rmtree, repo_path)
  149. source_repo = Repo.init(repo_path, mkdir=False)
  150. # Create nested tree structure
  151. blob1 = Blob.from_string(b"root file")
  152. blob2 = Blob.from_string(b"nested file")
  153. inner_tree = Tree()
  154. inner_tree.add(b"nested.txt", 0o100644, blob2.id)
  155. outer_tree = Tree()
  156. outer_tree.add(b"root.txt", 0o100644, blob1.id)
  157. outer_tree.add(b"subdir", 0o040000, inner_tree.id)
  158. source_repo.object_store.add_object(blob1)
  159. source_repo.object_store.add_object(blob2)
  160. source_repo.object_store.add_object(inner_tree)
  161. source_repo.object_store.add_object(outer_tree)
  162. commit = make_commit(tree=outer_tree.id, message=b"Test nested structure")
  163. source_repo.object_store.add_object(commit)
  164. source_repo.refs[b"refs/heads/master"] = commit.id
  165. # Start server
  166. port = self._start_server(source_repo)
  167. # Clone with tree:0 filter
  168. clone_path = tempfile.mkdtemp()
  169. self.addCleanup(shutil.rmtree, clone_path)
  170. clone_dir = os.path.join(clone_path, "cloned_repo")
  171. run_git_or_fail(
  172. ["clone", "--filter=tree:0", self.url(port), clone_dir],
  173. cwd=clone_path,
  174. )
  175. # Verify it's a partial clone
  176. cloned_repo = Repo(clone_dir)
  177. self.addCleanup(cloned_repo.close)
  178. config_output = run_git_or_fail(
  179. ["config", "--get", "remote.origin.promisor"], cwd=clone_dir
  180. )
  181. self.assertEqual(config_output.strip(), b"true")
  182. source_repo.close()
  183. @skipIf(sys.platform == "win32", "Broken on windows, with very long fail time.")
  184. class PartialCloneClientTestCase(CompatTestCase):
  185. """Tests for partial clone client compatibility with git server."""
  186. # Partial clone support was introduced in git 2.17.0
  187. min_git_version = (2, 17, 0)
  188. def setUp(self) -> None:
  189. super().setUp()
  190. require_git_version(self.min_git_version)
  191. def test_fetch_with_blob_none_filter(self) -> None:
  192. """Test that dulwich client can fetch with blob:none filter."""
  193. from dulwich.client import get_transport_and_path
  194. # Create a git repository using git itself
  195. repo_path = tempfile.mkdtemp()
  196. self.addCleanup(shutil.rmtree, repo_path)
  197. # Initialize with git
  198. run_git_or_fail(["init"], cwd=repo_path)
  199. run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
  200. run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
  201. # Create test files
  202. file1 = os.path.join(repo_path, "file1.txt")
  203. with open(file1, "wb") as f:
  204. f.write(b"Content of file 1")
  205. file2 = os.path.join(repo_path, "file2.txt")
  206. with open(file2, "wb") as f:
  207. f.write(b"Content of file 2")
  208. # Commit files
  209. run_git_or_fail(["add", "."], cwd=repo_path)
  210. run_git_or_fail(["commit", "-m", "Initial commit"], cwd=repo_path)
  211. # Start git daemon
  212. daemon_port = self._start_git_daemon(repo_path)
  213. # Create destination repo
  214. dest_path = tempfile.mkdtemp()
  215. self.addCleanup(shutil.rmtree, dest_path)
  216. dest_repo = Repo.init(dest_path, mkdir=False)
  217. self.addCleanup(dest_repo.close)
  218. # Fetch with blob:none filter using dulwich client
  219. client, path = get_transport_and_path(
  220. f"git://localhost:{daemon_port}/",
  221. thin_packs=False,
  222. )
  223. def determine_wants(refs, depth=None):
  224. # Get all refs
  225. return list(refs.values())
  226. # Fetch with filter
  227. result = client.fetch(
  228. path,
  229. dest_repo,
  230. determine_wants=determine_wants,
  231. progress=None,
  232. filter_spec=b"blob:none",
  233. )
  234. # The fetch should succeed with partial clone
  235. self.assertIsNotNone(result)
  236. def test_clone_with_filter(self) -> None:
  237. """Test that dulwich clone function works with filter."""
  238. from dulwich.client import get_transport_and_path
  239. # Create a git repository
  240. repo_path = tempfile.mkdtemp()
  241. self.addCleanup(shutil.rmtree, repo_path)
  242. run_git_or_fail(["init"], cwd=repo_path)
  243. run_git_or_fail(["config", "user.name", "Test User"], cwd=repo_path)
  244. run_git_or_fail(["config", "user.email", "test@example.com"], cwd=repo_path)
  245. # Create and commit a file
  246. test_file = os.path.join(repo_path, "test.txt")
  247. with open(test_file, "wb") as f:
  248. f.write(b"Test content for partial clone")
  249. run_git_or_fail(["add", "."], cwd=repo_path)
  250. run_git_or_fail(["commit", "-m", "Test commit"], cwd=repo_path)
  251. # Start git daemon
  252. daemon_port = self._start_git_daemon(repo_path)
  253. # Clone with dulwich using filter
  254. dest_path = tempfile.mkdtemp()
  255. self.addCleanup(shutil.rmtree, dest_path)
  256. client, path = get_transport_and_path(f"git://localhost:{daemon_port}/")
  257. # Clone with blob:limit filter
  258. cloned_repo = client.clone(
  259. path,
  260. dest_path,
  261. mkdir=False,
  262. filter_spec=b"blob:limit=100",
  263. )
  264. self.addCleanup(cloned_repo.close)
  265. # Verify clone succeeded
  266. self.assertTrue(os.path.exists(dest_path))
  267. self.assertTrue(os.path.exists(os.path.join(dest_path, ".git")))
  268. def _start_git_daemon(self, repo_path):
  269. """Start git daemon for testing."""
  270. import socket
  271. import subprocess
  272. import time
  273. # Find an available port
  274. sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  275. sock.bind(("localhost", 0))
  276. _, port = sock.getsockname()
  277. sock.close()
  278. # Mark directory as git daemon export
  279. export_file = os.path.join(repo_path, "git-daemon-export-ok")
  280. with open(export_file, "w") as f:
  281. f.write("")
  282. # Start git daemon
  283. daemon_process = subprocess.Popen(
  284. [
  285. "git",
  286. "daemon",
  287. "--reuseaddr",
  288. f"--port={port}",
  289. "--base-path=.",
  290. "--export-all",
  291. "--enable=receive-pack",
  292. ".",
  293. ],
  294. cwd=repo_path,
  295. stdout=subprocess.PIPE,
  296. stderr=subprocess.PIPE,
  297. )
  298. # Give daemon time to start
  299. time.sleep(0.5)
  300. def cleanup_daemon():
  301. daemon_process.terminate()
  302. daemon_process.wait(timeout=2)
  303. self.addCleanup(cleanup_daemon)
  304. return port