greenthreads.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. # greenthreads.py -- Utility module for querying an ObjectStore with gevent
  2. # Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
  3. #
  4. # Author: Fabien Boucher <fabien.boucher@enovance.com>
  5. #
  6. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  7. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  8. # General Public License as published by the Free Software Foundation; version 2.0
  9. # or (at your option) any later version. You can redistribute it and/or
  10. # modify it under the terms of either of these two licenses.
  11. #
  12. # Unless required by applicable law or agreed to in writing, software
  13. # distributed under the License is distributed on an "AS IS" BASIS,
  14. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. # See the License for the specific language governing permissions and
  16. # limitations under the License.
  17. #
  18. # You should have received a copy of the licenses; if not, see
  19. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  20. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  21. # License, Version 2.0.
  22. #
  23. """Utility module for querying an ObjectStore with gevent."""
  24. from typing import Optional
  25. import gevent
  26. from gevent import pool
  27. from .object_store import (
  28. MissingObjectFinder,
  29. _collect_ancestors,
  30. _collect_filetree_revs,
  31. )
  32. from .objects import Commit, ObjectID, Tag
  33. def _split_commits_and_tags(obj_store, lst, *, ignore_unknown=False, pool=None):
  34. """Split object id list into two list with commit SHA1s and tag SHA1s.
  35. Same implementation as object_store._split_commits_and_tags
  36. except we use gevent to parallelize object retrieval.
  37. """
  38. commits = set()
  39. tags = set()
  40. def find_commit_type(sha) -> None:
  41. try:
  42. o = obj_store[sha]
  43. except KeyError:
  44. if not ignore_unknown:
  45. raise
  46. else:
  47. if isinstance(o, Commit):
  48. commits.add(sha)
  49. elif isinstance(o, Tag):
  50. tags.add(sha)
  51. commits.add(o.object[1])
  52. else:
  53. raise KeyError(f"Not a commit or a tag: {sha}")
  54. jobs = [pool.spawn(find_commit_type, s) for s in lst]
  55. gevent.joinall(jobs)
  56. return (commits, tags)
  57. class GreenThreadsMissingObjectFinder(MissingObjectFinder):
  58. """Find the objects missing from another object store.
  59. Same implementation as object_store.MissingObjectFinder
  60. except we use gevent to parallelize object retrieval.
  61. """
  62. def __init__(
  63. self,
  64. object_store,
  65. haves,
  66. wants,
  67. progress=None,
  68. get_tagged=None,
  69. concurrency=1,
  70. get_parents=None,
  71. ) -> None:
  72. def collect_tree_sha(sha) -> None:
  73. self.sha_done.add(sha)
  74. cmt = object_store[sha]
  75. _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
  76. self.object_store = object_store
  77. p = pool.Pool(size=concurrency)
  78. have_commits, have_tags = _split_commits_and_tags(
  79. object_store, haves, ignore_unknown=True, pool=p
  80. )
  81. want_commits, want_tags = _split_commits_and_tags(
  82. object_store, wants, ignore_unknown=False, pool=p
  83. )
  84. all_ancestors: frozenset[ObjectID] = frozenset(
  85. _collect_ancestors(object_store, have_commits)[0]
  86. )
  87. missing_commits, common_commits = _collect_ancestors(
  88. object_store, want_commits, all_ancestors
  89. )
  90. self.sha_done = set()
  91. jobs = [p.spawn(collect_tree_sha, c) for c in common_commits]
  92. gevent.joinall(jobs)
  93. for t in have_tags:
  94. self.sha_done.add(t)
  95. missing_tags = want_tags.difference(have_tags)
  96. wants = missing_commits.union(missing_tags)
  97. self.objects_to_send: set[
  98. tuple[ObjectID, Optional[bytes], Optional[int], bool]
  99. ] = {(w, None, 0, False) for w in wants}
  100. if progress is None:
  101. self.progress = lambda x: None
  102. else:
  103. self.progress = progress
  104. self._tagged = get_tagged and get_tagged() or {}