greenthreads.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. # greenthreads.py -- Utility module for querying an ObjectStore with gevent
  2. # Copyright (C) 2013 eNovance SAS <licensing@enovance.com>
  3. #
  4. # Author: Fabien Boucher <fabien.boucher@enovance.com>
  5. #
  6. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  7. # General Public License as published by the Free Software Foundation; version 2.0
  8. # or (at your option) any later version. You can redistribute it and/or
  9. # modify it under the terms of either of these two licenses.
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. #
  17. # You should have received a copy of the licenses; if not, see
  18. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  19. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  20. # License, Version 2.0.
  21. #
  22. """Utility module for querying an ObjectStore with gevent."""
  23. import gevent
  24. from gevent import pool
  25. from dulwich.objects import (
  26. Commit,
  27. Tag,
  28. )
  29. from dulwich.object_store import (
  30. MissingObjectFinder,
  31. _collect_filetree_revs,
  32. ObjectStoreIterator,
  33. )
  34. def _split_commits_and_tags(obj_store, lst,
  35. ignore_unknown=False, pool=None):
  36. """Split object id list into two list with commit SHA1s and tag SHA1s.
  37. Same implementation as object_store._split_commits_and_tags
  38. except we use gevent to parallelize object retrieval.
  39. """
  40. commits = set()
  41. tags = set()
  42. def find_commit_type(sha):
  43. try:
  44. o = obj_store[sha]
  45. except KeyError:
  46. if not ignore_unknown:
  47. raise
  48. else:
  49. if isinstance(o, Commit):
  50. commits.add(sha)
  51. elif isinstance(o, Tag):
  52. tags.add(sha)
  53. commits.add(o.object[1])
  54. else:
  55. raise KeyError('Not a commit or a tag: %s' % sha)
  56. jobs = [pool.spawn(find_commit_type, s) for s in lst]
  57. gevent.joinall(jobs)
  58. return (commits, tags)
  59. class GreenThreadsMissingObjectFinder(MissingObjectFinder):
  60. """Find the objects missing from another object store.
  61. Same implementation as object_store.MissingObjectFinder
  62. except we use gevent to parallelize object retrieval.
  63. """
  64. def __init__(self, object_store, haves, wants,
  65. progress=None, get_tagged=None,
  66. concurrency=1, get_parents=None):
  67. def collect_tree_sha(sha):
  68. self.sha_done.add(sha)
  69. cmt = object_store[sha]
  70. _collect_filetree_revs(object_store, cmt.tree, self.sha_done)
  71. self.object_store = object_store
  72. p = pool.Pool(size=concurrency)
  73. have_commits, have_tags = \
  74. _split_commits_and_tags(object_store, haves,
  75. True, p)
  76. want_commits, want_tags = \
  77. _split_commits_and_tags(object_store, wants,
  78. False, p)
  79. all_ancestors = object_store._collect_ancestors(have_commits)[0]
  80. missing_commits, common_commits = \
  81. object_store._collect_ancestors(want_commits, all_ancestors)
  82. self.sha_done = set()
  83. jobs = [p.spawn(collect_tree_sha, c) for c in common_commits]
  84. gevent.joinall(jobs)
  85. for t in have_tags:
  86. self.sha_done.add(t)
  87. missing_tags = want_tags.difference(have_tags)
  88. wants = missing_commits.union(missing_tags)
  89. self.objects_to_send = set([(w, None, False) for w in wants])
  90. if progress is None:
  91. self.progress = lambda x: None
  92. else:
  93. self.progress = progress
  94. self._tagged = get_tagged and get_tagged() or {}
  95. class GreenThreadsObjectStoreIterator(ObjectStoreIterator):
  96. """ObjectIterator that works on top of an ObjectStore.
  97. Same implementation as object_store.ObjectStoreIterator
  98. except we use gevent to parallelize object retrieval.
  99. """
  100. def __init__(self, store, shas, finder, concurrency=1):
  101. self.finder = finder
  102. self.p = pool.Pool(size=concurrency)
  103. super(GreenThreadsObjectStoreIterator, self).__init__(store, shas)
  104. def retrieve(self, args):
  105. sha, path = args
  106. return self.store[sha], path
  107. def __iter__(self):
  108. for sha, path in self.p.imap_unordered(self.retrieve,
  109. self.itershas()):
  110. yield sha, path
  111. def __len__(self):
  112. if len(self._shas) > 0:
  113. return len(self._shas)
  114. while len(self.finder.objects_to_send):
  115. jobs = []
  116. for _ in range(0, len(self.finder.objects_to_send)):
  117. jobs.append(self.p.spawn(self.finder.next))
  118. gevent.joinall(jobs)
  119. for j in jobs:
  120. if j.value is not None:
  121. self._shas.append(j.value)
  122. return len(self._shas)