object_store.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. import os
  19. import tempfile
  20. import urllib2
  21. from dulwich.objects import (
  22. ShaFile,
  23. hex_to_sha,
  24. sha_to_hex,
  25. )
  26. from dulwich.pack import (
  27. Pack,
  28. PackData,
  29. iter_sha1,
  30. load_packs,
  31. load_pack_index,
  32. write_pack,
  33. write_pack_data,
  34. write_pack_index_v2,
  35. )
  36. PACKDIR = 'pack'
  37. class ObjectStore(object):
  38. """Object store."""
  39. def __init__(self, path):
  40. """Open an object store.
  41. :param path: Path of the object store.
  42. """
  43. self.path = path
  44. self._pack_cache = None
  45. self.pack_dir = os.path.join(self.path, PACKDIR)
  46. def determine_wants_all(self, refs):
  47. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  48. def iter_shas(self, shas):
  49. """Iterate over the objects for the specified shas.
  50. :param shas: Iterable object with SHAs
  51. """
  52. return ObjectStoreIterator(self, shas)
  53. def __contains__(self, sha):
  54. for pack in self.packs:
  55. if sha in pack:
  56. return True
  57. ret = self._get_shafile(sha)
  58. if ret is not None:
  59. return True
  60. return False
  61. @property
  62. def packs(self):
  63. """List with pack objects."""
  64. if self._pack_cache is None:
  65. self._pack_cache = list(load_packs(self.pack_dir))
  66. return self._pack_cache
  67. def _add_known_pack(self, path):
  68. """Add a newly appeared pack to the cache by path.
  69. """
  70. if self._pack_cache is not None:
  71. self._pack_cache.append(Pack(path))
  72. def _get_shafile_path(self, sha):
  73. dir = sha[:2]
  74. file = sha[2:]
  75. # Check from object dir
  76. return os.path.join(self.path, dir, file)
  77. def _get_shafile(self, sha):
  78. path = self._get_shafile_path(sha)
  79. if os.path.exists(path):
  80. return ShaFile.from_file(path)
  81. return None
  82. def _add_shafile(self, sha, o):
  83. path = self._get_shafile_path(sha)
  84. f = os.path.open(path, 'w')
  85. try:
  86. f.write(o._header())
  87. f.write(o._text)
  88. finally:
  89. f.close()
  90. def get_raw(self, name):
  91. """Obtain the raw text for an object.
  92. :param name: sha for the object.
  93. :return: tuple with object type and object contents.
  94. """
  95. if len(name) == 40:
  96. sha = hex_to_sha(name)
  97. hexsha = name
  98. elif len(name) == 20:
  99. sha = name
  100. hexsha = None
  101. else:
  102. raise AssertionError
  103. for pack in self.packs:
  104. try:
  105. return pack.get_raw(sha)
  106. except KeyError:
  107. pass
  108. if hexsha is None:
  109. hexsha = sha_to_hex(name)
  110. ret = self._get_shafile(hexsha)
  111. if ret is not None:
  112. return ret.as_raw_string()
  113. raise KeyError(hexsha)
  114. def __getitem__(self, sha):
  115. type, uncomp = self.get_raw(sha)
  116. return ShaFile.from_raw_string(type, uncomp)
  117. def move_in_thin_pack(self, path):
  118. """Move a specific file containing a pack into the pack directory.
  119. :note: The file should be on the same file system as the
  120. packs directory.
  121. :param path: Path to the pack file.
  122. """
  123. p = PackData(path)
  124. temppath = os.path.join(self.pack_dir,
  125. sha_to_hex(urllib2.randombytes(20))+".temppack")
  126. write_pack(temppath, p.iterobjects(self.get_raw), len(p))
  127. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  128. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  129. os.rename(temppath+".pack", newbasename+".pack")
  130. os.rename(temppath+".idx", newbasename+".idx")
  131. self._add_known_pack(newbasename)
  132. def move_in_pack(self, path):
  133. """Move a specific file containing a pack into the pack directory.
  134. :note: The file should be on the same file system as the
  135. packs directory.
  136. :param path: Path to the pack file.
  137. """
  138. p = PackData(path)
  139. entries = p.sorted_entries()
  140. basename = os.path.join(self.pack_dir,
  141. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  142. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  143. os.rename(path, basename + ".pack")
  144. self._add_known_pack(basename)
  145. def add_thin_pack(self):
  146. """Add a new thin pack to this object store.
  147. Thin packs are packs that contain deltas with parents that exist
  148. in a different pack.
  149. """
  150. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  151. f = os.fdopen(fd, 'w')
  152. def commit():
  153. os.fsync(fd)
  154. f.close()
  155. if os.path.getsize(path) > 0:
  156. self.move_in_thin_pack(path)
  157. return f, commit
  158. def add_pack(self):
  159. """Add a new pack to this object store.
  160. :return: Fileobject to write to and a commit function to
  161. call when the pack is finished.
  162. """
  163. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  164. f = os.fdopen(fd, 'w')
  165. def commit():
  166. os.fsync(fd)
  167. f.close()
  168. if os.path.getsize(path) > 0:
  169. self.move_in_pack(path)
  170. return f, commit
  171. def add_objects(self, objects):
  172. """Add a set of objects to this object store.
  173. :param objects: Iterable over a list of objects.
  174. """
  175. if len(objects) == 0:
  176. return
  177. f, commit = self.add_pack()
  178. write_pack_data(f, objects, len(objects))
  179. commit()
  180. class ObjectImporter(object):
  181. """Interface for importing objects."""
  182. def __init__(self, count):
  183. """Create a new ObjectImporter.
  184. :param count: Number of objects that's going to be imported.
  185. """
  186. self.count = count
  187. def add_object(self, object):
  188. """Add an object."""
  189. raise NotImplementedError(self.add_object)
  190. def finish(self, object):
  191. """Finish the imoprt and write objects to disk."""
  192. raise NotImplementedError(self.finish)
  193. class ObjectIterator(object):
  194. """Interface for iterating over objects."""
  195. def iterobjects(self):
  196. raise NotImplementedError(self.iterobjects)
  197. class ObjectStoreIterator(ObjectIterator):
  198. """ObjectIterator that works on top of an ObjectStore."""
  199. def __init__(self, store, sha_iter):
  200. self.store = store
  201. self.sha_iter = sha_iter
  202. self._shas = []
  203. def __iter__(self):
  204. for sha, path in self.itershas():
  205. yield self.store[sha], path
  206. def iterobjects(self):
  207. for o, path in self:
  208. yield o
  209. def itershas(self):
  210. for sha in self._shas:
  211. yield sha
  212. for sha in self.sha_iter:
  213. self._shas.append(sha)
  214. yield sha
  215. def __contains__(self, needle):
  216. """Check if an object is present.
  217. :param needle: SHA1 of the object to check for
  218. """
  219. return needle in self.store
  220. def __getitem__(self, key):
  221. """Find an object by SHA1."""
  222. return self.store[key]
  223. def __len__(self):
  224. """Return the number of objects."""
  225. return len(list(self.itershas()))