object_store.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. import itertools
  19. import os
  20. import tempfile
  21. import urllib2
  22. from dulwich.errors import (
  23. NotTreeError,
  24. )
  25. from dulwich.objects import (
  26. ShaFile,
  27. Tree,
  28. hex_to_sha,
  29. sha_to_hex,
  30. )
  31. from dulwich.pack import (
  32. Pack,
  33. PackData,
  34. iter_sha1,
  35. load_packs,
  36. load_pack_index,
  37. write_pack,
  38. write_pack_data,
  39. write_pack_index_v2,
  40. )
# Name of the subdirectory of an object store's path that holds its packs
# (joined onto ObjectStore.path to form ObjectStore.pack_dir).
PACKDIR = 'pack'
  42. class ObjectStore(object):
  43. """Object store."""
  44. def __init__(self, path):
  45. """Open an object store.
  46. :param path: Path of the object store.
  47. """
  48. self.path = path
  49. self._pack_cache = None
  50. self.pack_dir = os.path.join(self.path, PACKDIR)
  51. def determine_wants_all(self, refs):
  52. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  53. def iter_shas(self, shas):
  54. """Iterate over the objects for the specified shas.
  55. :param shas: Iterable object with SHAs
  56. """
  57. return ObjectStoreIterator(self, shas)
  58. def __contains__(self, sha):
  59. for pack in self.packs:
  60. if sha in pack:
  61. return True
  62. ret = self._get_shafile(sha)
  63. if ret is not None:
  64. return True
  65. return False
  66. def __iter__(self):
  67. iterables = self.packs + [self._iter_shafile_shas()]
  68. return itertools.chain(*iterables)
  69. @property
  70. def packs(self):
  71. """List with pack objects."""
  72. if self._pack_cache is None:
  73. self._pack_cache = list(load_packs(self.pack_dir))
  74. return self._pack_cache
  75. def _add_known_pack(self, path):
  76. """Add a newly appeared pack to the cache by path.
  77. """
  78. if self._pack_cache is not None:
  79. self._pack_cache.append(Pack(path))
  80. def _get_shafile_path(self, sha):
  81. dir = sha[:2]
  82. file = sha[2:]
  83. # Check from object dir
  84. return os.path.join(self.path, dir, file)
  85. def _iter_shafile_shas(self):
  86. for base in os.listdir(self.path):
  87. if len(base) != 2:
  88. continue
  89. for rest in os.listdir(os.path.join(self.path, base)):
  90. yield base+rest
  91. def _get_shafile(self, sha):
  92. path = self._get_shafile_path(sha)
  93. if os.path.exists(path):
  94. return ShaFile.from_file(path)
  95. return None
  96. def _add_shafile(self, sha, o):
  97. dir = os.path.join(self.path, sha[:2])
  98. if not os.path.isdir(dir):
  99. os.mkdir(dir)
  100. path = os.path.join(dir, sha[2:])
  101. f = open(path, 'w+')
  102. try:
  103. f.write(o.as_legacy_object())
  104. finally:
  105. f.close()
  106. def get_raw(self, name):
  107. """Obtain the raw text for an object.
  108. :param name: sha for the object.
  109. :return: tuple with object type and object contents.
  110. """
  111. if len(name) == 40:
  112. sha = hex_to_sha(name)
  113. hexsha = name
  114. elif len(name) == 20:
  115. sha = name
  116. hexsha = None
  117. else:
  118. raise AssertionError
  119. for pack in self.packs:
  120. try:
  121. return pack.get_raw(sha)
  122. except KeyError:
  123. pass
  124. if hexsha is None:
  125. hexsha = sha_to_hex(name)
  126. ret = self._get_shafile(hexsha)
  127. if ret is not None:
  128. return ret.type, ret.as_raw_string()
  129. raise KeyError(hexsha)
  130. def __getitem__(self, sha):
  131. type, uncomp = self.get_raw(sha)
  132. return ShaFile.from_raw_string(type, uncomp)
  133. def move_in_thin_pack(self, path):
  134. """Move a specific file containing a pack into the pack directory.
  135. :note: The file should be on the same file system as the
  136. packs directory.
  137. :param path: Path to the pack file.
  138. """
  139. data = PackData(path)
  140. # Write index for the thin pack (do we really need this?)
  141. temppath = os.path.join(self.pack_dir,
  142. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  143. data.create_index_v2(temppath, self.get_raw)
  144. p = Pack.from_objects(data, load_pack_index(temppath))
  145. # Write a full pack version
  146. temppath = os.path.join(self.pack_dir,
  147. sha_to_hex(urllib2.randombytes(20))+".temppack")
  148. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  149. len(p))
  150. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  151. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  152. os.rename(temppath+".pack", newbasename+".pack")
  153. os.rename(temppath+".idx", newbasename+".idx")
  154. self._add_known_pack(newbasename)
  155. def move_in_pack(self, path):
  156. """Move a specific file containing a pack into the pack directory.
  157. :note: The file should be on the same file system as the
  158. packs directory.
  159. :param path: Path to the pack file.
  160. """
  161. p = PackData(path)
  162. entries = p.sorted_entries()
  163. basename = os.path.join(self.pack_dir,
  164. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  165. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  166. os.rename(path, basename + ".pack")
  167. self._add_known_pack(basename)
  168. def add_thin_pack(self):
  169. """Add a new thin pack to this object store.
  170. Thin packs are packs that contain deltas with parents that exist
  171. in a different pack.
  172. """
  173. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  174. f = os.fdopen(fd, 'w')
  175. def commit():
  176. os.fsync(fd)
  177. f.close()
  178. if os.path.getsize(path) > 0:
  179. self.move_in_thin_pack(path)
  180. return f, commit
  181. def add_pack(self):
  182. """Add a new pack to this object store.
  183. :return: Fileobject to write to and a commit function to
  184. call when the pack is finished.
  185. """
  186. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  187. f = os.fdopen(fd, 'w')
  188. def commit():
  189. os.fsync(fd)
  190. f.close()
  191. if os.path.getsize(path) > 0:
  192. self.move_in_pack(path)
  193. return f, commit
  194. def add_object(self, obj):
  195. self._add_shafile(obj.id, obj)
  196. def add_objects(self, objects):
  197. """Add a set of objects to this object store.
  198. :param objects: Iterable over a list of objects.
  199. """
  200. if len(objects) == 0:
  201. return
  202. f, commit = self.add_pack()
  203. write_pack_data(f, objects, len(objects))
  204. commit()
  205. class ObjectImporter(object):
  206. """Interface for importing objects."""
  207. def __init__(self, count):
  208. """Create a new ObjectImporter.
  209. :param count: Number of objects that's going to be imported.
  210. """
  211. self.count = count
  212. def add_object(self, object):
  213. """Add an object."""
  214. raise NotImplementedError(self.add_object)
  215. def finish(self, object):
  216. """Finish the imoprt and write objects to disk."""
  217. raise NotImplementedError(self.finish)
  218. class ObjectIterator(object):
  219. """Interface for iterating over objects."""
  220. def iterobjects(self):
  221. raise NotImplementedError(self.iterobjects)
  222. class ObjectStoreIterator(ObjectIterator):
  223. """ObjectIterator that works on top of an ObjectStore."""
  224. def __init__(self, store, sha_iter):
  225. self.store = store
  226. self.sha_iter = sha_iter
  227. self._shas = []
  228. def __iter__(self):
  229. for sha, path in self.itershas():
  230. yield self.store[sha], path
  231. def iterobjects(self):
  232. for o, path in self:
  233. yield o
  234. def itershas(self):
  235. for sha in self._shas:
  236. yield sha
  237. for sha in self.sha_iter:
  238. self._shas.append(sha)
  239. yield sha
  240. def __contains__(self, needle):
  241. """Check if an object is present.
  242. :param needle: SHA1 of the object to check for
  243. """
  244. return needle in self.store
  245. def __getitem__(self, key):
  246. """Find an object by SHA1."""
  247. return self.store[key]
  248. def __len__(self):
  249. """Return the number of objects."""
  250. return len(list(self.itershas()))
  251. def tree_lookup_path(lookup_obj, root_sha, path):
  252. parts = path.split("/")
  253. sha = root_sha
  254. for p in parts:
  255. obj = lookup_obj(sha)
  256. if type(obj) is not Tree:
  257. raise NotTreeError(sha)
  258. if p == '':
  259. continue
  260. mode, sha = obj[p]
  261. return lookup_obj(sha)