2
0

object_store.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Git object store interfaces and implementation."""
  19. import itertools
  20. import os
  21. import stat
  22. import tempfile
  23. import urllib2
  24. from dulwich.errors import (
  25. NotTreeError,
  26. )
  27. from dulwich.objects import (
  28. Commit,
  29. ShaFile,
  30. Tag,
  31. Tree,
  32. hex_to_sha,
  33. sha_to_hex,
  34. )
  35. from dulwich.pack import (
  36. Pack,
  37. PackData,
  38. iter_sha1,
  39. load_packs,
  40. load_pack_index,
  41. write_pack,
  42. write_pack_data,
  43. write_pack_index_v2,
  44. )
# Name of the sub-directory, relative to the object store path, that
# DiskObjectStore joins onto its path to locate pack files.
PACKDIR = 'pack'
  46. class BaseObjectStore(object):
  47. """Object store interface."""
  48. def determine_wants_all(self, refs):
  49. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  50. def iter_shas(self, shas):
  51. """Iterate over the objects for the specified shas.
  52. :param shas: Iterable object with SHAs
  53. """
  54. return ObjectStoreIterator(self, shas)
  55. def __contains__(self, sha):
  56. """Check if a particular object is present by SHA1."""
  57. raise NotImplementedError(self.__contains__)
  58. def get_raw(self, name):
  59. """Obtain the raw text for an object.
  60. :param name: sha for the object.
  61. :return: tuple with object type and object contents.
  62. """
  63. raise NotImplementedError(self.get_raw)
  64. def __getitem__(self, sha):
  65. """Obtain an object by SHA1."""
  66. type, uncomp = self.get_raw(sha)
  67. return ShaFile.from_raw_string(type, uncomp)
  68. def __iter__(self):
  69. """Iterate over the SHAs that are present in this store."""
  70. raise NotImplementedError(self.__iter__)
  71. def add_object(self, obj):
  72. """Add a single object to this object store.
  73. """
  74. raise NotImplementedError(self.add_object)
  75. def add_objects(self, objects):
  76. """Add a set of objects to this object store.
  77. :param objects: Iterable over a list of objects.
  78. """
  79. raise NotImplementedError(self.add_objects)
  80. def find_missing_objects(self, haves, wants, progress=None):
  81. """Find the missing objects required for a set of revisions.
  82. :param haves: Iterable over SHAs already in common.
  83. :param wants: Iterable over SHAs of objects to fetch.
  84. :param progress: Simple progress function that will be called with
  85. updated progress strings.
  86. :return: Iterator over (sha, path) pairs.
  87. """
  88. return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
  89. def get_graph_walker(self, heads):
  90. """Obtain a graph walker for this object store.
  91. :param heads: Local heads to start search with
  92. :return: GraphWalker object
  93. """
  94. return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
  95. class DiskObjectStore(BaseObjectStore):
  96. """Git-style object store that exists on disk."""
  97. def __init__(self, path):
  98. """Open an object store.
  99. :param path: Path of the object store.
  100. """
  101. self.path = path
  102. self._pack_cache = None
  103. self.pack_dir = os.path.join(self.path, PACKDIR)
  104. def __contains__(self, sha):
  105. """Check if a particular object is present by SHA1."""
  106. for pack in self.packs:
  107. if sha in pack:
  108. return True
  109. ret = self._get_shafile(sha)
  110. if ret is not None:
  111. return True
  112. return False
  113. def __iter__(self):
  114. """Iterate over the SHAs that are present in this store."""
  115. iterables = self.packs + [self._iter_shafile_shas()]
  116. return itertools.chain(*iterables)
  117. @property
  118. def packs(self):
  119. """List with pack objects."""
  120. if self._pack_cache is None:
  121. self._pack_cache = list(load_packs(self.pack_dir))
  122. return self._pack_cache
  123. def _add_known_pack(self, path):
  124. """Add a newly appeared pack to the cache by path.
  125. """
  126. if self._pack_cache is not None:
  127. self._pack_cache.append(Pack(path))
  128. def _get_shafile_path(self, sha):
  129. dir = sha[:2]
  130. file = sha[2:]
  131. # Check from object dir
  132. return os.path.join(self.path, dir, file)
  133. def _iter_shafile_shas(self):
  134. for base in os.listdir(self.path):
  135. if len(base) != 2:
  136. continue
  137. for rest in os.listdir(os.path.join(self.path, base)):
  138. yield base+rest
  139. def _get_shafile(self, sha):
  140. path = self._get_shafile_path(sha)
  141. if os.path.exists(path):
  142. return ShaFile.from_file(path)
  143. return None
  144. def _add_shafile(self, sha, o):
  145. dir = os.path.join(self.path, sha[:2])
  146. if not os.path.isdir(dir):
  147. os.mkdir(dir)
  148. path = os.path.join(dir, sha[2:])
  149. f = open(path, 'w+')
  150. try:
  151. f.write(o.as_legacy_object())
  152. finally:
  153. f.close()
  154. def get_raw(self, name):
  155. """Obtain the raw text for an object.
  156. :param name: sha for the object.
  157. :return: tuple with object type and object contents.
  158. """
  159. if len(name) == 40:
  160. sha = hex_to_sha(name)
  161. hexsha = name
  162. elif len(name) == 20:
  163. sha = name
  164. hexsha = None
  165. else:
  166. raise AssertionError
  167. for pack in self.packs:
  168. try:
  169. return pack.get_raw(sha)
  170. except KeyError:
  171. pass
  172. if hexsha is None:
  173. hexsha = sha_to_hex(name)
  174. ret = self._get_shafile(hexsha)
  175. if ret is not None:
  176. return ret.type, ret.as_raw_string()
  177. raise KeyError(hexsha)
  178. def move_in_thin_pack(self, path):
  179. """Move a specific file containing a pack into the pack directory.
  180. :note: The file should be on the same file system as the
  181. packs directory.
  182. :param path: Path to the pack file.
  183. """
  184. data = PackData(path)
  185. # Write index for the thin pack (do we really need this?)
  186. temppath = os.path.join(self.pack_dir,
  187. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  188. data.create_index_v2(temppath, self.get_raw)
  189. p = Pack.from_objects(data, load_pack_index(temppath))
  190. # Write a full pack version
  191. temppath = os.path.join(self.pack_dir,
  192. sha_to_hex(urllib2.randombytes(20))+".temppack")
  193. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  194. len(p))
  195. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  196. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  197. os.rename(temppath+".pack", newbasename+".pack")
  198. os.rename(temppath+".idx", newbasename+".idx")
  199. self._add_known_pack(newbasename)
  200. def move_in_pack(self, path):
  201. """Move a specific file containing a pack into the pack directory.
  202. :note: The file should be on the same file system as the
  203. packs directory.
  204. :param path: Path to the pack file.
  205. """
  206. p = PackData(path)
  207. entries = p.sorted_entries()
  208. basename = os.path.join(self.pack_dir,
  209. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  210. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  211. os.rename(path, basename + ".pack")
  212. self._add_known_pack(basename)
  213. def add_thin_pack(self):
  214. """Add a new thin pack to this object store.
  215. Thin packs are packs that contain deltas with parents that exist
  216. in a different pack.
  217. """
  218. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  219. f = os.fdopen(fd, 'w')
  220. def commit():
  221. os.fsync(fd)
  222. f.close()
  223. if os.path.getsize(path) > 0:
  224. self.move_in_thin_pack(path)
  225. return f, commit
  226. def add_pack(self):
  227. """Add a new pack to this object store.
  228. :return: Fileobject to write to and a commit function to
  229. call when the pack is finished.
  230. """
  231. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  232. f = os.fdopen(fd, 'w')
  233. def commit():
  234. os.fsync(fd)
  235. f.close()
  236. if os.path.getsize(path) > 0:
  237. self.move_in_pack(path)
  238. return f, commit
  239. def add_object(self, obj):
  240. """Add a single object to this object store.
  241. """
  242. self._add_shafile(obj.id, obj)
  243. def add_objects(self, objects):
  244. """Add a set of objects to this object store.
  245. :param objects: Iterable over a list of objects.
  246. """
  247. if len(objects) == 0:
  248. return
  249. f, commit = self.add_pack()
  250. write_pack_data(f, objects, len(objects))
  251. commit()
  252. class MemoryObjectStore(BaseObjectStore):
  253. def __init__(self):
  254. super(MemoryObjectStore, self).__init__()
  255. self._data = {}
  256. def __contains__(self, sha):
  257. return sha in self._data
  258. def __iter__(self):
  259. """Iterate over the SHAs that are present in this store."""
  260. return self._data.iterkeys()
  261. def get_raw(self, name):
  262. """Obtain the raw text for an object.
  263. :param name: sha for the object.
  264. :return: tuple with object type and object contents.
  265. """
  266. return self[name].as_raw_string()
  267. def __getitem__(self, name):
  268. return self._data[name]
  269. def add_object(self, obj):
  270. """Add a single object to this object store.
  271. """
  272. self._data[obj.id] = obj
  273. def add_objects(self, objects):
  274. """Add a set of objects to this object store.
  275. :param objects: Iterable over a list of objects.
  276. """
  277. for obj in objects:
  278. self._data[obj.id] = obj
  279. class ObjectImporter(object):
  280. """Interface for importing objects."""
  281. def __init__(self, count):
  282. """Create a new ObjectImporter.
  283. :param count: Number of objects that's going to be imported.
  284. """
  285. self.count = count
  286. def add_object(self, object):
  287. """Add an object."""
  288. raise NotImplementedError(self.add_object)
  289. def finish(self, object):
  290. """Finish the imoprt and write objects to disk."""
  291. raise NotImplementedError(self.finish)
  292. class ObjectIterator(object):
  293. """Interface for iterating over objects."""
  294. def iterobjects(self):
  295. raise NotImplementedError(self.iterobjects)
  296. class ObjectStoreIterator(ObjectIterator):
  297. """ObjectIterator that works on top of an ObjectStore."""
  298. def __init__(self, store, sha_iter):
  299. self.store = store
  300. self.sha_iter = sha_iter
  301. self._shas = []
  302. def __iter__(self):
  303. for sha, path in self.itershas():
  304. yield self.store[sha], path
  305. def iterobjects(self):
  306. for o, path in self:
  307. yield o
  308. def itershas(self):
  309. for sha in self._shas:
  310. yield sha
  311. for sha in self.sha_iter:
  312. self._shas.append(sha)
  313. yield sha
  314. def __contains__(self, needle):
  315. """Check if an object is present.
  316. :param needle: SHA1 of the object to check for
  317. """
  318. return needle in self.store
  319. def __getitem__(self, key):
  320. """Find an object by SHA1."""
  321. return self.store[key]
  322. def __len__(self):
  323. """Return the number of objects."""
  324. return len(list(self.itershas()))
  325. def tree_lookup_path(lookup_obj, root_sha, path):
  326. """Lookup an object in a Git tree.
  327. :param lookup_obj: Callback for retrieving object by SHA1
  328. :param root_sha: SHA1 of the root tree
  329. :param path: Path to lookup
  330. """
  331. parts = path.split("/")
  332. sha = root_sha
  333. for p in parts:
  334. obj = lookup_obj(sha)
  335. if type(obj) is not Tree:
  336. raise NotTreeError(sha)
  337. if p == '':
  338. continue
  339. mode, sha = obj[p]
  340. return lookup_obj(sha)
  341. class MissingObjectFinder(object):
  342. """Find the objects missing from another object store.
  343. :param object_store: Object store containing at least all objects to be
  344. sent
  345. :param haves: SHA1s of commits not to send (already present in target)
  346. :param wants: SHA1s of commits to send
  347. :param progress: Optional function to report progress to.
  348. """
  349. def __init__(self, object_store, haves, wants, progress=None):
  350. self.sha_done = set(haves)
  351. self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
  352. self.object_store = object_store
  353. if progress is None:
  354. self.progress = lambda x: None
  355. else:
  356. self.progress = progress
  357. def add_todo(self, entries):
  358. self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
  359. def parse_tree(self, tree):
  360. self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries()])
  361. def parse_commit(self, commit):
  362. self.add_todo([(commit.tree, "", False)])
  363. self.add_todo([(p, None, False) for p in commit.parents])
  364. def parse_tag(self, tag):
  365. self.add_todo([(tag.object[1], None, False)])
  366. def next(self):
  367. if not self.objects_to_send:
  368. return None
  369. (sha, name, leaf) = self.objects_to_send.pop()
  370. if not leaf:
  371. o = self.object_store[sha]
  372. if isinstance(o, Commit):
  373. self.parse_commit(o)
  374. elif isinstance(o, Tree):
  375. self.parse_tree(o)
  376. elif isinstance(o, Tag):
  377. self.parse_tag(o)
  378. self.sha_done.add(sha)
  379. self.progress("counting objects: %d\r" % len(self.sha_done))
  380. return (sha, name)
  381. class ObjectStoreGraphWalker(object):
  382. """Graph walker that finds out what commits are missing from an object store."""
  383. def __init__(self, local_heads, get_parents):
  384. """Create a new instance.
  385. :param local_heads: Heads to start search with
  386. :param get_parents: Function for finding the parents of a SHA1.
  387. """
  388. self.heads = set(local_heads)
  389. self.get_parents = get_parents
  390. self.parents = {}
  391. def ack(self, sha):
  392. """Ack that a particular revision and its ancestors are present in the source."""
  393. if sha in self.heads:
  394. self.heads.remove(sha)
  395. if sha in self.parents:
  396. for p in self.parents[sha]:
  397. self.ack(p)
  398. def next(self):
  399. """Iterate over ancestors of heads in the target."""
  400. if self.heads:
  401. ret = self.heads.pop()
  402. ps = self.get_parents(ret)
  403. self.parents[ret] = ps
  404. self.heads.update(ps)
  405. return ret
  406. return None