object_store.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Git object store interfaces and implementation."""
  19. import itertools
  20. import os
  21. import stat
  22. import tempfile
  23. import urllib2
  24. from dulwich.errors import (
  25. NotTreeError,
  26. )
  27. from dulwich.objects import (
  28. Commit,
  29. ShaFile,
  30. Tag,
  31. Tree,
  32. hex_to_sha,
  33. sha_to_hex,
  34. )
  35. from dulwich.pack import (
  36. Pack,
  37. PackData,
  38. iter_sha1,
  39. load_pack_index,
  40. write_pack,
  41. write_pack_data,
  42. write_pack_index_v2,
  43. )
  44. PACKDIR = 'pack'
  45. class BaseObjectStore(object):
  46. """Object store interface."""
  47. def determine_wants_all(self, refs):
  48. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  49. def iter_shas(self, shas):
  50. """Iterate over the objects for the specified shas.
  51. :param shas: Iterable object with SHAs
  52. :return: Object iterator
  53. """
  54. return ObjectStoreIterator(self, shas)
  55. def __contains__(self, sha):
  56. """Check if a particular object is present by SHA1."""
  57. raise NotImplementedError(self.__contains__)
  58. def get_raw(self, name):
  59. """Obtain the raw text for an object.
  60. :param name: sha for the object.
  61. :return: tuple with object type and object contents.
  62. """
  63. raise NotImplementedError(self.get_raw)
  64. def __getitem__(self, sha):
  65. """Obtain an object by SHA1."""
  66. type, uncomp = self.get_raw(sha)
  67. return ShaFile.from_raw_string(type, uncomp)
  68. def __iter__(self):
  69. """Iterate over the SHAs that are present in this store."""
  70. raise NotImplementedError(self.__iter__)
  71. def add_object(self, obj):
  72. """Add a single object to this object store.
  73. """
  74. raise NotImplementedError(self.add_object)
  75. def add_objects(self, objects):
  76. """Add a set of objects to this object store.
  77. :param objects: Iterable over a list of objects.
  78. """
  79. raise NotImplementedError(self.add_objects)
  80. def find_missing_objects(self, haves, wants, progress=None):
  81. """Find the missing objects required for a set of revisions.
  82. :param haves: Iterable over SHAs already in common.
  83. :param wants: Iterable over SHAs of objects to fetch.
  84. :param progress: Simple progress function that will be called with
  85. updated progress strings.
  86. :return: Iterator over (sha, path) pairs.
  87. """
  88. return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
  89. def find_common_revisions(self, graphwalker):
  90. """Find which revisions this store has in common using graphwalker.
  91. :param graphwalker: A graphwalker object.
  92. :return: List of SHAs that are in common
  93. """
  94. haves = []
  95. sha = graphwalker.next()
  96. while sha:
  97. if sha in self:
  98. haves.append(sha)
  99. graphwalker.ack(sha)
  100. sha = graphwalker.next()
  101. return haves
  102. def get_graph_walker(self, heads):
  103. """Obtain a graph walker for this object store.
  104. :param heads: Local heads to start search with
  105. :return: GraphWalker object
  106. """
  107. return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
  108. def generate_pack_contents(self, have, want):
  109. """Iterate over the contents of a pack file.
  110. :param have: List of SHA1s of objects that should not be sent
  111. :param want: List of SHA1s of objects that should be sent
  112. """
  113. return self.iter_shas(self.find_missing_objects(have, want))
  114. class DiskObjectStore(BaseObjectStore):
  115. """Git-style object store that exists on disk."""
  116. def __init__(self, path):
  117. """Open an object store.
  118. :param path: Path of the object store.
  119. """
  120. self.path = path
  121. self._pack_cache = None
  122. self.pack_dir = os.path.join(self.path, PACKDIR)
  123. def __contains__(self, sha):
  124. """Check if a particular object is present by SHA1."""
  125. for pack in self.packs:
  126. if sha in pack:
  127. return True
  128. ret = self._get_shafile(sha)
  129. if ret is not None:
  130. return True
  131. return False
  132. def __iter__(self):
  133. """Iterate over the SHAs that are present in this store."""
  134. iterables = self.packs + [self._iter_shafile_shas()]
  135. return itertools.chain(*iterables)
  136. @property
  137. def packs(self):
  138. """List with pack objects."""
  139. if self._pack_cache is None:
  140. self._pack_cache = list(self._load_packs())
  141. return self._pack_cache
  142. def _load_packs(self):
  143. if not os.path.exists(self.pack_dir):
  144. return
  145. for name in os.listdir(self.pack_dir):
  146. if name.startswith("pack-") and name.endswith(".pack"):
  147. yield Pack(os.path.join(self.pack_dir, name[:-len(".pack")]))
  148. def _add_known_pack(self, path):
  149. """Add a newly appeared pack to the cache by path.
  150. """
  151. if self._pack_cache is not None:
  152. self._pack_cache.append(Pack(path))
  153. def _get_shafile_path(self, sha):
  154. dir = sha[:2]
  155. file = sha[2:]
  156. # Check from object dir
  157. return os.path.join(self.path, dir, file)
  158. def _iter_shafile_shas(self):
  159. for base in os.listdir(self.path):
  160. if len(base) != 2:
  161. continue
  162. for rest in os.listdir(os.path.join(self.path, base)):
  163. yield base+rest
  164. def _get_shafile(self, sha):
  165. path = self._get_shafile_path(sha)
  166. if os.path.exists(path):
  167. return ShaFile.from_file(path)
  168. return None
  169. def _add_shafile(self, sha, o):
  170. dir = os.path.join(self.path, sha[:2])
  171. if not os.path.isdir(dir):
  172. os.mkdir(dir)
  173. path = os.path.join(dir, sha[2:])
  174. if os.path.exists(path):
  175. return # Already there, no need to write again
  176. f = open(path, 'w+')
  177. try:
  178. f.write(o.as_legacy_object())
  179. finally:
  180. f.close()
  181. def get_raw(self, name):
  182. """Obtain the raw text for an object.
  183. :param name: sha for the object.
  184. :return: tuple with object type and object contents.
  185. """
  186. if len(name) == 40:
  187. sha = hex_to_sha(name)
  188. hexsha = name
  189. elif len(name) == 20:
  190. sha = name
  191. hexsha = None
  192. else:
  193. raise AssertionError
  194. for pack in self.packs:
  195. try:
  196. return pack.get_raw(sha)
  197. except KeyError:
  198. pass
  199. if hexsha is None:
  200. hexsha = sha_to_hex(name)
  201. ret = self._get_shafile(hexsha)
  202. if ret is not None:
  203. return ret.type, ret.as_raw_string()
  204. raise KeyError(hexsha)
  205. def move_in_thin_pack(self, path):
  206. """Move a specific file containing a pack into the pack directory.
  207. :note: The file should be on the same file system as the
  208. packs directory.
  209. :param path: Path to the pack file.
  210. """
  211. data = PackData(path)
  212. # Write index for the thin pack (do we really need this?)
  213. temppath = os.path.join(self.pack_dir,
  214. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  215. data.create_index_v2(temppath, self.get_raw)
  216. p = Pack.from_objects(data, load_pack_index(temppath))
  217. # Write a full pack version
  218. temppath = os.path.join(self.pack_dir,
  219. sha_to_hex(urllib2.randombytes(20))+".temppack")
  220. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  221. len(p))
  222. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  223. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  224. os.rename(temppath+".pack", newbasename+".pack")
  225. os.rename(temppath+".idx", newbasename+".idx")
  226. self._add_known_pack(newbasename)
  227. def move_in_pack(self, path):
  228. """Move a specific file containing a pack into the pack directory.
  229. :note: The file should be on the same file system as the
  230. packs directory.
  231. :param path: Path to the pack file.
  232. """
  233. p = PackData(path)
  234. entries = p.sorted_entries()
  235. basename = os.path.join(self.pack_dir,
  236. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  237. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  238. os.rename(path, basename + ".pack")
  239. self._add_known_pack(basename)
  240. def add_thin_pack(self):
  241. """Add a new thin pack to this object store.
  242. Thin packs are packs that contain deltas with parents that exist
  243. in a different pack.
  244. """
  245. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  246. f = os.fdopen(fd, 'wb')
  247. def commit():
  248. os.fsync(fd)
  249. f.close()
  250. if os.path.getsize(path) > 0:
  251. self.move_in_thin_pack(path)
  252. return f, commit
  253. def add_pack(self):
  254. """Add a new pack to this object store.
  255. :return: Fileobject to write to and a commit function to
  256. call when the pack is finished.
  257. """
  258. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  259. f = os.fdopen(fd, 'wb')
  260. def commit():
  261. os.fsync(fd)
  262. f.close()
  263. if os.path.getsize(path) > 0:
  264. self.move_in_pack(path)
  265. return f, commit
  266. def add_object(self, obj):
  267. """Add a single object to this object store.
  268. :param obj: Object to add
  269. """
  270. self._add_shafile(obj.id, obj)
  271. def add_objects(self, objects):
  272. """Add a set of objects to this object store.
  273. :param objects: Iterable over objects, should support __len__.
  274. """
  275. if len(objects) == 0:
  276. # Don't bother writing an empty pack file
  277. return
  278. f, commit = self.add_pack()
  279. write_pack_data(f, objects, len(objects))
  280. commit()
  281. class MemoryObjectStore(BaseObjectStore):
  282. """Object store that keeps all objects in memory."""
  283. def __init__(self):
  284. super(MemoryObjectStore, self).__init__()
  285. self._data = {}
  286. def __contains__(self, sha):
  287. """Check if the object with a particular SHA is present."""
  288. return sha in self._data
  289. def __iter__(self):
  290. """Iterate over the SHAs that are present in this store."""
  291. return self._data.iterkeys()
  292. def get_raw(self, name):
  293. """Obtain the raw text for an object.
  294. :param name: sha for the object.
  295. :return: tuple with object type and object contents.
  296. """
  297. return self[name].as_raw_string()
  298. def __getitem__(self, name):
  299. return self._data[name]
  300. def add_object(self, obj):
  301. """Add a single object to this object store.
  302. """
  303. self._data[obj.id] = obj
  304. def add_objects(self, objects):
  305. """Add a set of objects to this object store.
  306. :param objects: Iterable over a list of objects.
  307. """
  308. for obj, path in objects:
  309. self._data[obj.id] = obj
  310. class ObjectImporter(object):
  311. """Interface for importing objects."""
  312. def __init__(self, count):
  313. """Create a new ObjectImporter.
  314. :param count: Number of objects that's going to be imported.
  315. """
  316. self.count = count
  317. def add_object(self, object):
  318. """Add an object."""
  319. raise NotImplementedError(self.add_object)
  320. def finish(self, object):
  321. """Finish the imoprt and write objects to disk."""
  322. raise NotImplementedError(self.finish)
  323. class ObjectIterator(object):
  324. """Interface for iterating over objects."""
  325. def iterobjects(self):
  326. raise NotImplementedError(self.iterobjects)
  327. class ObjectStoreIterator(ObjectIterator):
  328. """ObjectIterator that works on top of an ObjectStore."""
  329. def __init__(self, store, sha_iter):
  330. """Create a new ObjectIterator.
  331. :param store: Object store to retrieve from
  332. :param sha_iter: Iterator over (sha, path) tuples
  333. """
  334. self.store = store
  335. self.sha_iter = sha_iter
  336. self._shas = []
  337. def __iter__(self):
  338. """Yield tuple with next object and path."""
  339. for sha, path in self.itershas():
  340. yield self.store[sha], path
  341. def iterobjects(self):
  342. """Iterate over just the objects."""
  343. for o, path in self:
  344. yield o
  345. def itershas(self):
  346. """Iterate over the SHAs."""
  347. for sha in self._shas:
  348. yield sha
  349. for sha in self.sha_iter:
  350. self._shas.append(sha)
  351. yield sha
  352. def __contains__(self, needle):
  353. """Check if an object is present.
  354. :note: This checks if the object is present in
  355. the underlying object store, not if it would
  356. be yielded by the iterator.
  357. :param needle: SHA1 of the object to check for
  358. """
  359. return needle in self.store
  360. def __getitem__(self, key):
  361. """Find an object by SHA1.
  362. :note: This retrieves the object from the underlying
  363. object store. It will also succeed if the object would
  364. not be returned by the iterator.
  365. """
  366. return self.store[key]
  367. def __len__(self):
  368. """Return the number of objects."""
  369. return len(list(self.itershas()))
  370. def tree_lookup_path(lookup_obj, root_sha, path):
  371. """Lookup an object in a Git tree.
  372. :param lookup_obj: Callback for retrieving object by SHA1
  373. :param root_sha: SHA1 of the root tree
  374. :param path: Path to lookup
  375. """
  376. parts = path.split("/")
  377. sha = root_sha
  378. for p in parts:
  379. obj = lookup_obj(sha)
  380. if type(obj) is not Tree:
  381. raise NotTreeError(sha)
  382. if p == '':
  383. continue
  384. mode, sha = obj[p]
  385. return lookup_obj(sha)
  386. class MissingObjectFinder(object):
  387. """Find the objects missing from another object store.
  388. :param object_store: Object store containing at least all objects to be
  389. sent
  390. :param haves: SHA1s of commits not to send (already present in target)
  391. :param wants: SHA1s of commits to send
  392. :param progress: Optional function to report progress to.
  393. """
  394. def __init__(self, object_store, haves, wants, progress=None):
  395. self.sha_done = set(haves)
  396. self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
  397. self.object_store = object_store
  398. if progress is None:
  399. self.progress = lambda x: None
  400. else:
  401. self.progress = progress
  402. def add_todo(self, entries):
  403. self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
  404. def parse_tree(self, tree):
  405. self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries()])
  406. def parse_commit(self, commit):
  407. self.add_todo([(commit.tree, "", False)])
  408. self.add_todo([(p, None, False) for p in commit.parents])
  409. def parse_tag(self, tag):
  410. self.add_todo([(tag.object[1], None, False)])
  411. def next(self):
  412. if not self.objects_to_send:
  413. return None
  414. (sha, name, leaf) = self.objects_to_send.pop()
  415. if not leaf:
  416. o = self.object_store[sha]
  417. if isinstance(o, Commit):
  418. self.parse_commit(o)
  419. elif isinstance(o, Tree):
  420. self.parse_tree(o)
  421. elif isinstance(o, Tag):
  422. self.parse_tag(o)
  423. self.sha_done.add(sha)
  424. self.progress("counting objects: %d\r" % len(self.sha_done))
  425. return (sha, name)
  426. class ObjectStoreGraphWalker(object):
  427. """Graph walker that finds out what commits are missing from an object store."""
  428. def __init__(self, local_heads, get_parents):
  429. """Create a new instance.
  430. :param local_heads: Heads to start search with
  431. :param get_parents: Function for finding the parents of a SHA1.
  432. """
  433. self.heads = set(local_heads)
  434. self.get_parents = get_parents
  435. self.parents = {}
  436. def ack(self, sha):
  437. """Ack that a particular revision and its ancestors are present in the source."""
  438. if sha in self.heads:
  439. self.heads.remove(sha)
  440. if sha in self.parents:
  441. for p in self.parents[sha]:
  442. self.ack(p)
  443. def next(self):
  444. """Iterate over ancestors of heads in the target."""
  445. if self.heads:
  446. ret = self.heads.pop()
  447. ps = self.get_parents(ret)
  448. self.parents[ret] = ps
  449. self.heads.update(ps)
  450. return ret
  451. return None