2
0

object_store.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Git object store interfaces and implementation."""
  19. import itertools
  20. import os
  21. import stat
  22. import tempfile
  23. import urllib2
  24. from dulwich.errors import (
  25. NotTreeError,
  26. )
  27. from dulwich.objects import (
  28. Commit,
  29. ShaFile,
  30. Tag,
  31. Tree,
  32. hex_to_sha,
  33. sha_to_hex,
  34. S_ISGITLINK,
  35. )
  36. from dulwich.pack import (
  37. Pack,
  38. PackData,
  39. iter_sha1,
  40. load_pack_index,
  41. write_pack,
  42. write_pack_data,
  43. write_pack_index_v2,
  44. )
  45. PACKDIR = 'pack'
  46. class BaseObjectStore(object):
  47. """Object store interface."""
  48. def determine_wants_all(self, refs):
  49. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  50. def iter_shas(self, shas):
  51. """Iterate over the objects for the specified shas.
  52. :param shas: Iterable object with SHAs
  53. :return: Object iterator
  54. """
  55. return ObjectStoreIterator(self, shas)
  56. def __contains__(self, sha):
  57. """Check if a particular object is present by SHA1."""
  58. raise NotImplementedError(self.__contains__)
  59. def get_raw(self, name):
  60. """Obtain the raw text for an object.
  61. :param name: sha for the object.
  62. :return: tuple with object type and object contents.
  63. """
  64. raise NotImplementedError(self.get_raw)
  65. def __getitem__(self, sha):
  66. """Obtain an object by SHA1."""
  67. type, uncomp = self.get_raw(sha)
  68. return ShaFile.from_raw_string(type, uncomp)
  69. def __iter__(self):
  70. """Iterate over the SHAs that are present in this store."""
  71. raise NotImplementedError(self.__iter__)
  72. def add_object(self, obj):
  73. """Add a single object to this object store.
  74. """
  75. raise NotImplementedError(self.add_object)
  76. def add_objects(self, objects):
  77. """Add a set of objects to this object store.
  78. :param objects: Iterable over a list of objects.
  79. """
  80. raise NotImplementedError(self.add_objects)
  81. def iter_tree_contents(self, tree):
  82. """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
  83. :param tree: SHA1 of the root of the tree
  84. """
  85. todo = set([(tree, "")])
  86. while todo:
  87. (tid, tpath) = todo.pop()
  88. tree = self[tid]
  89. for name, mode, hexsha in tree.iteritems():
  90. if tpath == "":
  91. path = name
  92. else:
  93. path = "%s/%s" % (tpath, name)
  94. if stat.S_ISDIR(mode):
  95. todo.add((hexsha, path))
  96. else:
  97. yield path, mode, hexsha
  98. def find_missing_objects(self, haves, wants, progress=None):
  99. """Find the missing objects required for a set of revisions.
  100. :param haves: Iterable over SHAs already in common.
  101. :param wants: Iterable over SHAs of objects to fetch.
  102. :param progress: Simple progress function that will be called with
  103. updated progress strings.
  104. :return: Iterator over (sha, path) pairs.
  105. """
  106. return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
  107. def find_common_revisions(self, graphwalker):
  108. """Find which revisions this store has in common using graphwalker.
  109. :param graphwalker: A graphwalker object.
  110. :return: List of SHAs that are in common
  111. """
  112. haves = []
  113. sha = graphwalker.next()
  114. while sha:
  115. if sha in self:
  116. haves.append(sha)
  117. graphwalker.ack(sha)
  118. sha = graphwalker.next()
  119. return haves
  120. def get_graph_walker(self, heads):
  121. """Obtain a graph walker for this object store.
  122. :param heads: Local heads to start search with
  123. :return: GraphWalker object
  124. """
  125. return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
  126. def generate_pack_contents(self, have, want):
  127. """Iterate over the contents of a pack file.
  128. :param have: List of SHA1s of objects that should not be sent
  129. :param want: List of SHA1s of objects that should be sent
  130. """
  131. return self.iter_shas(self.find_missing_objects(have, want))
  132. class DiskObjectStore(BaseObjectStore):
  133. """Git-style object store that exists on disk."""
  134. def __init__(self, path):
  135. """Open an object store.
  136. :param path: Path of the object store.
  137. """
  138. self.path = path
  139. self._pack_cache = None
  140. self.pack_dir = os.path.join(self.path, PACKDIR)
  141. def __contains__(self, sha):
  142. """Check if a particular object is present by SHA1."""
  143. for pack in self.packs:
  144. if sha in pack:
  145. return True
  146. ret = self._get_shafile(sha)
  147. if ret is not None:
  148. return True
  149. return False
  150. def __iter__(self):
  151. """Iterate over the SHAs that are present in this store."""
  152. iterables = self.packs + [self._iter_shafile_shas()]
  153. return itertools.chain(*iterables)
  154. @property
  155. def packs(self):
  156. """List with pack objects."""
  157. if self._pack_cache is None:
  158. self._pack_cache = list(self._load_packs())
  159. return self._pack_cache
  160. def _load_packs(self):
  161. if not os.path.exists(self.pack_dir):
  162. return
  163. for name in os.listdir(self.pack_dir):
  164. if name.startswith("pack-") and name.endswith(".pack"):
  165. yield Pack(os.path.join(self.pack_dir, name[:-len(".pack")]))
  166. def _add_known_pack(self, path):
  167. """Add a newly appeared pack to the cache by path.
  168. """
  169. if self._pack_cache is not None:
  170. self._pack_cache.append(Pack(path))
  171. def _get_shafile_path(self, sha):
  172. dir = sha[:2]
  173. file = sha[2:]
  174. # Check from object dir
  175. return os.path.join(self.path, dir, file)
  176. def _iter_shafile_shas(self):
  177. for base in os.listdir(self.path):
  178. if len(base) != 2:
  179. continue
  180. for rest in os.listdir(os.path.join(self.path, base)):
  181. yield base+rest
  182. def _get_shafile(self, sha):
  183. path = self._get_shafile_path(sha)
  184. if os.path.exists(path):
  185. return ShaFile.from_file(path)
  186. return None
  187. def _add_shafile(self, sha, o):
  188. dir = os.path.join(self.path, sha[:2])
  189. if not os.path.isdir(dir):
  190. os.mkdir(dir)
  191. path = os.path.join(dir, sha[2:])
  192. if os.path.exists(path):
  193. return # Already there, no need to write again
  194. f = open(path, 'w+')
  195. try:
  196. f.write(o.as_legacy_object())
  197. finally:
  198. f.close()
  199. def get_raw(self, name):
  200. """Obtain the raw text for an object.
  201. :param name: sha for the object.
  202. :return: tuple with object type and object contents.
  203. """
  204. if len(name) == 40:
  205. sha = hex_to_sha(name)
  206. hexsha = name
  207. elif len(name) == 20:
  208. sha = name
  209. hexsha = None
  210. else:
  211. raise AssertionError
  212. for pack in self.packs:
  213. try:
  214. return pack.get_raw(sha)
  215. except KeyError:
  216. pass
  217. if hexsha is None:
  218. hexsha = sha_to_hex(name)
  219. ret = self._get_shafile(hexsha)
  220. if ret is not None:
  221. return ret.type, ret.as_raw_string()
  222. raise KeyError(hexsha)
  223. def move_in_thin_pack(self, path):
  224. """Move a specific file containing a pack into the pack directory.
  225. :note: The file should be on the same file system as the
  226. packs directory.
  227. :param path: Path to the pack file.
  228. """
  229. data = PackData(path)
  230. # Write index for the thin pack (do we really need this?)
  231. temppath = os.path.join(self.pack_dir,
  232. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  233. data.create_index_v2(temppath, self.get_raw)
  234. p = Pack.from_objects(data, load_pack_index(temppath))
  235. # Write a full pack version
  236. temppath = os.path.join(self.pack_dir,
  237. sha_to_hex(urllib2.randombytes(20))+".temppack")
  238. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  239. len(p))
  240. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  241. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  242. os.rename(temppath+".pack", newbasename+".pack")
  243. os.rename(temppath+".idx", newbasename+".idx")
  244. self._add_known_pack(newbasename)
  245. def move_in_pack(self, path):
  246. """Move a specific file containing a pack into the pack directory.
  247. :note: The file should be on the same file system as the
  248. packs directory.
  249. :param path: Path to the pack file.
  250. """
  251. p = PackData(path)
  252. entries = p.sorted_entries()
  253. basename = os.path.join(self.pack_dir,
  254. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  255. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  256. p.close()
  257. os.rename(path, basename + ".pack")
  258. self._add_known_pack(basename)
  259. def add_thin_pack(self):
  260. """Add a new thin pack to this object store.
  261. Thin packs are packs that contain deltas with parents that exist
  262. in a different pack.
  263. """
  264. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  265. f = os.fdopen(fd, 'wb')
  266. def commit():
  267. os.fsync(fd)
  268. f.close()
  269. if os.path.getsize(path) > 0:
  270. self.move_in_thin_pack(path)
  271. return f, commit
  272. def add_pack(self):
  273. """Add a new pack to this object store.
  274. :return: Fileobject to write to and a commit function to
  275. call when the pack is finished.
  276. """
  277. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  278. f = os.fdopen(fd, 'wb')
  279. def commit():
  280. os.fsync(fd)
  281. f.close()
  282. if os.path.getsize(path) > 0:
  283. self.move_in_pack(path)
  284. return f, commit
  285. def add_object(self, obj):
  286. """Add a single object to this object store.
  287. :param obj: Object to add
  288. """
  289. self._add_shafile(obj.id, obj)
  290. def add_objects(self, objects):
  291. """Add a set of objects to this object store.
  292. :param objects: Iterable over objects, should support __len__.
  293. """
  294. if len(objects) == 0:
  295. # Don't bother writing an empty pack file
  296. return
  297. f, commit = self.add_pack()
  298. write_pack_data(f, objects, len(objects))
  299. commit()
  300. class MemoryObjectStore(BaseObjectStore):
  301. """Object store that keeps all objects in memory."""
  302. def __init__(self):
  303. super(MemoryObjectStore, self).__init__()
  304. self._data = {}
  305. def __contains__(self, sha):
  306. """Check if the object with a particular SHA is present."""
  307. return sha in self._data
  308. def __iter__(self):
  309. """Iterate over the SHAs that are present in this store."""
  310. return self._data.iterkeys()
  311. def get_raw(self, name):
  312. """Obtain the raw text for an object.
  313. :param name: sha for the object.
  314. :return: tuple with object type and object contents.
  315. """
  316. return self[name].as_raw_string()
  317. def __getitem__(self, name):
  318. return self._data[name]
  319. def add_object(self, obj):
  320. """Add a single object to this object store.
  321. """
  322. self._data[obj.id] = obj
  323. def add_objects(self, objects):
  324. """Add a set of objects to this object store.
  325. :param objects: Iterable over a list of objects.
  326. """
  327. for obj, path in objects:
  328. self._data[obj.id] = obj
  329. class ObjectImporter(object):
  330. """Interface for importing objects."""
  331. def __init__(self, count):
  332. """Create a new ObjectImporter.
  333. :param count: Number of objects that's going to be imported.
  334. """
  335. self.count = count
  336. def add_object(self, object):
  337. """Add an object."""
  338. raise NotImplementedError(self.add_object)
  339. def finish(self, object):
  340. """Finish the imoprt and write objects to disk."""
  341. raise NotImplementedError(self.finish)
  342. class ObjectIterator(object):
  343. """Interface for iterating over objects."""
  344. def iterobjects(self):
  345. raise NotImplementedError(self.iterobjects)
  346. class ObjectStoreIterator(ObjectIterator):
  347. """ObjectIterator that works on top of an ObjectStore."""
  348. def __init__(self, store, sha_iter):
  349. """Create a new ObjectIterator.
  350. :param store: Object store to retrieve from
  351. :param sha_iter: Iterator over (sha, path) tuples
  352. """
  353. self.store = store
  354. self.sha_iter = sha_iter
  355. self._shas = []
  356. def __iter__(self):
  357. """Yield tuple with next object and path."""
  358. for sha, path in self.itershas():
  359. yield self.store[sha], path
  360. def iterobjects(self):
  361. """Iterate over just the objects."""
  362. for o, path in self:
  363. yield o
  364. def itershas(self):
  365. """Iterate over the SHAs."""
  366. for sha in self._shas:
  367. yield sha
  368. for sha in self.sha_iter:
  369. self._shas.append(sha)
  370. yield sha
  371. def __contains__(self, needle):
  372. """Check if an object is present.
  373. :note: This checks if the object is present in
  374. the underlying object store, not if it would
  375. be yielded by the iterator.
  376. :param needle: SHA1 of the object to check for
  377. """
  378. return needle in self.store
  379. def __getitem__(self, key):
  380. """Find an object by SHA1.
  381. :note: This retrieves the object from the underlying
  382. object store. It will also succeed if the object would
  383. not be returned by the iterator.
  384. """
  385. return self.store[key]
  386. def __len__(self):
  387. """Return the number of objects."""
  388. return len(list(self.itershas()))
  389. def tree_lookup_path(lookup_obj, root_sha, path):
  390. """Lookup an object in a Git tree.
  391. :param lookup_obj: Callback for retrieving object by SHA1
  392. :param root_sha: SHA1 of the root tree
  393. :param path: Path to lookup
  394. """
  395. parts = path.split("/")
  396. sha = root_sha
  397. for p in parts:
  398. obj = lookup_obj(sha)
  399. if type(obj) is not Tree:
  400. raise NotTreeError(sha)
  401. if p == '':
  402. continue
  403. mode, sha = obj[p]
  404. return mode, sha
  405. class MissingObjectFinder(object):
  406. """Find the objects missing from another object store.
  407. :param object_store: Object store containing at least all objects to be
  408. sent
  409. :param haves: SHA1s of commits not to send (already present in target)
  410. :param wants: SHA1s of commits to send
  411. :param progress: Optional function to report progress to.
  412. """
  413. def __init__(self, object_store, haves, wants, progress=None):
  414. self.sha_done = set(haves)
  415. self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
  416. self.object_store = object_store
  417. if progress is None:
  418. self.progress = lambda x: None
  419. else:
  420. self.progress = progress
  421. def add_todo(self, entries):
  422. self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
  423. def parse_tree(self, tree):
  424. self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
  425. def parse_commit(self, commit):
  426. self.add_todo([(commit.tree, "", False)])
  427. self.add_todo([(p, None, False) for p in commit.parents])
  428. def parse_tag(self, tag):
  429. self.add_todo([(tag.object[1], None, False)])
  430. def next(self):
  431. if not self.objects_to_send:
  432. return None
  433. (sha, name, leaf) = self.objects_to_send.pop()
  434. if not leaf:
  435. o = self.object_store[sha]
  436. if isinstance(o, Commit):
  437. self.parse_commit(o)
  438. elif isinstance(o, Tree):
  439. self.parse_tree(o)
  440. elif isinstance(o, Tag):
  441. self.parse_tag(o)
  442. self.sha_done.add(sha)
  443. self.progress("counting objects: %d\r" % len(self.sha_done))
  444. return (sha, name)
  445. class ObjectStoreGraphWalker(object):
  446. """Graph walker that finds out what commits are missing from an object store."""
  447. def __init__(self, local_heads, get_parents):
  448. """Create a new instance.
  449. :param local_heads: Heads to start search with
  450. :param get_parents: Function for finding the parents of a SHA1.
  451. """
  452. self.heads = set(local_heads)
  453. self.get_parents = get_parents
  454. self.parents = {}
  455. def ack(self, sha):
  456. """Ack that a particular revision and its ancestors are present in the source."""
  457. if sha in self.heads:
  458. self.heads.remove(sha)
  459. if sha in self.parents:
  460. for p in self.parents[sha]:
  461. self.ack(p)
  462. def next(self):
  463. """Iterate over ancestors of heads in the target."""
  464. if self.heads:
  465. ret = self.heads.pop()
  466. ps = self.get_parents(ret)
  467. self.parents[ret] = ps
  468. self.heads.update(ps)
  469. return ret
  470. return None