object_store.py 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Git object store interfaces and implementation."""
  19. import errno
  20. import itertools
  21. import os
  22. import stat
  23. import tempfile
  24. import urllib2
  25. from dulwich.errors import (
  26. NotTreeError,
  27. )
  28. from dulwich.file import GitFile
  29. from dulwich.objects import (
  30. Commit,
  31. ShaFile,
  32. Tag,
  33. Tree,
  34. hex_to_sha,
  35. sha_to_hex,
  36. S_ISGITLINK,
  37. )
  38. from dulwich.pack import (
  39. Pack,
  40. PackData,
  41. iter_sha1,
  42. load_pack_index,
  43. write_pack,
  44. write_pack_data,
  45. write_pack_index_v2,
  46. )
  47. PACKDIR = 'pack'
  48. class BaseObjectStore(object):
  49. """Object store interface."""
  50. def determine_wants_all(self, refs):
  51. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  52. def iter_shas(self, shas):
  53. """Iterate over the objects for the specified shas.
  54. :param shas: Iterable object with SHAs
  55. :return: Object iterator
  56. """
  57. return ObjectStoreIterator(self, shas)
  58. def contains_loose(self, sha):
  59. """Check if a particular object is present by SHA1 and is loose."""
  60. raise NotImplementedError(self.contains_loose)
  61. def contains_packed(self, sha):
  62. """Check if a particular object is present by SHA1 and is packed."""
  63. raise NotImplementedError(self.contains_packed)
  64. def __contains__(self, sha):
  65. """Check if a particular object is present by SHA1.
  66. This method makes no distinction between loose and packed objects.
  67. """
  68. return self.contains_packed(sha) or self.contains_loose(sha)
  69. @property
  70. def packs(self):
  71. """Iterable of pack objects."""
  72. raise NotImplementedError
  73. def get_raw(self, name):
  74. """Obtain the raw text for an object.
  75. :param name: sha for the object.
  76. :return: tuple with object type and object contents.
  77. """
  78. raise NotImplementedError(self.get_raw)
  79. def __getitem__(self, sha):
  80. """Obtain an object by SHA1."""
  81. type, uncomp = self.get_raw(sha)
  82. return ShaFile.from_raw_string(type, uncomp)
  83. def __iter__(self):
  84. """Iterate over the SHAs that are present in this store."""
  85. raise NotImplementedError(self.__iter__)
  86. def add_object(self, obj):
  87. """Add a single object to this object store.
  88. """
  89. raise NotImplementedError(self.add_object)
  90. def add_objects(self, objects):
  91. """Add a set of objects to this object store.
  92. :param objects: Iterable over a list of objects.
  93. """
  94. raise NotImplementedError(self.add_objects)
  95. def tree_changes(self, source, target, want_unchanged=False):
  96. """Find the differences between the contents of two trees
  97. :param object_store: Object store to use for retrieving tree contents
  98. :param tree: SHA1 of the root tree
  99. :param want_unchanged: Whether unchanged files should be reported
  100. :return: Iterator over tuples with (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
  101. """
  102. todo = set([(source, target, "")])
  103. while todo:
  104. (sid, tid, path) = todo.pop()
  105. if sid is not None:
  106. stree = self[sid]
  107. else:
  108. stree = {}
  109. if tid is not None:
  110. ttree = self[tid]
  111. else:
  112. ttree = {}
  113. for name, oldmode, oldhexsha in stree.iteritems():
  114. if path == "":
  115. oldchildpath = name
  116. else:
  117. oldchildpath = "%s/%s" % (path, name)
  118. try:
  119. (newmode, newhexsha) = ttree[name]
  120. newchildpath = oldchildpath
  121. except KeyError:
  122. newmode = None
  123. newhexsha = None
  124. newchildpath = None
  125. if (want_unchanged or oldmode != newmode or
  126. oldhexsha != newhexsha):
  127. if stat.S_ISDIR(oldmode):
  128. if newmode is None or stat.S_ISDIR(newmode):
  129. todo.add((oldhexsha, newhexsha, oldchildpath))
  130. else:
  131. # entry became a file
  132. todo.add((oldhexsha, None, oldchildpath))
  133. yield ((None, newchildpath), (None, newmode), (None, newhexsha))
  134. else:
  135. if newmode is not None and stat.S_ISDIR(newmode):
  136. # entry became a dir
  137. yield ((oldchildpath, None), (oldmode, None), (oldhexsha, None))
  138. todo.add((None, newhexsha, newchildpath))
  139. else:
  140. yield ((oldchildpath, newchildpath), (oldmode, newmode), (oldhexsha, newhexsha))
  141. for name, newmode, newhexsha in ttree.iteritems():
  142. if path == "":
  143. childpath = name
  144. else:
  145. childpath = "%s/%s" % (path, name)
  146. if not name in stree:
  147. if not stat.S_ISDIR(newmode):
  148. yield ((None, childpath), (None, newmode), (None, newhexsha))
  149. else:
  150. todo.add((None, newhexsha, childpath))
  151. def iter_tree_contents(self, tree):
  152. """Yield (path, mode, hexsha) tuples for all non-Tree objects in a tree.
  153. :param tree: SHA1 of the root of the tree
  154. """
  155. todo = set([(tree, "")])
  156. while todo:
  157. (tid, tpath) = todo.pop()
  158. tree = self[tid]
  159. for name, mode, hexsha in tree.iteritems():
  160. if tpath == "":
  161. path = name
  162. else:
  163. path = "%s/%s" % (tpath, name)
  164. if stat.S_ISDIR(mode):
  165. todo.add((hexsha, path))
  166. else:
  167. yield path, mode, hexsha
  168. def find_missing_objects(self, haves, wants, progress=None,
  169. get_tagged=None):
  170. """Find the missing objects required for a set of revisions.
  171. :param haves: Iterable over SHAs already in common.
  172. :param wants: Iterable over SHAs of objects to fetch.
  173. :param progress: Simple progress function that will be called with
  174. updated progress strings.
  175. :param get_tagged: Function that returns a dict of pointed-to sha -> tag
  176. sha for including tags.
  177. :return: Iterator over (sha, path) pairs.
  178. """
  179. finder = MissingObjectFinder(self, haves, wants, progress, get_tagged)
  180. return iter(finder.next, None)
  181. def find_common_revisions(self, graphwalker):
  182. """Find which revisions this store has in common using graphwalker.
  183. :param graphwalker: A graphwalker object.
  184. :return: List of SHAs that are in common
  185. """
  186. haves = []
  187. sha = graphwalker.next()
  188. while sha:
  189. if sha in self:
  190. haves.append(sha)
  191. graphwalker.ack(sha)
  192. sha = graphwalker.next()
  193. return haves
  194. def get_graph_walker(self, heads):
  195. """Obtain a graph walker for this object store.
  196. :param heads: Local heads to start search with
  197. :return: GraphWalker object
  198. """
  199. return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
  200. def generate_pack_contents(self, have, want):
  201. """Iterate over the contents of a pack file.
  202. :param have: List of SHA1s of objects that should not be sent
  203. :param want: List of SHA1s of objects that should be sent
  204. """
  205. return self.iter_shas(self.find_missing_objects(have, want))
  206. class PackBasedObjectStore(BaseObjectStore):
  207. def __init__(self):
  208. self._pack_cache = None
  209. def contains_packed(self, sha):
  210. """Check if a particular object is present by SHA1 and is packed."""
  211. for pack in self.packs:
  212. if sha in pack:
  213. return True
  214. return False
  215. def _load_packs(self):
  216. raise NotImplementedError(self._load_packs)
  217. def _pack_cache_stale(self):
  218. """Check whether the pack cache is stale."""
  219. raise NotImplementedError(self._pack_cache_stale)
  220. def _add_known_pack(self, pack):
  221. """Add a newly appeared pack to the cache by path.
  222. """
  223. if self._pack_cache is not None:
  224. self._pack_cache.append(pack)
  225. @property
  226. def packs(self):
  227. """List with pack objects."""
  228. if self._pack_cache is None or self._pack_cache_stale():
  229. self._pack_cache = self._load_packs()
  230. return self._pack_cache
  231. def _iter_loose_objects(self):
  232. raise NotImplementedError(self._iter_loose_objects)
  233. def _get_loose_object(self, sha):
  234. raise NotImplementedError(self._get_loose_object)
  235. def __iter__(self):
  236. """Iterate over the SHAs that are present in this store."""
  237. iterables = self.packs + [self._iter_loose_objects()]
  238. return itertools.chain(*iterables)
  239. def contains_loose(self, sha):
  240. """Check if a particular object is present by SHA1 and is loose."""
  241. return self._get_loose_object(sha) is not None
  242. def get_raw(self, name):
  243. """Obtain the raw text for an object.
  244. :param name: sha for the object.
  245. :return: tuple with object type and object contents.
  246. """
  247. if len(name) == 40:
  248. sha = hex_to_sha(name)
  249. hexsha = name
  250. elif len(name) == 20:
  251. sha = name
  252. hexsha = None
  253. else:
  254. raise AssertionError
  255. for pack in self.packs:
  256. try:
  257. return pack.get_raw(sha)
  258. except KeyError:
  259. pass
  260. if hexsha is None:
  261. hexsha = sha_to_hex(name)
  262. ret = self._get_loose_object(hexsha)
  263. if ret is not None:
  264. return ret.type, ret.as_raw_string()
  265. raise KeyError(hexsha)
  266. def add_objects(self, objects):
  267. """Add a set of objects to this object store.
  268. :param objects: Iterable over objects, should support __len__.
  269. """
  270. if len(objects) == 0:
  271. # Don't bother writing an empty pack file
  272. return
  273. f, commit = self.add_pack()
  274. write_pack_data(f, objects, len(objects))
  275. commit()
  276. class DiskObjectStore(PackBasedObjectStore):
  277. """Git-style object store that exists on disk."""
  278. def __init__(self, path):
  279. """Open an object store.
  280. :param path: Path of the object store.
  281. """
  282. super(DiskObjectStore, self).__init__()
  283. self.path = path
  284. self.pack_dir = os.path.join(self.path, PACKDIR)
  285. self._pack_cache_time = 0
  286. def _load_packs(self):
  287. pack_files = []
  288. try:
  289. self._pack_cache_time = os.stat(self.pack_dir).st_mtime
  290. pack_dir_contents = os.listdir(self.pack_dir)
  291. for name in pack_dir_contents:
  292. # TODO: verify that idx exists first
  293. if name.startswith("pack-") and name.endswith(".pack"):
  294. filename = os.path.join(self.pack_dir, name)
  295. pack_files.append((os.stat(filename).st_mtime, filename))
  296. except OSError, e:
  297. if e.errno == errno.ENOENT:
  298. return []
  299. raise
  300. pack_files.sort(reverse=True)
  301. suffix_len = len(".pack")
  302. return [Pack(f[:-suffix_len]) for _, f in pack_files]
  303. def _pack_cache_stale(self):
  304. try:
  305. return os.stat(self.pack_dir).st_mtime > self._pack_cache_time
  306. except OSError, e:
  307. if e.errno == errno.ENOENT:
  308. return True
  309. raise
  310. def _get_shafile_path(self, sha):
  311. dir = sha[:2]
  312. file = sha[2:]
  313. # Check from object dir
  314. return os.path.join(self.path, dir, file)
  315. def _iter_loose_objects(self):
  316. for base in os.listdir(self.path):
  317. if len(base) != 2:
  318. continue
  319. for rest in os.listdir(os.path.join(self.path, base)):
  320. yield base+rest
  321. def _get_loose_object(self, sha):
  322. path = self._get_shafile_path(sha)
  323. try:
  324. return ShaFile.from_file(path)
  325. except OSError, e:
  326. if e.errno == errno.ENOENT:
  327. return None
  328. raise
  329. def move_in_thin_pack(self, path):
  330. """Move a specific file containing a pack into the pack directory.
  331. :note: The file should be on the same file system as the
  332. packs directory.
  333. :param path: Path to the pack file.
  334. """
  335. data = PackData(path)
  336. # Write index for the thin pack (do we really need this?)
  337. temppath = os.path.join(self.pack_dir,
  338. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  339. data.create_index_v2(temppath, self.get_raw)
  340. p = Pack.from_objects(data, load_pack_index(temppath))
  341. # Write a full pack version
  342. temppath = os.path.join(self.pack_dir,
  343. sha_to_hex(urllib2.randombytes(20))+".temppack")
  344. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  345. len(p))
  346. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  347. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  348. os.rename(temppath+".pack", newbasename+".pack")
  349. os.rename(temppath+".idx", newbasename+".idx")
  350. self._add_known_pack(Pack(newbasename))
  351. def move_in_pack(self, path):
  352. """Move a specific file containing a pack into the pack directory.
  353. :note: The file should be on the same file system as the
  354. packs directory.
  355. :param path: Path to the pack file.
  356. """
  357. p = PackData(path)
  358. entries = p.sorted_entries()
  359. basename = os.path.join(self.pack_dir,
  360. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  361. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  362. p.close()
  363. os.rename(path, basename + ".pack")
  364. self._add_known_pack(Pack(basename))
  365. def add_thin_pack(self):
  366. """Add a new thin pack to this object store.
  367. Thin packs are packs that contain deltas with parents that exist
  368. in a different pack.
  369. """
  370. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  371. f = os.fdopen(fd, 'wb')
  372. def commit():
  373. os.fsync(fd)
  374. f.close()
  375. if os.path.getsize(path) > 0:
  376. self.move_in_thin_pack(path)
  377. return f, commit
  378. def add_pack(self):
  379. """Add a new pack to this object store.
  380. :return: Fileobject to write to and a commit function to
  381. call when the pack is finished.
  382. """
  383. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  384. f = os.fdopen(fd, 'wb')
  385. def commit():
  386. os.fsync(fd)
  387. f.close()
  388. if os.path.getsize(path) > 0:
  389. self.move_in_pack(path)
  390. return f, commit
  391. def add_object(self, obj):
  392. """Add a single object to this object store.
  393. :param obj: Object to add
  394. """
  395. dir = os.path.join(self.path, obj.id[:2])
  396. if not os.path.isdir(dir):
  397. os.mkdir(dir)
  398. path = os.path.join(dir, obj.id[2:])
  399. if os.path.exists(path):
  400. return # Already there, no need to write again
  401. f = GitFile(path, 'wb')
  402. try:
  403. f.write(obj.as_legacy_object())
  404. finally:
  405. f.close()
  406. class MemoryObjectStore(BaseObjectStore):
  407. """Object store that keeps all objects in memory."""
  408. def __init__(self):
  409. super(MemoryObjectStore, self).__init__()
  410. self._data = {}
  411. def contains_loose(self, sha):
  412. """Check if a particular object is present by SHA1 and is loose."""
  413. return sha in self._data
  414. def contains_packed(self, sha):
  415. """Check if a particular object is present by SHA1 and is packed."""
  416. return False
  417. def __iter__(self):
  418. """Iterate over the SHAs that are present in this store."""
  419. return self._data.iterkeys()
  420. @property
  421. def packs(self):
  422. """List with pack objects."""
  423. return []
  424. def get_raw(self, name):
  425. """Obtain the raw text for an object.
  426. :param name: sha for the object.
  427. :return: tuple with object type and object contents.
  428. """
  429. return self[name].as_raw_string()
  430. def __getitem__(self, name):
  431. return self._data[name]
  432. def add_object(self, obj):
  433. """Add a single object to this object store.
  434. """
  435. self._data[obj.id] = obj
  436. def add_objects(self, objects):
  437. """Add a set of objects to this object store.
  438. :param objects: Iterable over a list of objects.
  439. """
  440. for obj, path in objects:
  441. self._data[obj.id] = obj
  442. class ObjectImporter(object):
  443. """Interface for importing objects."""
  444. def __init__(self, count):
  445. """Create a new ObjectImporter.
  446. :param count: Number of objects that's going to be imported.
  447. """
  448. self.count = count
  449. def add_object(self, object):
  450. """Add an object."""
  451. raise NotImplementedError(self.add_object)
  452. def finish(self, object):
  453. """Finish the imoprt and write objects to disk."""
  454. raise NotImplementedError(self.finish)
  455. class ObjectIterator(object):
  456. """Interface for iterating over objects."""
  457. def iterobjects(self):
  458. raise NotImplementedError(self.iterobjects)
  459. class ObjectStoreIterator(ObjectIterator):
  460. """ObjectIterator that works on top of an ObjectStore."""
  461. def __init__(self, store, sha_iter):
  462. """Create a new ObjectIterator.
  463. :param store: Object store to retrieve from
  464. :param sha_iter: Iterator over (sha, path) tuples
  465. """
  466. self.store = store
  467. self.sha_iter = sha_iter
  468. self._shas = []
  469. def __iter__(self):
  470. """Yield tuple with next object and path."""
  471. for sha, path in self.itershas():
  472. yield self.store[sha], path
  473. def iterobjects(self):
  474. """Iterate over just the objects."""
  475. for o, path in self:
  476. yield o
  477. def itershas(self):
  478. """Iterate over the SHAs."""
  479. for sha in self._shas:
  480. yield sha
  481. for sha in self.sha_iter:
  482. self._shas.append(sha)
  483. yield sha
  484. def __contains__(self, needle):
  485. """Check if an object is present.
  486. :note: This checks if the object is present in
  487. the underlying object store, not if it would
  488. be yielded by the iterator.
  489. :param needle: SHA1 of the object to check for
  490. """
  491. return needle in self.store
  492. def __getitem__(self, key):
  493. """Find an object by SHA1.
  494. :note: This retrieves the object from the underlying
  495. object store. It will also succeed if the object would
  496. not be returned by the iterator.
  497. """
  498. return self.store[key]
  499. def __len__(self):
  500. """Return the number of objects."""
  501. return len(list(self.itershas()))
  502. def tree_lookup_path(lookup_obj, root_sha, path):
  503. """Lookup an object in a Git tree.
  504. :param lookup_obj: Callback for retrieving object by SHA1
  505. :param root_sha: SHA1 of the root tree
  506. :param path: Path to lookup
  507. """
  508. parts = path.split("/")
  509. sha = root_sha
  510. mode = None
  511. for p in parts:
  512. obj = lookup_obj(sha)
  513. if type(obj) is not Tree:
  514. raise NotTreeError(sha)
  515. if p == '':
  516. continue
  517. mode, sha = obj[p]
  518. return mode, sha
  519. class MissingObjectFinder(object):
  520. """Find the objects missing from another object store.
  521. :param object_store: Object store containing at least all objects to be
  522. sent
  523. :param haves: SHA1s of commits not to send (already present in target)
  524. :param wants: SHA1s of commits to send
  525. :param progress: Optional function to report progress to.
  526. :param get_tagged: Function that returns a dict of pointed-to sha -> tag
  527. sha for including tags.
  528. :param tagged: dict of pointed-to sha -> tag sha for including tags
  529. """
  530. def __init__(self, object_store, haves, wants, progress=None,
  531. get_tagged=None):
  532. self.sha_done = set(haves)
  533. self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
  534. self.object_store = object_store
  535. if progress is None:
  536. self.progress = lambda x: None
  537. else:
  538. self.progress = progress
  539. self._tagged = get_tagged and get_tagged() or {}
  540. def add_todo(self, entries):
  541. self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
  542. def parse_tree(self, tree):
  543. self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries() if not S_ISGITLINK(mode)])
  544. def parse_commit(self, commit):
  545. self.add_todo([(commit.tree, "", False)])
  546. self.add_todo([(p, None, False) for p in commit.parents])
  547. def parse_tag(self, tag):
  548. self.add_todo([(tag.object[1], None, False)])
  549. def next(self):
  550. if not self.objects_to_send:
  551. return None
  552. (sha, name, leaf) = self.objects_to_send.pop()
  553. if not leaf:
  554. o = self.object_store[sha]
  555. if isinstance(o, Commit):
  556. self.parse_commit(o)
  557. elif isinstance(o, Tree):
  558. self.parse_tree(o)
  559. elif isinstance(o, Tag):
  560. self.parse_tag(o)
  561. if sha in self._tagged:
  562. self.add_todo([(self._tagged[sha], None, True)])
  563. self.sha_done.add(sha)
  564. self.progress("counting objects: %d\r" % len(self.sha_done))
  565. return (sha, name)
  566. class ObjectStoreGraphWalker(object):
  567. """Graph walker that finds out what commits are missing from an object store."""
  568. def __init__(self, local_heads, get_parents):
  569. """Create a new instance.
  570. :param local_heads: Heads to start search with
  571. :param get_parents: Function for finding the parents of a SHA1.
  572. """
  573. self.heads = set(local_heads)
  574. self.get_parents = get_parents
  575. self.parents = {}
  576. def ack(self, sha):
  577. """Ack that a particular revision and its ancestors are present in the source."""
  578. if sha in self.heads:
  579. self.heads.remove(sha)
  580. if sha in self.parents:
  581. for p in self.parents[sha]:
  582. self.ack(p)
  583. def next(self):
  584. """Iterate over ancestors of heads in the target."""
  585. if self.heads:
  586. ret = self.heads.pop()
  587. ps = self.get_parents(ret)
  588. self.parents[ret] = ps
  589. self.heads.update(ps)
  590. return ret
  591. return None