object_store.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. # object_store.py -- Object store for git objects
  2. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  3. #
  4. # This program is free software; you can redistribute it and/or
  5. # modify it under the terms of the GNU General Public License
  6. # as published by the Free Software Foundation; either version 2
  7. # or (at your option) a later version of the License.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  17. # MA 02110-1301, USA.
  18. """Git object store interfaces and implementation."""
  19. import itertools
  20. import os
  21. import stat
  22. import tempfile
  23. import urllib2
  24. from dulwich.errors import (
  25. NotTreeError,
  26. )
  27. from dulwich.objects import (
  28. Commit,
  29. ShaFile,
  30. Tag,
  31. Tree,
  32. hex_to_sha,
  33. sha_to_hex,
  34. )
  35. from dulwich.pack import (
  36. Pack,
  37. PackData,
  38. iter_sha1,
  39. load_packs,
  40. load_pack_index,
  41. write_pack,
  42. write_pack_data,
  43. write_pack_index_v2,
  44. )
  45. PACKDIR = 'pack'
  46. class BaseObjectStore(object):
  47. """Object store interface."""
  48. def determine_wants_all(self, refs):
  49. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  50. def iter_shas(self, shas):
  51. """Iterate over the objects for the specified shas.
  52. :param shas: Iterable object with SHAs
  53. :return: Object iterator
  54. """
  55. return ObjectStoreIterator(self, shas)
  56. def __contains__(self, sha):
  57. """Check if a particular object is present by SHA1."""
  58. raise NotImplementedError(self.__contains__)
  59. def get_raw(self, name):
  60. """Obtain the raw text for an object.
  61. :param name: sha for the object.
  62. :return: tuple with object type and object contents.
  63. """
  64. raise NotImplementedError(self.get_raw)
  65. def __getitem__(self, sha):
  66. """Obtain an object by SHA1."""
  67. type, uncomp = self.get_raw(sha)
  68. return ShaFile.from_raw_string(type, uncomp)
  69. def __iter__(self):
  70. """Iterate over the SHAs that are present in this store."""
  71. raise NotImplementedError(self.__iter__)
  72. def add_object(self, obj):
  73. """Add a single object to this object store.
  74. """
  75. raise NotImplementedError(self.add_object)
  76. def add_objects(self, objects):
  77. """Add a set of objects to this object store.
  78. :param objects: Iterable over a list of objects.
  79. """
  80. raise NotImplementedError(self.add_objects)
  81. def find_missing_objects(self, haves, wants, progress=None):
  82. """Find the missing objects required for a set of revisions.
  83. :param haves: Iterable over SHAs already in common.
  84. :param wants: Iterable over SHAs of objects to fetch.
  85. :param progress: Simple progress function that will be called with
  86. updated progress strings.
  87. :return: Iterator over (sha, path) pairs.
  88. """
  89. return iter(MissingObjectFinder(self, haves, wants, progress).next, None)
  90. def find_common_revisions(self, graphwalker):
  91. """Find which revisions this store has in common using graphwalker.
  92. :param graphwalker: A graphwalker object.
  93. :return: List of SHAs that are in common
  94. """
  95. haves = []
  96. sha = graphwalker.next()
  97. while sha:
  98. if sha in self:
  99. haves.append(sha)
  100. graphwalker.ack(sha)
  101. sha = graphwalker.next()
  102. return haves
  103. def get_graph_walker(self, heads):
  104. """Obtain a graph walker for this object store.
  105. :param heads: Local heads to start search with
  106. :return: GraphWalker object
  107. """
  108. return ObjectStoreGraphWalker(heads, lambda sha: self[sha].parents)
  109. def generate_pack_contents(self, have, want):
  110. return self.iter_shas(self.find_missing_objects(have, want))
  111. class DiskObjectStore(BaseObjectStore):
  112. """Git-style object store that exists on disk."""
  113. def __init__(self, path):
  114. """Open an object store.
  115. :param path: Path of the object store.
  116. """
  117. self.path = path
  118. self._pack_cache = None
  119. self.pack_dir = os.path.join(self.path, PACKDIR)
  120. def __contains__(self, sha):
  121. """Check if a particular object is present by SHA1."""
  122. for pack in self.packs:
  123. if sha in pack:
  124. return True
  125. ret = self._get_shafile(sha)
  126. if ret is not None:
  127. return True
  128. return False
  129. def __iter__(self):
  130. """Iterate over the SHAs that are present in this store."""
  131. iterables = self.packs + [self._iter_shafile_shas()]
  132. return itertools.chain(*iterables)
  133. @property
  134. def packs(self):
  135. """List with pack objects."""
  136. if self._pack_cache is None:
  137. self._pack_cache = list(load_packs(self.pack_dir))
  138. return self._pack_cache
  139. def _add_known_pack(self, path):
  140. """Add a newly appeared pack to the cache by path.
  141. """
  142. if self._pack_cache is not None:
  143. self._pack_cache.append(Pack(path))
  144. def _get_shafile_path(self, sha):
  145. dir = sha[:2]
  146. file = sha[2:]
  147. # Check from object dir
  148. return os.path.join(self.path, dir, file)
  149. def _iter_shafile_shas(self):
  150. for base in os.listdir(self.path):
  151. if len(base) != 2:
  152. continue
  153. for rest in os.listdir(os.path.join(self.path, base)):
  154. yield base+rest
  155. def _get_shafile(self, sha):
  156. path = self._get_shafile_path(sha)
  157. if os.path.exists(path):
  158. return ShaFile.from_file(path)
  159. return None
  160. def _add_shafile(self, sha, o):
  161. dir = os.path.join(self.path, sha[:2])
  162. if not os.path.isdir(dir):
  163. os.mkdir(dir)
  164. path = os.path.join(dir, sha[2:])
  165. if os.path.exists(path):
  166. return # Already there, no need to write again
  167. f = open(path, 'w+')
  168. try:
  169. f.write(o.as_legacy_object())
  170. finally:
  171. f.close()
  172. def get_raw(self, name):
  173. """Obtain the raw text for an object.
  174. :param name: sha for the object.
  175. :return: tuple with object type and object contents.
  176. """
  177. if len(name) == 40:
  178. sha = hex_to_sha(name)
  179. hexsha = name
  180. elif len(name) == 20:
  181. sha = name
  182. hexsha = None
  183. else:
  184. raise AssertionError
  185. for pack in self.packs:
  186. try:
  187. return pack.get_raw(sha)
  188. except KeyError:
  189. pass
  190. if hexsha is None:
  191. hexsha = sha_to_hex(name)
  192. ret = self._get_shafile(hexsha)
  193. if ret is not None:
  194. return ret.type, ret.as_raw_string()
  195. raise KeyError(hexsha)
  196. def move_in_thin_pack(self, path):
  197. """Move a specific file containing a pack into the pack directory.
  198. :note: The file should be on the same file system as the
  199. packs directory.
  200. :param path: Path to the pack file.
  201. """
  202. data = PackData(path)
  203. # Write index for the thin pack (do we really need this?)
  204. temppath = os.path.join(self.pack_dir,
  205. sha_to_hex(urllib2.randombytes(20))+".tempidx")
  206. data.create_index_v2(temppath, self.get_raw)
  207. p = Pack.from_objects(data, load_pack_index(temppath))
  208. # Write a full pack version
  209. temppath = os.path.join(self.pack_dir,
  210. sha_to_hex(urllib2.randombytes(20))+".temppack")
  211. write_pack(temppath, ((o, None) for o in p.iterobjects(self.get_raw)),
  212. len(p))
  213. pack_sha = load_pack_index(temppath+".idx").objects_sha1()
  214. newbasename = os.path.join(self.pack_dir, "pack-%s" % pack_sha)
  215. os.rename(temppath+".pack", newbasename+".pack")
  216. os.rename(temppath+".idx", newbasename+".idx")
  217. self._add_known_pack(newbasename)
  218. def move_in_pack(self, path):
  219. """Move a specific file containing a pack into the pack directory.
  220. :note: The file should be on the same file system as the
  221. packs directory.
  222. :param path: Path to the pack file.
  223. """
  224. p = PackData(path)
  225. entries = p.sorted_entries()
  226. basename = os.path.join(self.pack_dir,
  227. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  228. write_pack_index_v2(basename+".idx", entries, p.get_stored_checksum())
  229. os.rename(path, basename + ".pack")
  230. self._add_known_pack(basename)
  231. def add_thin_pack(self):
  232. """Add a new thin pack to this object store.
  233. Thin packs are packs that contain deltas with parents that exist
  234. in a different pack.
  235. """
  236. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  237. f = os.fdopen(fd, 'w')
  238. def commit():
  239. os.fsync(fd)
  240. f.close()
  241. if os.path.getsize(path) > 0:
  242. self.move_in_thin_pack(path)
  243. return f, commit
  244. def add_pack(self):
  245. """Add a new pack to this object store.
  246. :return: Fileobject to write to and a commit function to
  247. call when the pack is finished.
  248. """
  249. fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
  250. f = os.fdopen(fd, 'w')
  251. def commit():
  252. os.fsync(fd)
  253. f.close()
  254. if os.path.getsize(path) > 0:
  255. self.move_in_pack(path)
  256. return f, commit
  257. def add_object(self, obj):
  258. """Add a single object to this object store.
  259. """
  260. self._add_shafile(obj.id, obj)
  261. def add_objects(self, objects):
  262. """Add a set of objects to this object store.
  263. :param objects: Iterable over a list of objects.
  264. """
  265. if len(objects) == 0:
  266. return
  267. f, commit = self.add_pack()
  268. write_pack_data(f, objects, len(objects))
  269. commit()
  270. class MemoryObjectStore(BaseObjectStore):
  271. def __init__(self):
  272. super(MemoryObjectStore, self).__init__()
  273. self._data = {}
  274. def __contains__(self, sha):
  275. return sha in self._data
  276. def __iter__(self):
  277. """Iterate over the SHAs that are present in this store."""
  278. return self._data.iterkeys()
  279. def get_raw(self, name):
  280. """Obtain the raw text for an object.
  281. :param name: sha for the object.
  282. :return: tuple with object type and object contents.
  283. """
  284. return self[name].as_raw_string()
  285. def __getitem__(self, name):
  286. return self._data[name]
  287. def add_object(self, obj):
  288. """Add a single object to this object store.
  289. """
  290. self._data[obj.id] = obj
  291. def add_objects(self, objects):
  292. """Add a set of objects to this object store.
  293. :param objects: Iterable over a list of objects.
  294. """
  295. for obj in objects:
  296. self._data[obj.id] = obj
  297. class ObjectImporter(object):
  298. """Interface for importing objects."""
  299. def __init__(self, count):
  300. """Create a new ObjectImporter.
  301. :param count: Number of objects that's going to be imported.
  302. """
  303. self.count = count
  304. def add_object(self, object):
  305. """Add an object."""
  306. raise NotImplementedError(self.add_object)
  307. def finish(self, object):
  308. """Finish the imoprt and write objects to disk."""
  309. raise NotImplementedError(self.finish)
  310. class ObjectIterator(object):
  311. """Interface for iterating over objects."""
  312. def iterobjects(self):
  313. raise NotImplementedError(self.iterobjects)
  314. class ObjectStoreIterator(ObjectIterator):
  315. """ObjectIterator that works on top of an ObjectStore."""
  316. def __init__(self, store, sha_iter):
  317. self.store = store
  318. self.sha_iter = sha_iter
  319. self._shas = []
  320. def __iter__(self):
  321. for sha, path in self.itershas():
  322. yield self.store[sha], path
  323. def iterobjects(self):
  324. for o, path in self:
  325. yield o
  326. def itershas(self):
  327. for sha in self._shas:
  328. yield sha
  329. for sha in self.sha_iter:
  330. self._shas.append(sha)
  331. yield sha
  332. def __contains__(self, needle):
  333. """Check if an object is present.
  334. :param needle: SHA1 of the object to check for
  335. """
  336. return needle in self.store
  337. def __getitem__(self, key):
  338. """Find an object by SHA1."""
  339. return self.store[key]
  340. def __len__(self):
  341. """Return the number of objects."""
  342. return len(list(self.itershas()))
  343. def tree_lookup_path(lookup_obj, root_sha, path):
  344. """Lookup an object in a Git tree.
  345. :param lookup_obj: Callback for retrieving object by SHA1
  346. :param root_sha: SHA1 of the root tree
  347. :param path: Path to lookup
  348. """
  349. parts = path.split("/")
  350. sha = root_sha
  351. for p in parts:
  352. obj = lookup_obj(sha)
  353. if type(obj) is not Tree:
  354. raise NotTreeError(sha)
  355. if p == '':
  356. continue
  357. mode, sha = obj[p]
  358. return lookup_obj(sha)
  359. class MissingObjectFinder(object):
  360. """Find the objects missing from another object store.
  361. :param object_store: Object store containing at least all objects to be
  362. sent
  363. :param haves: SHA1s of commits not to send (already present in target)
  364. :param wants: SHA1s of commits to send
  365. :param progress: Optional function to report progress to.
  366. """
  367. def __init__(self, object_store, haves, wants, progress=None):
  368. self.sha_done = set(haves)
  369. self.objects_to_send = set([(w, None, False) for w in wants if w not in haves])
  370. self.object_store = object_store
  371. if progress is None:
  372. self.progress = lambda x: None
  373. else:
  374. self.progress = progress
  375. def add_todo(self, entries):
  376. self.objects_to_send.update([e for e in entries if not e[0] in self.sha_done])
  377. def parse_tree(self, tree):
  378. self.add_todo([(sha, name, not stat.S_ISDIR(mode)) for (mode, name, sha) in tree.entries()])
  379. def parse_commit(self, commit):
  380. self.add_todo([(commit.tree, "", False)])
  381. self.add_todo([(p, None, False) for p in commit.parents])
  382. def parse_tag(self, tag):
  383. self.add_todo([(tag.object[1], None, False)])
  384. def next(self):
  385. if not self.objects_to_send:
  386. return None
  387. (sha, name, leaf) = self.objects_to_send.pop()
  388. if not leaf:
  389. o = self.object_store[sha]
  390. if isinstance(o, Commit):
  391. self.parse_commit(o)
  392. elif isinstance(o, Tree):
  393. self.parse_tree(o)
  394. elif isinstance(o, Tag):
  395. self.parse_tag(o)
  396. self.sha_done.add(sha)
  397. self.progress("counting objects: %d\r" % len(self.sha_done))
  398. return (sha, name)
  399. class ObjectStoreGraphWalker(object):
  400. """Graph walker that finds out what commits are missing from an object store."""
  401. def __init__(self, local_heads, get_parents):
  402. """Create a new instance.
  403. :param local_heads: Heads to start search with
  404. :param get_parents: Function for finding the parents of a SHA1.
  405. """
  406. self.heads = set(local_heads)
  407. self.get_parents = get_parents
  408. self.parents = {}
  409. def ack(self, sha):
  410. """Ack that a particular revision and its ancestors are present in the source."""
  411. if sha in self.heads:
  412. self.heads.remove(sha)
  413. if sha in self.parents:
  414. for p in self.parents[sha]:
  415. self.ack(p)
  416. def next(self):
  417. """Iterate over ancestors of heads in the target."""
  418. if self.heads:
  419. ret = self.heads.pop()
  420. ps = self.get_parents(ret)
  421. self.parents[ret] = ps
  422. self.heads.update(ps)
  423. return ret
  424. return None