# object_store.py -- Object store for git objects
# Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
import os
import tempfile
import urllib2

from dulwich.objects import (
    hex_to_sha,
    sha_to_hex,
    ShaFile,
    )
from dulwich.pack import (
    iter_sha1,
    load_packs,
    write_pack,
    write_pack_data,
    write_pack_index_v2,
    PackData,
    PackIndex,
    )
  34. PACKDIR = 'pack'
  35. class ObjectStore(object):
  36. def __init__(self, path):
  37. self.path = path
  38. self._packs = None
  39. def determine_wants_all(self, refs):
  40. return [sha for (ref, sha) in refs.iteritems() if not sha in self and not ref.endswith("^{}")]
  41. def iter_shas(self, shas):
  42. return ObjectStoreIterator(self, shas)
  43. def pack_dir(self):
  44. return os.path.join(self.path, PACKDIR)
  45. def __contains__(self, sha):
  46. # TODO: This can be more efficient
  47. try:
  48. self[sha]
  49. return True
  50. except KeyError:
  51. return False
  52. @property
  53. def packs(self):
  54. """List with pack objects."""
  55. if self._packs is None:
  56. self._packs = list(load_packs(self.pack_dir()))
  57. return self._packs
  58. def _get_shafile_path(self, sha):
  59. dir = sha[:2]
  60. file = sha[2:]
  61. # Check from object dir
  62. return os.path.join(self.path, dir, file)
  63. def _get_shafile(self, sha):
  64. path = self._get_shafile_path(sha)
  65. if os.path.exists(path):
  66. return ShaFile.from_file(path)
  67. return None
  68. def _add_shafile(self, sha, o):
  69. path = self._get_shafile_path(sha)
  70. f = os.path.open(path, 'w')
  71. try:
  72. f.write(o._header())
  73. f.write(o._text)
  74. finally:
  75. f.close()
  76. def get_raw(self, sha):
  77. """Obtain the raw text for an object.
  78. :param sha: Sha for the object.
  79. :return: tuple with object type and object contents.
  80. """
  81. for pack in self.packs:
  82. if sha in pack:
  83. return pack.get_raw(sha, self.get_raw)
  84. # FIXME: Are pack deltas ever against on-disk shafiles ?
  85. ret = self._get_shafile(sha)
  86. if ret is not None:
  87. return ret.as_raw_string()
  88. raise KeyError(sha)
  89. def __getitem__(self, sha):
  90. assert len(sha) == 40, "Incorrect length sha: %s" % str(sha)
  91. ret = self._get_shafile(sha)
  92. if ret is not None:
  93. return ret
  94. # Check from packs
  95. type, uncomp = self.get_raw(sha)
  96. return ShaFile.from_raw_string(type, uncomp)
  97. def move_in_thin_pack(self, path):
  98. """Move a specific file containing a pack into the pack directory.
  99. :note: The file should be on the same file system as the
  100. packs directory.
  101. :param path: Path to the pack file.
  102. """
  103. p = PackData(path)
  104. temppath = os.path.join(self.pack_dir(), sha_to_hex(urllib2.randombytes(20))+".temppack")
  105. write_pack(temppath, p.iterobjects(self.get_raw), len(p))
  106. pack_sha = PackIndex(temppath+".idx").objects_sha1()
  107. os.rename(temppath+".pack",
  108. os.path.join(self.pack_dir(), "pack-%s.pack" % pack_sha))
  109. os.rename(temppath+".idx",
  110. os.path.join(self.pack_dir(), "pack-%s.idx" % pack_sha))
  111. def move_in_pack(self, path):
  112. """Move a specific file containing a pack into the pack directory.
  113. :note: The file should be on the same file system as the
  114. packs directory.
  115. :param path: Path to the pack file.
  116. """
  117. p = PackData(path)
  118. entries = p.sorted_entries()
  119. basename = os.path.join(self.pack_dir(),
  120. "pack-%s" % iter_sha1(entry[0] for entry in entries))
  121. write_pack_index_v2(basename+".idx", entries, p.calculate_checksum())
  122. os.rename(path, basename + ".pack")
  123. def add_thin_pack(self):
  124. """Add a new thin pack to this object store.
  125. Thin packs are packs that contain deltas with parents that exist
  126. in a different pack.
  127. """
  128. fd, path = tempfile.mkstemp(dir=self.pack_dir(), suffix=".pack")
  129. f = os.fdopen(fd, 'w')
  130. def commit():
  131. if os.path.getsize(path) > 0:
  132. self.move_in_thin_pack(path)
  133. return f, commit
  134. def add_pack(self):
  135. """Add a new pack to this object store.
  136. :return: Fileobject to write to and a commit function to
  137. call when the pack is finished.
  138. """
  139. fd, path = tempfile.mkstemp(dir=self.pack_dir(), suffix=".pack")
  140. f = os.fdopen(fd, 'w')
  141. def commit():
  142. if os.path.getsize(path) > 0:
  143. self.move_in_pack(path)
  144. return f, commit
  145. def add_objects(self, objects):
  146. if len(objects) == 0:
  147. return
  148. f, commit = self.add_pack()
  149. write_pack_data(f, objects, len(objects))
  150. commit()
  151. class ObjectImporter(object):
  152. def __init__(self, count):
  153. self.count = count
  154. def add_object(self, object):
  155. raise NotImplementedError(self.add_object)
  156. def finish(self, object):
  157. raise NotImplementedError(self.finish)
  158. class ObjectIterator(object):
  159. def iterobjects(self):
  160. raise NotImplementedError(self.iterobjects)
  161. class ObjectStoreIterator(ObjectIterator):
  162. def __init__(self, store, shas):
  163. self.store = store
  164. self.shas = shas
  165. def __iter__(self):
  166. return ((self.store[sha], path) for sha, path in self.shas)
  167. def iterobjects(self):
  168. for o, path in self:
  169. yield o
  170. def __contains__(self, needle):
  171. """Check if an object is present.
  172. :param needle: SHA1 of the object to check for
  173. """
  174. # FIXME: This could be more efficient
  175. for sha, path in self.shas:
  176. if sha == needle:
  177. return True
  178. return False
  179. def __getitem__(self, key):
  180. """Find an object by SHA1."""
  181. return self.store[key]
  182. def __len__(self):
  183. """Return the number of objects."""
  184. return len(self.shas)