objects.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # The header parsing code is based on that from git itself, which is
  4. # Copyright (C) 2005 Linus Torvalds
  5. # and licensed under v2 of the GPL.
  6. #
  7. # This program is free software; you can redistribute it and/or
  8. # modify it under the terms of the GNU General Public License
  9. # as published by the Free Software Foundation; version 2
  10. # of the License.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  20. # MA 02110-1301, USA.
  21. import mmap
  22. import os
  23. import sha
  24. import zlib
  25. from errors import (NotCommitError,
  26. NotTreeError,
  27. NotBlobError,
  28. )
  29. blob_id = "blob"
  30. tree_id = "tree"
  31. commit_id = "commit"
  32. parent_id = "parent"
  33. author_id = "author"
  34. committer_id = "committer"
  35. def _decompress(string):
  36. dcomp = zlib.decompressobj()
  37. dcomped = dcomp.decompress(string)
  38. dcomped += dcomp.flush()
  39. return dcomped
  40. def sha_to_hex(sha):
  41. """Takes a string and returns the hex of the sha within"""
  42. hexsha = ''
  43. for c in sha:
  44. hexsha += "%02x" % ord(c)
  45. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  46. len(hexsha)
  47. return hexsha
  48. class ShaFile(object):
  49. """A git SHA file."""
  50. def _update_contents(self):
  51. """Update the _contents from the _text"""
  52. self._contents = [ord(c) for c in self._text]
  53. @classmethod
  54. def _parse_legacy_object(cls, map):
  55. """Parse a legacy object, creating it and setting object._text"""
  56. text = _decompress(map)
  57. object = None
  58. for posstype in type_map.keys():
  59. if text.startswith(posstype):
  60. object = type_map[posstype]()
  61. text = text[len(posstype):]
  62. break
  63. assert object is not None, "%s is not a known object type" % text[:9]
  64. assert text[0] == ' ', "%s is not a space" % text[0]
  65. text = text[1:]
  66. size = 0
  67. i = 0
  68. while text[0] >= '0' and text[0] <= '9':
  69. if i > 0 and size == 0:
  70. assert False, "Size is not in canonical format"
  71. size = (size * 10) + int(text[0])
  72. text = text[1:]
  73. i += 1
  74. object._size = size
  75. assert text[0] == "\0", "Size not followed by null"
  76. text = text[1:]
  77. object._text = text
  78. object._update_contents()
  79. return object
  80. @classmethod
  81. def _parse_object(cls, map):
  82. """Parse a new style object , creating it and setting object._text"""
  83. used = 0
  84. byte = ord(map[used])
  85. used += 1
  86. num_type = (byte >> 4) & 7
  87. try:
  88. object = num_type_map[num_type]()
  89. except KeyError:
  90. assert False, "Not a known type: %d" % num_type
  91. while((byte & 0x80) != 0):
  92. byte = ord(map[used])
  93. used += 1
  94. raw = map[used:]
  95. object._text = _decompress(raw)
  96. object._update_contents()
  97. return object
  98. @classmethod
  99. def _parse_file(cls, map):
  100. word = (ord(map[0]) << 8) + ord(map[1])
  101. if ord(map[0]) == 0x78 and (word % 31) == 0:
  102. return cls._parse_legacy_object(map)
  103. else:
  104. return cls._parse_object(map)
  105. def __init__(self):
  106. """Don't call this directly"""
  107. def _parse_text(self):
  108. """For subclasses to do initialistion time parsing"""
  109. @classmethod
  110. def from_file(cls, filename):
  111. """Get the contents of a SHA file on disk"""
  112. size = os.path.getsize(filename)
  113. f = open(filename, 'rb+')
  114. try:
  115. map = mmap.mmap(f.fileno(), size)
  116. shafile = cls._parse_file(map)
  117. shafile._parse_text()
  118. return shafile
  119. finally:
  120. f.close()
  121. def _header(self):
  122. return "%s %lu\0" % (self._type, len(self._contents))
  123. def contents(self):
  124. """The raw bytes of this object"""
  125. return self._contents
  126. def sha(self):
  127. """The SHA1 object that is the name of this object."""
  128. ressha = sha.new()
  129. ressha.update(self._header())
  130. ressha.update(self._text)
  131. return ressha
  132. def __eq__(self, other):
  133. """Return true id the sha of the two objects match.
  134. The __le__ etc methods aren't overriden as they make no sense,
  135. certainly at this level.
  136. """
  137. return self.sha().digest() == other.sha().digest()
  138. class Blob(ShaFile):
  139. """A Git Blob object."""
  140. _type = blob_id
  141. def text(self):
  142. """The text contained within the blob object."""
  143. return self._text
  144. @classmethod
  145. def from_file(cls, filename):
  146. blob = ShaFile.from_file(filename)
  147. if blob._type != cls._type:
  148. raise NotBlobError(filename)
  149. return blob
  150. @classmethod
  151. def from_string(cls, string):
  152. """Create a blob from a string."""
  153. shafile = cls()
  154. shafile._text = string
  155. shafile._update_contents()
  156. return shafile
  157. class Tree(ShaFile):
  158. """A Git tree object"""
  159. _type = tree_id
  160. @classmethod
  161. def from_file(cls, filename):
  162. tree = ShaFile.from_file(filename)
  163. if tree._type != cls._type:
  164. raise NotTreeError(filename)
  165. return tree
  166. def entries(self):
  167. """Reutrn a list of tuples describing the tree entries"""
  168. return self._entries
  169. def _parse_text(self):
  170. """Grab the entries in the tree"""
  171. self._entries = []
  172. count = 0
  173. while count < len(self._text):
  174. mode = 0
  175. chr = self._text[count]
  176. while chr != ' ':
  177. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  178. mode = (mode << 3) + (ord(chr) - ord('0'))
  179. count += 1
  180. chr = self._text[count]
  181. count += 1
  182. chr = self._text[count]
  183. name = ''
  184. while chr != '\0':
  185. name += chr
  186. count += 1
  187. chr = self._text[count]
  188. count += 1
  189. chr = self._text[count]
  190. sha = self._text[count:count+20]
  191. hexsha = sha_to_hex(sha)
  192. self._entries.append((mode, name, hexsha))
  193. count = count + 20
  194. class Commit(ShaFile):
  195. """A git commit object"""
  196. _type = commit_id
  197. @classmethod
  198. def from_file(cls, filename):
  199. commit = ShaFile.from_file(filename)
  200. if commit._type != cls._type:
  201. raise NotCommitError(filename)
  202. return commit
  203. def _parse_text(self):
  204. text = self._text
  205. count = 0
  206. assert text.startswith(tree_id), "Invlid commit object, " \
  207. "must start with %s" % tree_id
  208. count += len(tree_id)
  209. assert text[count] == ' ', "Invalid commit object, " \
  210. "%s must be followed by space not %s" % (tree_id, text[count])
  211. count += 1
  212. self._tree = text[count:count+40]
  213. count = count + 40
  214. assert text[count] == "\n", "Invalid commit object, " \
  215. "tree sha must be followed by newline"
  216. count += 1
  217. self._parents = []
  218. while text[count:].startswith(parent_id):
  219. count += len(parent_id)
  220. assert text[count] == ' ', "Invalid commit object, " \
  221. "%s must be followed by space not %s" % (parent_id, text[count])
  222. count += 1
  223. self._parents.append(text[count:count+40])
  224. count += 40
  225. assert text[count] == "\n", "Invalid commit object, " \
  226. "parent sha must be followed by newline"
  227. count += 1
  228. self._author = None
  229. if text[count:].startswith(author_id):
  230. count += len(author_id)
  231. assert text[count] == ' ', "Invalid commit object, " \
  232. "%s must be followed by space not %s" % (author_id, text[count])
  233. count += 1
  234. self._author = ''
  235. while text[count] != '>':
  236. assert text[count] != '\n', "Malformed author information"
  237. self._author += text[count]
  238. count += 1
  239. self._author += text[count]
  240. count += 1
  241. while text[count] != '\n':
  242. count += 1
  243. count += 1
  244. self._committer = None
  245. if text[count:].startswith(committer_id):
  246. count += len(committer_id)
  247. assert text[count] == ' ', "Invalid commit object, " \
  248. "%s must be followed by space not %s" % (committer_id, text[count])
  249. count += 1
  250. self._committer = ''
  251. while text[count] != '>':
  252. assert text[count] != '\n', "Malformed committer information"
  253. self._committer += text[count]
  254. count += 1
  255. self._committer += text[count]
  256. count += 1
  257. assert text[count] == ' ', "Invalid commit object, " \
  258. "commiter information must be followed by space not %s" % text[count]
  259. count += 1
  260. self._commit_time = int(text[count:count+10])
  261. while text[count] != '\n':
  262. count += 1
  263. count += 1
  264. assert text[count] == '\n', "There must be a new line after the headers"
  265. count += 1
  266. # XXX: There can be an encoding field.
  267. self._message = text[count:]
  268. def tree(self):
  269. """Returns the tree that is the state of this commit"""
  270. return self._tree
  271. def parents(self):
  272. """Return a list of parents of this commit."""
  273. return self._parents
  274. def author(self):
  275. """Returns the name of the author of the commit"""
  276. return self._author
  277. def committer(self):
  278. """Returns the name of the committer of the commit"""
  279. return self._committer
  280. def message(self):
  281. """Returns the commit message"""
  282. return self._message
  283. def commit_time(self):
  284. """Returns the timestamp of the commit.
  285. Returns it as the number of seconds since the epoch.
  286. """
  287. return self._commit_time
  288. type_map = {
  289. blob_id : Blob,
  290. tree_id : Tree,
  291. commit_id : Commit,
  292. }
  293. num_type_map = {
  294. 1 : Commit,
  295. 2 : Tree,
  296. 3 : Blob,
  297. }