objects.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
# objects.py -- Access to base git objects
# Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
# The header parsing code is based on that from git itself, which is
# Copyright (C) 2005 Linus Torvalds
# and licensed under v2 of the GPL.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; version 2
# of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
  21. import mmap
  22. import os
  23. import sha
  24. import zlib
  25. from errors import (NotCommitError,
  26. NotTreeError,
  27. NotBlobError,
  28. )
  29. blob_id = "blob"
  30. tree_id = "tree"
  31. commit_id = "commit"
  32. parent_id = "parent"
  33. author_id = "author"
  34. committer_id = "committer"
  35. def _decompress(string):
  36. dcomp = zlib.decompressobj()
  37. dcomped = dcomp.decompress(string)
  38. dcomped += dcomp.flush()
  39. return dcomped
  40. def sha_to_hex(sha):
  41. """Takes a string and returns the hex of the sha within"""
  42. hexsha = ''
  43. for c in sha:
  44. if ord(c) < 16:
  45. hexsha += "0%x" % ord(c)
  46. else:
  47. hexsha += "%x" % ord(c)
  48. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  49. len(hexsha)
  50. return hexsha
  51. class ShaFile(object):
  52. """A git SHA file."""
  53. def _update_contents(self):
  54. """Update the _contents from the _text"""
  55. self._contents = [ord(c) for c in self._text]
  56. @classmethod
  57. def _parse_legacy_object(cls, map):
  58. """Parse a legacy object, creating it and setting object._text"""
  59. text = _decompress(map)
  60. object = None
  61. for posstype in type_map.keys():
  62. if text.startswith(posstype):
  63. object = type_map[posstype]()
  64. text = text[len(posstype):]
  65. break
  66. assert object is not None, "%s is not a known object type" % text[:9]
  67. assert text[0] == ' ', "%s is not a space" % text[0]
  68. text = text[1:]
  69. size = 0
  70. i = 0
  71. while text[0] >= '0' and text[0] <= '9':
  72. if i > 0 and size == 0:
  73. assert False, "Size is not in canonical format"
  74. size = (size * 10) + int(text[0])
  75. text = text[1:]
  76. i += 1
  77. object._size = size
  78. assert text[0] == "\0", "Size not followed by null"
  79. text = text[1:]
  80. object._text = text
  81. object._update_contents()
  82. return object
  83. @classmethod
  84. def _parse_object(cls, map):
  85. """Parse a new style object , creating it and setting object._text"""
  86. used = 0
  87. byte = ord(map[used])
  88. used += 1
  89. num_type = (byte >> 4) & 7
  90. try:
  91. object = num_type_map[num_type]()
  92. except KeyError:
  93. assert False, "Not a known type: %d" % num_type
  94. while((byte & 0x80) != 0):
  95. byte = ord(map[used])
  96. used += 1
  97. raw = map[used:]
  98. object._text = _decompress(raw)
  99. object._update_contents()
  100. return object
  101. @classmethod
  102. def _parse_file(cls, map):
  103. word = (ord(map[0]) << 8) + ord(map[1])
  104. if ord(map[0]) == 0x78 and (word % 31) == 0:
  105. return cls._parse_legacy_object(map)
  106. else:
  107. return cls._parse_object(map)
  108. def __init__(self):
  109. """Don't call this directly"""
  110. def _parse_text(self):
  111. """For subclasses to do initialistion time parsing"""
  112. @classmethod
  113. def from_file(cls, filename):
  114. """Get the contents of a SHA file on disk"""
  115. size = os.path.getsize(filename)
  116. f = open(filename, 'rb+')
  117. try:
  118. map = mmap.mmap(f.fileno(), size)
  119. shafile = cls._parse_file(map)
  120. shafile._parse_text()
  121. return shafile
  122. finally:
  123. f.close()
  124. def _header(self):
  125. return "%s %lu\0" % (self._type, len(self._contents))
  126. def contents(self):
  127. """The raw bytes of this object"""
  128. return self._contents
  129. def sha(self):
  130. """The SHA1 object that is the name of this object."""
  131. ressha = sha.new()
  132. ressha.update(self._header())
  133. ressha.update(self._text)
  134. return ressha
  135. class Blob(ShaFile):
  136. """A Git Blob object."""
  137. _type = blob_id
  138. def text(self):
  139. """The text contained within the blob object."""
  140. return self._text
  141. @classmethod
  142. def from_file(cls, filename):
  143. blob = ShaFile.from_file(filename)
  144. if blob._type != cls._type:
  145. raise NotBlobError(filename)
  146. return blob
  147. @classmethod
  148. def from_string(cls, string):
  149. """Create a blob from a string."""
  150. shafile = cls()
  151. shafile._text = string
  152. shafile._update_contents()
  153. return shafile
  154. class Tree(ShaFile):
  155. """A Git tree object"""
  156. _type = tree_id
  157. @classmethod
  158. def from_file(cls, filename):
  159. tree = ShaFile.from_file(filename)
  160. if tree._type != cls._type:
  161. raise NotTreeError(filename)
  162. return tree
  163. def entries(self):
  164. """Reutrn a list of tuples describing the tree entries"""
  165. return self._entries
  166. def _parse_text(self):
  167. """Grab the entries in the tree"""
  168. self._entries = []
  169. count = 0
  170. while count < len(self._text):
  171. mode = 0
  172. chr = self._text[count]
  173. while chr != ' ':
  174. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  175. mode = (mode << 3) + (ord(chr) - ord('0'))
  176. count += 1
  177. chr = self._text[count]
  178. count += 1
  179. chr = self._text[count]
  180. name = ''
  181. while chr != '\0':
  182. name += chr
  183. count += 1
  184. chr = self._text[count]
  185. count += 1
  186. chr = self._text[count]
  187. sha = self._text[count:count+20]
  188. hexsha = sha_to_hex(sha)
  189. self._entries.append((mode, name, hexsha))
  190. count = count + 20
  191. class Commit(ShaFile):
  192. """A git commit object"""
  193. _type = commit_id
  194. @classmethod
  195. def from_file(cls, filename):
  196. commit = ShaFile.from_file(filename)
  197. if commit._type != cls._type:
  198. raise NotCommitError(filename)
  199. return commit
  200. def _parse_text(self):
  201. text = self._text
  202. count = 0
  203. assert text.startswith(tree_id), "Invlid commit object, " \
  204. "must start with %s" % tree_id
  205. count += len(tree_id)
  206. assert text[count] == ' ', "Invalid commit object, " \
  207. "%s must be followed by space not %s" % (tree_id, text[count])
  208. count += 1
  209. self._tree = text[count:count+40]
  210. count = count + 40
  211. assert text[count] == "\n", "Invalid commit object, " \
  212. "tree sha must be followed by newline"
  213. count += 1
  214. self._parents = []
  215. while text[count:].startswith(parent_id):
  216. count += len(parent_id)
  217. assert text[count] == ' ', "Invalid commit object, " \
  218. "%s must be followed by space not %s" % (parent_id, text[count])
  219. count += 1
  220. self._parents.append(text[count:count+40])
  221. count += 40
  222. assert text[count] == "\n", "Invalid commit object, " \
  223. "parent sha must be followed by newline"
  224. count += 1
  225. self._author = None
  226. if text[count:].startswith(author_id):
  227. count += len(author_id)
  228. assert text[count] == ' ', "Invalid commit object, " \
  229. "%s must be followed by space not %s" % (author_id, text[count])
  230. count += 1
  231. self._author = ''
  232. while text[count] != '\n':
  233. self._author += text[count]
  234. count += 1
  235. count += 1
  236. self._committer = None
  237. if text[count:].startswith(committer_id):
  238. count += len(committer_id)
  239. assert text[count] == ' ', "Invalid commit object, " \
  240. "%s must be followed by space not %s" % (committer_id, text[count])
  241. count += 1
  242. self._committer = ''
  243. while text[count] != '\n':
  244. self._committer += text[count]
  245. count += 1
  246. count += 1
  247. assert text[count] == '\n', "There must be a new line after the headers"
  248. count += 1
  249. self._message = text[count:]
  250. def tree(self):
  251. """Returns the tree that is the state of this commit"""
  252. return self._tree
  253. def parents(self):
  254. """Return a list of parents of this commit."""
  255. return self._parents
  256. def author(self):
  257. """Returns the name of the author of the commit"""
  258. return self._author
  259. def committer(self):
  260. """Returns the name of the committer of the commit"""
  261. return self._committer
  262. def message(self):
  263. """Returns the commit message"""
  264. return self._message
  265. type_map = {
  266. blob_id : Blob,
  267. tree_id : Tree,
  268. commit_id : Commit,
  269. }
  270. num_type_map = {
  271. 1 : Commit,
  272. 2 : Tree,
  273. 3 : Blob,
  274. }