2
0

objects.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # The header parsing code is based on that from git itself, which is
  4. # Copyright (C) 2005 Linus Torvalds
  5. # and licensed under v2 of the GPL.
  6. #
  7. # This program is free software; you can redistribute it and/or
  8. # modify it under the terms of the GNU General Public License
  9. # as published by the Free Software Foundation; version 2
  10. # of the License.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  20. # MA 02110-1301, USA.
  21. import mmap
  22. import os
  23. import sha
  24. import zlib
  25. blob_id = "blob"
  26. tree_id = "tree"
  27. commit_id = "commit"
  28. parent_id = "parent"
  29. author_id = "author"
  30. committer_id = "committer"
  31. def _decompress(string):
  32. dcomp = zlib.decompressobj()
  33. dcomped = dcomp.decompress(string)
  34. dcomped += dcomp.flush()
  35. return dcomped
  36. def sha_to_hex(sha):
  37. """Takes a string and returns the hex of the sha within"""
  38. hexsha = ''
  39. for c in sha:
  40. if ord(c) < 16:
  41. hexsha += "0%x" % ord(c)
  42. else:
  43. hexsha += "%x" % ord(c)
  44. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  45. len(hexsha)
  46. return hexsha
  47. class ShaFile(object):
  48. """A git SHA file."""
  49. def _update_contents(self):
  50. """Update the _contents from the _text"""
  51. self._contents = [ord(c) for c in self._text]
  52. @classmethod
  53. def _parse_legacy_object(cls, map):
  54. """Parse a legacy object, creating it and setting object._text"""
  55. text = _decompress(map)
  56. object = None
  57. for posstype in type_map.keys():
  58. if text.startswith(posstype):
  59. object = type_map[posstype]()
  60. text = text[len(posstype):]
  61. break
  62. assert object is not None, "%s is not a known object type" % text[:9]
  63. assert text[0] == ' ', "%s is not a space" % text[0]
  64. text = text[1:]
  65. size = 0
  66. i = 0
  67. while text[0] >= '0' and text[0] <= '9':
  68. if i > 0 and size == 0:
  69. assert False, "Size is not in canonical format"
  70. size = (size * 10) + int(text[0])
  71. text = text[1:]
  72. i += 1
  73. object._size = size
  74. assert text[0] == "\0", "Size not followed by null"
  75. text = text[1:]
  76. object._text = text
  77. object._update_contents()
  78. return object
  79. @classmethod
  80. def _parse_object(cls, map):
  81. """Parse a new style object , creating it and setting object._text"""
  82. used = 0
  83. byte = ord(map[used])
  84. used += 1
  85. num_type = (byte >> 4) & 7
  86. try:
  87. object = num_type_map[num_type]()
  88. except KeyError:
  89. assert False, "Not a known type: %d" % num_type
  90. while((byte & 0x80) != 0):
  91. byte = ord(map[used])
  92. used += 1
  93. raw = map[used:]
  94. object._text = _decompress(raw)
  95. object._update_contents()
  96. return object
  97. @classmethod
  98. def _parse_file(cls, map):
  99. word = (ord(map[0]) << 8) + ord(map[1])
  100. if ord(map[0]) == 0x78 and (word % 31) == 0:
  101. return cls._parse_legacy_object(map)
  102. else:
  103. return cls._parse_object(map)
  104. def __init__(self):
  105. """Don't call this directly"""
  106. def _parse_text(self):
  107. """For subclasses to do initialistion time parsing"""
  108. @classmethod
  109. def from_file(cls, filename):
  110. """Get the contents of a SHA file on disk"""
  111. size = os.path.getsize(filename)
  112. f = open(filename, 'rb+')
  113. try:
  114. map = mmap.mmap(f.fileno(), size)
  115. shafile = cls._parse_file(map)
  116. shafile._parse_text()
  117. return shafile
  118. finally:
  119. f.close()
  120. def _header(self):
  121. return "%s %lu\0" % (self._type, len(self._contents))
  122. def contents(self):
  123. """The raw bytes of this object"""
  124. return self._contents
  125. def sha(self):
  126. """The SHA1 object that is the name of this object."""
  127. ressha = sha.new()
  128. ressha.update(self._header())
  129. ressha.update(self._text)
  130. return ressha
  131. class Blob(ShaFile):
  132. """A Git Blob object."""
  133. _type = blob_id
  134. def text(self):
  135. """The text contained within the blob object."""
  136. return self._text
  137. @classmethod
  138. def from_file(cls, filename):
  139. blob = ShaFile.from_file(filename)
  140. assert blob._type == cls._type, "%s is not a blob object" % filename
  141. return blob
  142. @classmethod
  143. def from_string(cls, string):
  144. """Create a blob from a string."""
  145. shafile = cls()
  146. shafile._text = string
  147. shafile._update_contents()
  148. return shafile
  149. class Tree(ShaFile):
  150. """A Git tree object"""
  151. _type = tree_id
  152. @classmethod
  153. def from_file(cls, filename):
  154. tree = ShaFile.from_file(filename)
  155. assert tree._type == cls._type, "%s is not a tree object" % filename
  156. return tree
  157. def entries(self):
  158. """Reutrn a list of tuples describing the tree entries"""
  159. return self._entries
  160. def _parse_text(self):
  161. """Grab the entries in the tree"""
  162. self._entries = []
  163. count = 0
  164. while count < len(self._text):
  165. mode = 0
  166. chr = self._text[count]
  167. while chr != ' ':
  168. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  169. mode = (mode << 3) + (ord(chr) - ord('0'))
  170. count += 1
  171. chr = self._text[count]
  172. count += 1
  173. chr = self._text[count]
  174. name = ''
  175. while chr != '\0':
  176. name += chr
  177. count += 1
  178. chr = self._text[count]
  179. count += 1
  180. chr = self._text[count]
  181. sha = self._text[count:count+20]
  182. hexsha = sha_to_hex(sha)
  183. self._entries.append((mode, name, hexsha))
  184. count = count + 20
  185. class Commit(ShaFile):
  186. """A git commit object"""
  187. _type = commit_id
  188. @classmethod
  189. def from_file(cls, filename):
  190. commit = ShaFile.from_file(filename)
  191. assert commit._type == cls._type, "%s is not a commit object" % filename
  192. return commit
  193. def _parse_text(self):
  194. text = self._text
  195. count = 0
  196. assert text.startswith(tree_id), "Invlid commit object, " \
  197. "must start with %s" % tree_id
  198. count += len(tree_id)
  199. assert text[count] == ' ', "Invalid commit object, " \
  200. "%s must be followed by space not %s" % (tree_id, text[count])
  201. count += 1
  202. self._tree = text[count:count+40]
  203. count = count + 40
  204. assert text[count] == "\n", "Invalid commit object, " \
  205. "tree sha must be followed by newline"
  206. count += 1
  207. self._parents = []
  208. while text[count:].startswith(parent_id):
  209. count += len(parent_id)
  210. assert text[count] == ' ', "Invalid commit object, " \
  211. "%s must be followed by space not %s" % (parent_id, text[count])
  212. count += 1
  213. self._parents.append(text[count:count+40])
  214. count += 40
  215. assert text[count] == "\n", "Invalid commit object, " \
  216. "parent sha must be followed by newline"
  217. count += 1
  218. self._author = None
  219. if text[count:].startswith(author_id):
  220. count += len(author_id)
  221. assert text[count] == ' ', "Invalid commit object, " \
  222. "%s must be followed by space not %s" % (author_id, text[count])
  223. count += 1
  224. self._author = ''
  225. while text[count] != '\n':
  226. self._author += text[count]
  227. count += 1
  228. count += 1
  229. self._committer = None
  230. if text[count:].startswith(committer_id):
  231. count += len(committer_id)
  232. assert text[count] == ' ', "Invalid commit object, " \
  233. "%s must be followed by space not %s" % (committer_id, text[count])
  234. count += 1
  235. self._committer = ''
  236. while text[count] != '\n':
  237. self._committer += text[count]
  238. count += 1
  239. count += 1
  240. assert text[count] == '\n', "There must be a new line after the headers"
  241. count += 1
  242. self._message = text[count:]
  243. def tree(self):
  244. """Returns the tree that is the state of this commit"""
  245. return self._tree
  246. def parents(self):
  247. """Return a list of parents of this commit."""
  248. return self._parents
  249. def author(self):
  250. """Returns the name of the author of the commit"""
  251. return self._author
  252. def committer(self):
  253. """Returns the name of the committer of the commit"""
  254. return self._committer
  255. def message(self):
  256. """Returns the commit message"""
  257. return self._message
  258. type_map = {
  259. blob_id : Blob,
  260. tree_id : Tree,
  261. commit_id : Commit,
  262. }
  263. num_type_map = {
  264. 1 : Commit,
  265. 2 : Tree,
  266. 3 : Blob,
  267. }