# objects.py
import hashlib
import mmap
import os
import sha
import zlib
  5. blob_id = "blob"
  6. tree_id = "tree"
  7. commit_id = "commit"
  8. parent_id = "parent"
  9. author_id = "author"
  10. committer_id = "committer"
  11. def _decompress(string):
  12. dcomp = zlib.decompressobj()
  13. dcomped = dcomp.decompress(string)
  14. dcomped += dcomp.flush()
  15. return dcomped
  16. def sha_to_hex(sha):
  17. """Takes a string and returns the hex of the sha within"""
  18. hexsha = ''
  19. for c in sha:
  20. if ord(c) < 16:
  21. hexsha += "0%x" % ord(c)
  22. else:
  23. hexsha += "%x" % ord(c)
  24. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  25. len(hexsha)
  26. return hexsha
  27. class ShaFile(object):
  28. """A git SHA file."""
  29. def _update_contents(self):
  30. """Update the _contents from the _text"""
  31. self._contents = [ord(c) for c in self._text]
  32. @classmethod
  33. def _parse_legacy_object(cls, map):
  34. """Parse a legacy object, creating it and setting object._text"""
  35. text = _decompress(map)
  36. object = None
  37. for posstype in type_map.keys():
  38. if text.startswith(posstype):
  39. object = type_map[posstype]()
  40. text = text[len(posstype):]
  41. break
  42. assert object is not None, "%s is not a known object type" % text[:9]
  43. assert text[0] == ' ', "%s is not a space" % text[0]
  44. text = text[1:]
  45. size = 0
  46. i = 0
  47. while text[0] >= '0' and text[0] <= '9':
  48. if i > 0 and size == 0:
  49. assert False, "Size is not in canonical format"
  50. size = (size * 10) + int(text[0])
  51. text = text[1:]
  52. i += 1
  53. object._size = size
  54. assert text[0] == "\0", "Size not followed by null"
  55. text = text[1:]
  56. object._text = text
  57. object._update_contents()
  58. return object
  59. @classmethod
  60. def _parse_object(cls, map):
  61. """Parse a new style object , creating it and setting object._text"""
  62. used = 0
  63. byte = ord(map[used])
  64. used += 1
  65. num_type = (byte >> 4) & 7
  66. try:
  67. object = num_type_map[num_type]()
  68. except KeyError:
  69. assert False, "Not a known type: %d" % num_type
  70. while((byte & 0x80) != 0):
  71. byte = ord(map[used])
  72. used += 1
  73. raw = map[used:]
  74. object._text = _decompress(raw)
  75. object._update_contents()
  76. return object
  77. @classmethod
  78. def _parse_file(cls, map):
  79. word = (ord(map[0]) << 8) + ord(map[1])
  80. if ord(map[0]) == 0x78 and (word % 31) == 0:
  81. return cls._parse_legacy_object(map)
  82. else:
  83. return cls._parse_object(map)
  84. def __init__(self):
  85. """Don't call this directly"""
  86. def _parse_text(self):
  87. """For subclasses to do initialistion time parsing"""
  88. @classmethod
  89. def from_file(cls, filename):
  90. """Get the contents of a SHA file on disk"""
  91. size = os.path.getsize(filename)
  92. f = open(filename, 'rb+')
  93. try:
  94. map = mmap.mmap(f.fileno(), size)
  95. shafile = cls._parse_file(map)
  96. shafile._parse_text()
  97. return shafile
  98. finally:
  99. f.close()
  100. def _header(self):
  101. return "%s %lu\0" % (self._type, len(self._contents))
  102. def contents(self):
  103. """The raw bytes of this object"""
  104. return self._contents
  105. def sha(self):
  106. """The SHA1 object that is the name of this object."""
  107. ressha = sha.new()
  108. ressha.update(self._header())
  109. ressha.update(self._text)
  110. return ressha
  111. class Blob(ShaFile):
  112. """A Git Blob object."""
  113. _type = blob_id
  114. def text(self):
  115. """The text contained within the blob object."""
  116. return self._text
  117. @classmethod
  118. def from_file(cls, filename):
  119. blob = ShaFile.from_file(filename)
  120. assert blob._type == cls._type, "%s is not a blob object" % filename
  121. return blob
  122. @classmethod
  123. def from_string(cls, string):
  124. """Create a blob from a string."""
  125. shafile = cls()
  126. shafile._text = string
  127. shafile._update_contents()
  128. return shafile
  129. class Tree(ShaFile):
  130. """A Git tree object"""
  131. _type = tree_id
  132. @classmethod
  133. def from_file(cls, filename):
  134. tree = ShaFile.from_file(filename)
  135. assert tree._type == cls._type, "%s is not a tree object" % filename
  136. return tree
  137. def entries(self):
  138. """Reutrn a list of tuples describing the tree entries"""
  139. return self._entries
  140. def _parse_text(self):
  141. """Grab the entries in the tree"""
  142. self._entries = []
  143. count = 0
  144. while count < len(self._text):
  145. mode = 0
  146. chr = self._text[count]
  147. while chr != ' ':
  148. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  149. mode = (mode << 3) + (ord(chr) - ord('0'))
  150. count += 1
  151. chr = self._text[count]
  152. count += 1
  153. chr = self._text[count]
  154. name = ''
  155. while chr != '\0':
  156. name += chr
  157. count += 1
  158. chr = self._text[count]
  159. count += 1
  160. chr = self._text[count]
  161. sha = self._text[count:count+20]
  162. hexsha = sha_to_hex(sha)
  163. self._entries.append((mode, name, hexsha))
  164. count = count + 20
  165. class Commit(ShaFile):
  166. """A git commit object"""
  167. _type = commit_id
  168. @classmethod
  169. def from_file(cls, filename):
  170. commit = ShaFile.from_file(filename)
  171. assert commit._type == cls._type, "%s is not a commit object" % filename
  172. return commit
  173. def _parse_text(self):
  174. text = self._text
  175. count = 0
  176. assert text.startswith(tree_id), "Invlid commit object, " \
  177. "must start with %s" % tree_id
  178. count += len(tree_id)
  179. assert text[count] == ' ', "Invalid commit object, " \
  180. "%s must be followed by space not %s" % (tree_id, text[count])
  181. count += 1
  182. self._tree = text[count:count+40]
  183. count = count + 40
  184. assert text[count] == "\n", "Invalid commit object, " \
  185. "tree sha must be followed by newline"
  186. count += 1
  187. self._parents = []
  188. while text[count:].startswith(parent_id):
  189. count += len(parent_id)
  190. assert text[count] == ' ', "Invalid commit object, " \
  191. "%s must be followed by space not %s" % (parent_id, text[count])
  192. count += 1
  193. self._parents.append(text[count:count+40])
  194. count += 40
  195. assert text[count] == "\n", "Invalid commit object, " \
  196. "parent sha must be followed by newline"
  197. count += 1
  198. self._author = None
  199. if text[count:].startswith(author_id):
  200. count += len(author_id)
  201. assert text[count] == ' ', "Invalid commit object, " \
  202. "%s must be followed by space not %s" % (author_id, text[count])
  203. count += 1
  204. self._author = ''
  205. while text[count] != '\n':
  206. self._author += text[count]
  207. count += 1
  208. count += 1
  209. self._committer = None
  210. if text[count:].startswith(committer_id):
  211. count += len(committer_id)
  212. assert text[count] == ' ', "Invalid commit object, " \
  213. "%s must be followed by space not %s" % (committer_id, text[count])
  214. count += 1
  215. self._committer = ''
  216. while text[count] != '\n':
  217. self._committer += text[count]
  218. count += 1
  219. count += 1
  220. assert text[count] == '\n', "There must be a new line after the headers"
  221. count += 1
  222. self._message = text[count:]
  223. def tree(self):
  224. """Returns the tree that is the state of this commit"""
  225. return self._tree
  226. def parents(self):
  227. """Return a list of parents of this commit."""
  228. return self._parents
  229. def author(self):
  230. """Returns the name of the author of the commit"""
  231. return self._author
  232. def committer(self):
  233. """Returns the name of the committer of the commit"""
  234. return self._committer
  235. def message(self):
  236. """Returns the commit message"""
  237. return self._message
  238. type_map = {
  239. blob_id : Blob,
  240. tree_id : Tree,
  241. commit_id : Commit,
  242. }
  243. num_type_map = {
  244. 1 : Commit,
  245. 2 : Tree,
  246. 3 : Blob,
  247. }