objects.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # The header parsing code is based on that from git itself, which is
  4. # Copyright (C) 2005 Linus Torvalds
  5. # and licensed under v2 of the GPL.
  6. #
  7. # This program is free software; you can redistribute it and/or
  8. # modify it under the terms of the GNU General Public License
  9. # as published by the Free Software Foundation; version 2
  10. # of the License.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program; if not, write to the Free Software
  19. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  20. # MA 02110-1301, USA.
  21. import mmap
  22. import os
  23. import sha
  24. import zlib
  25. from errors import (NotCommitError,
  26. NotTreeError,
  27. NotBlobError,
  28. )
  29. blob_id = "blob"
  30. tag_id = "tag"
  31. tree_id = "tree"
  32. commit_id = "commit"
  33. parent_id = "parent"
  34. author_id = "author"
  35. committer_id = "committer"
  36. def _decompress(string):
  37. dcomp = zlib.decompressobj()
  38. dcomped = dcomp.decompress(string)
  39. dcomped += dcomp.flush()
  40. return dcomped
  41. def sha_to_hex(sha):
  42. """Takes a string and returns the hex of the sha within"""
  43. hexsha = ''
  44. for c in sha:
  45. hexsha += "%02x" % ord(c)
  46. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  47. len(hexsha)
  48. return hexsha
  49. class ShaFile(object):
  50. """A git SHA file."""
  51. def _update_contents(self):
  52. """Update the _contents from the _text"""
  53. self._contents = [ord(c) for c in self._text]
  54. @classmethod
  55. def _parse_legacy_object(cls, map):
  56. """Parse a legacy object, creating it and setting object._text"""
  57. text = _decompress(map)
  58. object = None
  59. for posstype in type_map.keys():
  60. if text.startswith(posstype):
  61. object = type_map[posstype]()
  62. text = text[len(posstype):]
  63. break
  64. assert object is not None, "%s is not a known object type" % text[:9]
  65. assert text[0] == ' ', "%s is not a space" % text[0]
  66. text = text[1:]
  67. size = 0
  68. i = 0
  69. while text[0] >= '0' and text[0] <= '9':
  70. if i > 0 and size == 0:
  71. assert False, "Size is not in canonical format"
  72. size = (size * 10) + int(text[0])
  73. text = text[1:]
  74. i += 1
  75. object._size = size
  76. assert text[0] == "\0", "Size not followed by null"
  77. text = text[1:]
  78. object._text = text
  79. object._update_contents()
  80. return object
  81. def as_raw_string(self):
  82. return self._num_type, self._text
  83. @classmethod
  84. def _parse_object(cls, map):
  85. """Parse a new style object , creating it and setting object._text"""
  86. used = 0
  87. byte = ord(map[used])
  88. used += 1
  89. num_type = (byte >> 4) & 7
  90. try:
  91. object = num_type_map[num_type]()
  92. except KeyError:
  93. assert False, "Not a known type: %d" % num_type
  94. while((byte & 0x80) != 0):
  95. byte = ord(map[used])
  96. used += 1
  97. raw = map[used:]
  98. object._text = _decompress(raw)
  99. object._update_contents()
  100. return object
  101. @classmethod
  102. def _parse_file(cls, map):
  103. word = (ord(map[0]) << 8) + ord(map[1])
  104. if ord(map[0]) == 0x78 and (word % 31) == 0:
  105. return cls._parse_legacy_object(map)
  106. else:
  107. return cls._parse_object(map)
  108. def __init__(self):
  109. """Don't call this directly"""
  110. def _parse_text(self):
  111. """For subclasses to do initialistion time parsing"""
  112. @classmethod
  113. def from_file(cls, filename):
  114. """Get the contents of a SHA file on disk"""
  115. size = os.path.getsize(filename)
  116. f = open(filename, 'rb')
  117. try:
  118. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  119. shafile = cls._parse_file(map)
  120. shafile._parse_text()
  121. return shafile
  122. finally:
  123. f.close()
  124. @classmethod
  125. def from_raw_string(cls, type, string):
  126. """Creates an object of the indicated type from the raw string given.
  127. Type is the numeric type of an object. String is the raw uncompressed
  128. contents.
  129. """
  130. real_class = num_type_map[type]
  131. obj = real_class()
  132. obj._num_type = type
  133. obj._text = string
  134. obj._update_contents()
  135. return obj
  136. def _header(self):
  137. return "%s %lu\0" % (self._type, len(self._contents))
  138. def contents(self):
  139. """The raw bytes of this object"""
  140. return self._contents
  141. def crc32(self):
  142. return zlib.crc32(self._text)
  143. def sha(self):
  144. """The SHA1 object that is the name of this object."""
  145. ressha = sha.new()
  146. ressha.update(self._header())
  147. ressha.update(self._text)
  148. return ressha
  149. def __eq__(self, other):
  150. """Return true id the sha of the two objects match.
  151. The __le__ etc methods aren't overriden as they make no sense,
  152. certainly at this level.
  153. """
  154. return self.sha().digest() == other.sha().digest()
  155. class Blob(ShaFile):
  156. """A Git Blob object."""
  157. _type = blob_id
  158. def text(self):
  159. """The text contained within the blob object."""
  160. return self._text
  161. @classmethod
  162. def from_file(cls, filename):
  163. blob = ShaFile.from_file(filename)
  164. if blob._type != cls._type:
  165. raise NotBlobError(filename)
  166. return blob
  167. @classmethod
  168. def from_string(cls, string):
  169. """Create a blob from a string."""
  170. shafile = cls()
  171. shafile._text = string
  172. shafile._update_contents()
  173. return shafile
  174. class Tag(ShaFile):
  175. """A Git Tag object."""
  176. _type = tag_id
  177. @classmethod
  178. def from_file(cls, filename):
  179. blob = ShaFile.from_file(filename)
  180. if blob._type != cls._type:
  181. raise NotBlobError(filename)
  182. return blob
  183. @classmethod
  184. def from_string(cls, string):
  185. """Create a blob from a string."""
  186. shafile = cls()
  187. shafile._text = string
  188. shafile._update_contents()
  189. return shafile
  190. class Tree(ShaFile):
  191. """A Git tree object"""
  192. _type = tree_id
  193. @classmethod
  194. def from_file(cls, filename):
  195. tree = ShaFile.from_file(filename)
  196. if tree._type != cls._type:
  197. raise NotTreeError(filename)
  198. return tree
  199. def entries(self):
  200. """Return a list of tuples describing the tree entries"""
  201. return self._entries
  202. def _parse_text(self):
  203. """Grab the entries in the tree"""
  204. self._entries = []
  205. count = 0
  206. while count < len(self._text):
  207. mode = 0
  208. chr = self._text[count]
  209. while chr != ' ':
  210. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  211. mode = (mode << 3) + (ord(chr) - ord('0'))
  212. count += 1
  213. chr = self._text[count]
  214. count += 1
  215. chr = self._text[count]
  216. name = ''
  217. while chr != '\0':
  218. name += chr
  219. count += 1
  220. chr = self._text[count]
  221. count += 1
  222. chr = self._text[count]
  223. sha = self._text[count:count+20]
  224. hexsha = sha_to_hex(sha)
  225. self._entries.append((mode, name, hexsha))
  226. count = count + 20
  227. class Commit(ShaFile):
  228. """A git commit object"""
  229. _type = commit_id
  230. @classmethod
  231. def from_file(cls, filename):
  232. commit = ShaFile.from_file(filename)
  233. if commit._type != cls._type:
  234. raise NotCommitError(filename)
  235. return commit
  236. def _parse_text(self):
  237. text = self._text
  238. count = 0
  239. assert text.startswith(tree_id), "Invalid commit object, " \
  240. "must start with %s" % tree_id
  241. count += len(tree_id)
  242. assert text[count] == ' ', "Invalid commit object, " \
  243. "%s must be followed by space not %s" % (tree_id, text[count])
  244. count += 1
  245. self._tree = text[count:count+40]
  246. count = count + 40
  247. assert text[count] == "\n", "Invalid commit object, " \
  248. "tree sha must be followed by newline"
  249. count += 1
  250. self._parents = []
  251. while text[count:].startswith(parent_id):
  252. count += len(parent_id)
  253. assert text[count] == ' ', "Invalid commit object, " \
  254. "%s must be followed by space not %s" % (parent_id, text[count])
  255. count += 1
  256. self._parents.append(text[count:count+40])
  257. count += 40
  258. assert text[count] == "\n", "Invalid commit object, " \
  259. "parent sha must be followed by newline"
  260. count += 1
  261. self._author = None
  262. if text[count:].startswith(author_id):
  263. count += len(author_id)
  264. assert text[count] == ' ', "Invalid commit object, " \
  265. "%s must be followed by space not %s" % (author_id, text[count])
  266. count += 1
  267. self._author = ''
  268. while text[count] != '>':
  269. assert text[count] != '\n', "Malformed author information"
  270. self._author += text[count]
  271. count += 1
  272. self._author += text[count]
  273. count += 1
  274. while text[count] != '\n':
  275. count += 1
  276. count += 1
  277. self._committer = None
  278. if text[count:].startswith(committer_id):
  279. count += len(committer_id)
  280. assert text[count] == ' ', "Invalid commit object, " \
  281. "%s must be followed by space not %s" % (committer_id, text[count])
  282. count += 1
  283. self._committer = ''
  284. while text[count] != '>':
  285. assert text[count] != '\n', "Malformed committer information"
  286. self._committer += text[count]
  287. count += 1
  288. self._committer += text[count]
  289. count += 1
  290. assert text[count] == ' ', "Invalid commit object, " \
  291. "commiter information must be followed by space not %s" % text[count]
  292. count += 1
  293. self._commit_time = int(text[count:count+10])
  294. while text[count] != '\n':
  295. count += 1
  296. count += 1
  297. assert text[count] == '\n', "There must be a new line after the headers"
  298. count += 1
  299. # XXX: There can be an encoding field.
  300. self._message = text[count:]
  301. def tree(self):
  302. """Returns the tree that is the state of this commit"""
  303. return self._tree
  304. @property
  305. def parents(self):
  306. """Return a list of parents of this commit."""
  307. return self._parents
  308. def author(self):
  309. """Returns the name of the author of the commit"""
  310. return self._author
  311. def committer(self):
  312. """Returns the name of the committer of the commit"""
  313. return self._committer
  314. def message(self):
  315. """Returns the commit message"""
  316. return self._message
  317. def commit_time(self):
  318. """Returns the timestamp of the commit.
  319. Returns it as the number of seconds since the epoch.
  320. """
  321. return self._commit_time
  322. type_map = {
  323. blob_id : Blob,
  324. tree_id : Tree,
  325. commit_id : Commit,
  326. tag_id: Tag,
  327. }
  328. num_type_map = {
  329. 0: None,
  330. 1: Commit,
  331. 2: Tree,
  332. 3: Blob,
  333. 4: Tag,
  334. # 5 Is reserved for further expansion
  335. }