objects.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. # The header parsing code is based on that from git itself, which is
  5. # Copyright (C) 2005 Linus Torvalds
  6. # and licensed under v2 of the GPL.
  7. #
  8. # This program is free software; you can redistribute it and/or
  9. # modify it under the terms of the GNU General Public License
  10. # as published by the Free Software Foundation; version 2
  11. # of the License.
  12. #
  13. # This program is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with this program; if not, write to the Free Software
  20. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  21. # MA 02110-1301, USA.
  22. import mmap
  23. import os
  24. import sha
  25. import zlib
  26. from errors import (NotCommitError,
  27. NotTreeError,
  28. NotBlobError,
  29. )
  30. BLOB_ID = "blob"
  31. TAG_ID = "tag"
  32. TREE_ID = "tree"
  33. COMMIT_ID = "commit"
  34. PARENT_ID = "parent"
  35. AUTHOR_ID = "author"
  36. COMMITTER_ID = "committer"
  37. OBJECT_ID = "object"
  38. TYPE_ID = "type"
  39. TAGGER_ID = "tagger"
  40. def _decompress(string):
  41. dcomp = zlib.decompressobj()
  42. dcomped = dcomp.decompress(string)
  43. dcomped += dcomp.flush()
  44. return dcomped
  45. def sha_to_hex(sha):
  46. """Takes a string and returns the hex of the sha within"""
  47. hexsha = ''
  48. for c in sha:
  49. hexsha += "%02x" % ord(c)
  50. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % \
  51. len(hexsha)
  52. return hexsha
  53. def hex_to_sha(hex):
  54. """Takes a hex sha and returns a binary sha"""
  55. sha = ''
  56. for i in range(0, len(hex), 2):
  57. sha += chr(int(hex[i:i+2], 16))
  58. assert len(sha) == 20, "Incorrent length of sha1: %d" % len(sha)
  59. return sha
  60. class ShaFile(object):
  61. """A git SHA file."""
  62. @classmethod
  63. def _parse_legacy_object(cls, map):
  64. """Parse a legacy object, creating it and setting object._text"""
  65. text = _decompress(map)
  66. object = None
  67. for posstype in type_map.keys():
  68. if text.startswith(posstype):
  69. object = type_map[posstype]()
  70. text = text[len(posstype):]
  71. break
  72. assert object is not None, "%s is not a known object type" % text[:9]
  73. assert text[0] == ' ', "%s is not a space" % text[0]
  74. text = text[1:]
  75. size = 0
  76. i = 0
  77. while text[0] >= '0' and text[0] <= '9':
  78. if i > 0 and size == 0:
  79. assert False, "Size is not in canonical format"
  80. size = (size * 10) + int(text[0])
  81. text = text[1:]
  82. i += 1
  83. object._size = size
  84. assert text[0] == "\0", "Size not followed by null"
  85. text = text[1:]
  86. object._text = text
  87. return object
  88. def as_raw_string(self):
  89. return self._num_type, self._text
  90. @classmethod
  91. def _parse_object(cls, map):
  92. """Parse a new style object , creating it and setting object._text"""
  93. used = 0
  94. byte = ord(map[used])
  95. used += 1
  96. num_type = (byte >> 4) & 7
  97. try:
  98. object = num_type_map[num_type]()
  99. except KeyError:
  100. assert False, "Not a known type: %d" % num_type
  101. while((byte & 0x80) != 0):
  102. byte = ord(map[used])
  103. used += 1
  104. raw = map[used:]
  105. object._text = _decompress(raw)
  106. return object
  107. @classmethod
  108. def _parse_file(cls, map):
  109. word = (ord(map[0]) << 8) + ord(map[1])
  110. if ord(map[0]) == 0x78 and (word % 31) == 0:
  111. return cls._parse_legacy_object(map)
  112. else:
  113. return cls._parse_object(map)
  114. def __init__(self):
  115. """Don't call this directly"""
  116. def _parse_text(self):
  117. """For subclasses to do initialisation time parsing"""
  118. @classmethod
  119. def from_file(cls, filename):
  120. """Get the contents of a SHA file on disk"""
  121. size = os.path.getsize(filename)
  122. f = open(filename, 'rb')
  123. try:
  124. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  125. shafile = cls._parse_file(map)
  126. shafile._parse_text()
  127. return shafile
  128. finally:
  129. f.close()
  130. @classmethod
  131. def from_raw_string(cls, type, string):
  132. """Creates an object of the indicated type from the raw string given.
  133. Type is the numeric type of an object. String is the raw uncompressed
  134. contents.
  135. """
  136. real_class = num_type_map[type]
  137. obj = real_class()
  138. obj._num_type = type
  139. obj._text = string
  140. obj._parse_text()
  141. return obj
  142. def _header(self):
  143. return "%s %lu\0" % (self._type, len(self._text))
  144. def crc32(self):
  145. return zlib.crc32(self._text)
  146. def sha(self):
  147. """The SHA1 object that is the name of this object."""
  148. ressha = sha.new()
  149. ressha.update(self._header())
  150. ressha.update(self._text)
  151. return ressha
  152. @property
  153. def id(self):
  154. return self.sha().hexdigest()
  155. def __repr__(self):
  156. return "<%s %s>" % (self.__class__.__name__, self.id)
  157. def __eq__(self, other):
  158. """Return true id the sha of the two objects match.
  159. The __le__ etc methods aren't overriden as they make no sense,
  160. certainly at this level.
  161. """
  162. return self.sha().digest() == other.sha().digest()
  163. class Blob(ShaFile):
  164. """A Git Blob object."""
  165. _type = BLOB_ID
  166. _num_type = 3
  167. @property
  168. def data(self):
  169. """The text contained within the blob object."""
  170. return self._text
  171. @classmethod
  172. def from_file(cls, filename):
  173. blob = ShaFile.from_file(filename)
  174. if blob._type != cls._type:
  175. raise NotBlobError(filename)
  176. return blob
  177. @classmethod
  178. def from_string(cls, string):
  179. """Create a blob from a string."""
  180. shafile = cls()
  181. shafile._text = string
  182. return shafile
  183. class Tag(ShaFile):
  184. """A Git Tag object."""
  185. _type = TAG_ID
  186. @classmethod
  187. def from_file(cls, filename):
  188. blob = ShaFile.from_file(filename)
  189. if blob._type != cls._type:
  190. raise NotBlobError(filename)
  191. return blob
  192. @classmethod
  193. def from_string(cls, string):
  194. """Create a blob from a string."""
  195. shafile = cls()
  196. shafile._text = string
  197. return shafile
  198. def _parse_text(self):
  199. """Grab the metadata attached to the tag"""
  200. text = self._text
  201. count = 0
  202. assert text.startswith(OBJECT_ID), "Invalid tag object, " \
  203. "must start with %s" % OBJECT_ID
  204. count += len(OBJECT_ID)
  205. assert text[count] == ' ', "Invalid tag object, " \
  206. "%s must be followed by space not %s" % (OBJECT_ID, text[count])
  207. count += 1
  208. self._object_sha = text[count:count+40]
  209. count += 40
  210. assert text[count] == '\n', "Invalid tag object, " \
  211. "%s sha must be followed by newline" % OBJECT_ID
  212. count += 1
  213. assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
  214. "%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
  215. count += len(TYPE_ID)
  216. assert text[count] == ' ', "Invalid tag object, " \
  217. "%s must be followed by space not %s" % (TAG_ID, text[count])
  218. count += 1
  219. self._object_type = ""
  220. while text[count] != '\n':
  221. self._object_type += text[count]
  222. count += 1
  223. count += 1
  224. assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
  225. "unexpected object type %s" % self._object_type
  226. self._object_type = type_map[self._object_type]
  227. assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
  228. "object type must be followed by %s" % (TAG_ID)
  229. count += len(TAG_ID)
  230. assert text[count] == ' ', "Invalid tag object, " \
  231. "%s must be followed by space not %s" % (TAG_ID, text[count])
  232. count += 1
  233. self._name = ""
  234. while text[count] != '\n':
  235. self._name += text[count]
  236. count += 1
  237. count += 1
  238. assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
  239. "%s must be followed by %s" % (TAG_ID, TAGGER_ID)
  240. count += len(TAGGER_ID)
  241. assert text[count] == ' ', "Invalid tag object, " \
  242. "%s must be followed by space not %s" % (TAGGER_ID, text[count])
  243. count += 1
  244. self._tagger = ""
  245. while text[count] != '>':
  246. assert text[count] != '\n', "Malformed tagger information"
  247. self._tagger += text[count]
  248. count += 1
  249. self._tagger += text[count]
  250. count += 1
  251. assert text[count] == ' ', "Invalid tag object, " \
  252. "tagger information must be followed by space not %s" % text[count]
  253. count += 1
  254. self._tag_time = int(text[count:count+10])
  255. while text[count] != '\n':
  256. count += 1
  257. count += 1
  258. assert text[count] == '\n', "There must be a new line after the headers"
  259. count += 1
  260. self._message = text[count:]
  261. @property
  262. def object(self):
  263. """Returns the object pointed by this tag, represented as a tuple(type, sha)"""
  264. return (self._object_type, self._object_sha)
  265. @property
  266. def name(self):
  267. """Returns the name of this tag"""
  268. return self._name
  269. @property
  270. def tagger(self):
  271. """Returns the name of the person who created this tag"""
  272. return self._tagger
  273. @property
  274. def tag_time(self):
  275. """Returns the creation timestamp of the tag.
  276. Returns it as the number of seconds since the epoch"""
  277. return self._tag_time
  278. @property
  279. def message(self):
  280. """Returns the message attached to this tag"""
  281. return self._message
  282. class Tree(ShaFile):
  283. """A Git tree object"""
  284. _type = TREE_ID
  285. _num_type = 2
  286. def __init__(self):
  287. self._entries = []
  288. @classmethod
  289. def from_file(cls, filename):
  290. tree = ShaFile.from_file(filename)
  291. if tree._type != cls._type:
  292. raise NotTreeError(filename)
  293. return tree
  294. def add(self, mode, name, hexsha):
  295. self._entries.append((mode, name, hexsha))
  296. def entries(self):
  297. """Return a list of tuples describing the tree entries"""
  298. return self._entries
  299. def _parse_text(self):
  300. """Grab the entries in the tree"""
  301. count = 0
  302. while count < len(self._text):
  303. mode = 0
  304. chr = self._text[count]
  305. while chr != ' ':
  306. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  307. mode = (mode << 3) + (ord(chr) - ord('0'))
  308. count += 1
  309. chr = self._text[count]
  310. count += 1
  311. chr = self._text[count]
  312. name = ''
  313. while chr != '\0':
  314. name += chr
  315. count += 1
  316. chr = self._text[count]
  317. count += 1
  318. chr = self._text[count]
  319. sha = self._text[count:count+20]
  320. hexsha = sha_to_hex(sha)
  321. self.add(mode, name, hexsha)
  322. count = count + 20
  323. def serialize(self):
  324. self._text = ""
  325. for mode, name, hexsha in self._entries:
  326. self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  327. class Commit(ShaFile):
  328. """A git commit object"""
  329. _type = COMMIT_ID
  330. _num_type = 1
  331. def __init__(self):
  332. self._parents = []
  333. @classmethod
  334. def from_file(cls, filename):
  335. commit = ShaFile.from_file(filename)
  336. if commit._type != cls._type:
  337. raise NotCommitError(filename)
  338. return commit
  339. def _parse_text(self):
  340. text = self._text
  341. count = 0
  342. assert text.startswith(TREE_ID), "Invalid commit object, " \
  343. "must start with %s" % TREE_ID
  344. count += len(TREE_ID)
  345. assert text[count] == ' ', "Invalid commit object, " \
  346. "%s must be followed by space not %s" % (TREE_ID, text[count])
  347. count += 1
  348. self._tree = text[count:count+40]
  349. count = count + 40
  350. assert text[count] == "\n", "Invalid commit object, " \
  351. "tree sha must be followed by newline"
  352. count += 1
  353. self._parents = []
  354. while text[count:].startswith(PARENT_ID):
  355. count += len(PARENT_ID)
  356. assert text[count] == ' ', "Invalid commit object, " \
  357. "%s must be followed by space not %s" % (PARENT_ID, text[count])
  358. count += 1
  359. self._parents.append(text[count:count+40])
  360. count += 40
  361. assert text[count] == "\n", "Invalid commit object, " \
  362. "parent sha must be followed by newline"
  363. count += 1
  364. self._author = None
  365. if text[count:].startswith(AUTHOR_ID):
  366. count += len(AUTHOR_ID)
  367. assert text[count] == ' ', "Invalid commit object, " \
  368. "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
  369. count += 1
  370. self._author = ''
  371. while text[count] != '>':
  372. assert text[count] != '\n', "Malformed author information"
  373. self._author += text[count]
  374. count += 1
  375. self._author += text[count]
  376. count += 1
  377. while text[count] != '\n':
  378. count += 1
  379. count += 1
  380. self._committer = None
  381. if text[count:].startswith(COMMITTER_ID):
  382. count += len(COMMITTER_ID)
  383. assert text[count] == ' ', "Invalid commit object, " \
  384. "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
  385. count += 1
  386. self._committer = ''
  387. while text[count] != '>':
  388. assert text[count] != '\n', "Malformed committer information"
  389. self._committer += text[count]
  390. count += 1
  391. self._committer += text[count]
  392. count += 1
  393. assert text[count] == ' ', "Invalid commit object, " \
  394. "commiter information must be followed by space not %s" % text[count]
  395. count += 1
  396. self._commit_time = int(text[count:count+10])
  397. while text[count] != '\n':
  398. count += 1
  399. count += 1
  400. assert text[count] == '\n', "There must be a new line after the headers"
  401. count += 1
  402. # XXX: There can be an encoding field.
  403. self._message = text[count:]
  404. def serialize(self):
  405. self._text = ""
  406. self._text += "%s %s\n" % (TREE_ID, self._tree)
  407. for p in self._parents:
  408. self._text += "%s %s\n" % (PARENT_ID, p)
  409. self._text += "%s %s %s +0000\n" % (AUTHOR_ID, self._author, str(self._commit_time))
  410. self._text += "%s %s %s +0000\n" % (COMMITTER_ID, self._committer, str(self._commit_time))
  411. self._text += "\n" # There must be a new line after the headers
  412. self._text += self._message
  413. @property
  414. def tree(self):
  415. """Returns the tree that is the state of this commit"""
  416. return self._tree
  417. @property
  418. def parents(self):
  419. """Return a list of parents of this commit."""
  420. return self._parents
  421. @property
  422. def author(self):
  423. """Returns the name of the author of the commit"""
  424. return self._author
  425. @property
  426. def committer(self):
  427. """Returns the name of the committer of the commit"""
  428. return self._committer
  429. @property
  430. def message(self):
  431. """Returns the commit message"""
  432. return self._message
  433. @property
  434. def commit_time(self):
  435. """Returns the timestamp of the commit.
  436. Returns it as the number of seconds since the epoch.
  437. """
  438. return self._commit_time
  439. type_map = {
  440. BLOB_ID : Blob,
  441. TREE_ID : Tree,
  442. COMMIT_ID : Commit,
  443. TAG_ID: Tag,
  444. }
  445. num_type_map = {
  446. 0: None,
  447. 1: Commit,
  448. 2: Tree,
  449. 3: Blob,
  450. 4: Tag,
  451. # 5 Is reserved for further expansion
  452. }