objects.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) a later version of the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Access to base git objects."""
  20. import mmap
  21. import os
  22. import sha
  23. import zlib
  24. from dulwich.errors import (
  25. NotBlobError,
  26. NotCommitError,
  27. NotTreeError,
  28. )
# Textual names of the four git object types. TREE_ID and TAG_ID are also
# used below as header field names inside commit and tag objects.
BLOB_ID = "blob"
TAG_ID = "tag"
TREE_ID = "tree"
COMMIT_ID = "commit"
# Header field names read and written by Commit.
PARENT_ID = "parent"
AUTHOR_ID = "author"
COMMITTER_ID = "committer"
# Header field names read by Tag.
OBJECT_ID = "object"
TYPE_ID = "type"
TAGGER_ID = "tagger"
  39. def _decompress(string):
  40. dcomp = zlib.decompressobj()
  41. dcomped = dcomp.decompress(string)
  42. dcomped += dcomp.flush()
  43. return dcomped
  44. def sha_to_hex(sha):
  45. """Takes a string and returns the hex of the sha within"""
  46. hexsha = "".join(["%02x" % ord(c) for c in sha])
  47. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
  48. return hexsha
  49. def hex_to_sha(hex):
  50. """Takes a hex sha and returns a binary sha"""
  51. assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
  52. return ''.join([chr(int(hex[i:i+2], 16)) for i in xrange(0, len(hex), 2)])
  53. class ShaFile(object):
  54. """A git SHA file."""
  55. @classmethod
  56. def _parse_legacy_object(cls, map):
  57. """Parse a legacy object, creating it and setting object._text"""
  58. text = _decompress(map)
  59. object = None
  60. for posstype in type_map.keys():
  61. if text.startswith(posstype):
  62. object = type_map[posstype]()
  63. text = text[len(posstype):]
  64. break
  65. assert object is not None, "%s is not a known object type" % text[:9]
  66. assert text[0] == ' ', "%s is not a space" % text[0]
  67. text = text[1:]
  68. size = 0
  69. i = 0
  70. while text[0] >= '0' and text[0] <= '9':
  71. if i > 0 and size == 0:
  72. assert False, "Size is not in canonical format"
  73. size = (size * 10) + int(text[0])
  74. text = text[1:]
  75. i += 1
  76. object._size = size
  77. assert text[0] == "\0", "Size not followed by null"
  78. text = text[1:]
  79. object._text = text
  80. return object
  81. def as_legacy_object(self):
  82. return zlib.compress("%s %d\0%s" % (self._type, len(self._text), self._text))
  83. def as_raw_string(self):
  84. return self._num_type, self._text
  85. @classmethod
  86. def _parse_object(cls, map):
  87. """Parse a new style object , creating it and setting object._text"""
  88. used = 0
  89. byte = ord(map[used])
  90. used += 1
  91. num_type = (byte >> 4) & 7
  92. try:
  93. object = num_type_map[num_type]()
  94. except KeyError:
  95. raise AssertionError("Not a known type: %d" % num_type)
  96. while (byte & 0x80) != 0:
  97. byte = ord(map[used])
  98. used += 1
  99. raw = map[used:]
  100. object._text = _decompress(raw)
  101. return object
  102. @classmethod
  103. def _parse_file(cls, map):
  104. word = (ord(map[0]) << 8) + ord(map[1])
  105. if ord(map[0]) == 0x78 and (word % 31) == 0:
  106. return cls._parse_legacy_object(map)
  107. else:
  108. return cls._parse_object(map)
  109. def __init__(self):
  110. """Don't call this directly"""
  111. def _parse_text(self):
  112. """For subclasses to do initialisation time parsing"""
  113. @classmethod
  114. def from_file(cls, filename):
  115. """Get the contents of a SHA file on disk"""
  116. size = os.path.getsize(filename)
  117. f = open(filename, 'rb')
  118. try:
  119. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  120. shafile = cls._parse_file(map)
  121. shafile._parse_text()
  122. return shafile
  123. finally:
  124. f.close()
  125. @classmethod
  126. def from_raw_string(cls, type, string):
  127. """Creates an object of the indicated type from the raw string given.
  128. Type is the numeric type of an object. String is the raw uncompressed
  129. contents.
  130. """
  131. real_class = num_type_map[type]
  132. obj = real_class()
  133. obj._num_type = type
  134. obj._text = string
  135. obj._parse_text()
  136. return obj
  137. def _header(self):
  138. return "%s %lu\0" % (self._type, len(self._text))
  139. def sha(self):
  140. """The SHA1 object that is the name of this object."""
  141. ressha = sha.new()
  142. ressha.update(self._header())
  143. ressha.update(self._text)
  144. return ressha
  145. @property
  146. def id(self):
  147. return self.sha().hexdigest()
  148. @property
  149. def type(self):
  150. return self._num_type
  151. def __repr__(self):
  152. return "<%s %s>" % (self.__class__.__name__, self.id)
  153. def __eq__(self, other):
  154. """Return true id the sha of the two objects match.
  155. The __le__ etc methods aren't overriden as they make no sense,
  156. certainly at this level.
  157. """
  158. return self.sha().digest() == other.sha().digest()
  159. class Blob(ShaFile):
  160. """A Git Blob object."""
  161. _type = BLOB_ID
  162. _num_type = 3
  163. @property
  164. def data(self):
  165. """The text contained within the blob object."""
  166. return self._text
  167. @classmethod
  168. def from_file(cls, filename):
  169. blob = ShaFile.from_file(filename)
  170. if blob._type != cls._type:
  171. raise NotBlobError(filename)
  172. return blob
  173. @classmethod
  174. def from_string(cls, string):
  175. """Create a blob from a string."""
  176. shafile = cls()
  177. shafile._text = string
  178. return shafile
  179. class Tag(ShaFile):
  180. """A Git Tag object."""
  181. _type = TAG_ID
  182. _num_type = 4
  183. @classmethod
  184. def from_file(cls, filename):
  185. blob = ShaFile.from_file(filename)
  186. if blob._type != cls._type:
  187. raise NotBlobError(filename)
  188. return blob
  189. @classmethod
  190. def from_string(cls, string):
  191. """Create a blob from a string."""
  192. shafile = cls()
  193. shafile._text = string
  194. return shafile
  195. def _parse_text(self):
  196. """Grab the metadata attached to the tag"""
  197. text = self._text
  198. count = 0
  199. assert text.startswith(OBJECT_ID), "Invalid tag object, " \
  200. "must start with %s" % OBJECT_ID
  201. count += len(OBJECT_ID)
  202. assert text[count] == ' ', "Invalid tag object, " \
  203. "%s must be followed by space not %s" % (OBJECT_ID, text[count])
  204. count += 1
  205. self._object_sha = text[count:count+40]
  206. count += 40
  207. assert text[count] == '\n', "Invalid tag object, " \
  208. "%s sha must be followed by newline" % OBJECT_ID
  209. count += 1
  210. assert text[count:].startswith(TYPE_ID), "Invalid tag object, " \
  211. "%s sha must be followed by %s" % (OBJECT_ID, TYPE_ID)
  212. count += len(TYPE_ID)
  213. assert text[count] == ' ', "Invalid tag object, " \
  214. "%s must be followed by space not %s" % (TAG_ID, text[count])
  215. count += 1
  216. self._object_type = ""
  217. while text[count] != '\n':
  218. self._object_type += text[count]
  219. count += 1
  220. count += 1
  221. assert self._object_type in (COMMIT_ID, BLOB_ID, TREE_ID, TAG_ID), "Invalid tag object, " \
  222. "unexpected object type %s" % self._object_type
  223. self._object_type = type_map[self._object_type]
  224. assert text[count:].startswith(TAG_ID), "Invalid tag object, " \
  225. "object type must be followed by %s" % (TAG_ID)
  226. count += len(TAG_ID)
  227. assert text[count] == ' ', "Invalid tag object, " \
  228. "%s must be followed by space not %s" % (TAG_ID, text[count])
  229. count += 1
  230. self._name = ""
  231. while text[count] != '\n':
  232. self._name += text[count]
  233. count += 1
  234. count += 1
  235. assert text[count:].startswith(TAGGER_ID), "Invalid tag object, " \
  236. "%s must be followed by %s" % (TAG_ID, TAGGER_ID)
  237. count += len(TAGGER_ID)
  238. assert text[count] == ' ', "Invalid tag object, " \
  239. "%s must be followed by space not %s" % (TAGGER_ID, text[count])
  240. count += 1
  241. self._tagger = ""
  242. while text[count] != '>':
  243. assert text[count] != '\n', "Malformed tagger information"
  244. self._tagger += text[count]
  245. count += 1
  246. self._tagger += text[count]
  247. count += 1
  248. assert text[count] == ' ', "Invalid tag object, " \
  249. "tagger information must be followed by space not %s" % text[count]
  250. count += 1
  251. self._tag_time = int(text[count:count+10])
  252. while text[count] != '\n':
  253. count += 1
  254. count += 1
  255. assert text[count] == '\n', "There must be a new line after the headers"
  256. count += 1
  257. self._message = text[count:]
  258. @property
  259. def object(self):
  260. """Returns the object pointed by this tag, represented as a tuple(type, sha)"""
  261. return (self._object_type, self._object_sha)
  262. @property
  263. def name(self):
  264. """Returns the name of this tag"""
  265. return self._name
  266. @property
  267. def tagger(self):
  268. """Returns the name of the person who created this tag"""
  269. return self._tagger
  270. @property
  271. def tag_time(self):
  272. """Returns the creation timestamp of the tag.
  273. Returns it as the number of seconds since the epoch"""
  274. return self._tag_time
  275. @property
  276. def message(self):
  277. """Returns the message attached to this tag"""
  278. return self._message
  279. def parse_tree(text):
  280. ret = []
  281. count = 0
  282. while count < len(text):
  283. mode = 0
  284. chr = text[count]
  285. while chr != ' ':
  286. assert chr >= '0' and chr <= '7', "%s is not a valid mode char" % chr
  287. mode = (mode << 3) + (ord(chr) - ord('0'))
  288. count += 1
  289. chr = text[count]
  290. count += 1
  291. chr = text[count]
  292. name = ''
  293. while chr != '\0':
  294. name += chr
  295. count += 1
  296. chr = text[count]
  297. count += 1
  298. chr = text[count]
  299. sha = text[count:count+20]
  300. hexsha = sha_to_hex(sha)
  301. ret.append((mode, name, hexsha))
  302. count = count + 20
  303. return ret
  304. class Tree(ShaFile):
  305. """A Git tree object"""
  306. _type = TREE_ID
  307. _num_type = 2
  308. def __init__(self):
  309. self._entries = {}
  310. @classmethod
  311. def from_file(cls, filename):
  312. tree = ShaFile.from_file(filename)
  313. if tree._type != cls._type:
  314. raise NotTreeError(filename)
  315. return tree
  316. def __getitem__(self, name):
  317. return self._entries[name]
  318. def __setitem__(self, name, value):
  319. assert isinstance(value, tuple)
  320. assert len(value) == 2
  321. self._entries[name] = value
  322. def __delitem__(self, name):
  323. del self._entries[name]
  324. def add(self, mode, name, hexsha):
  325. self._entries[name] = mode, hexsha
  326. def entries(self):
  327. """Return a list of tuples describing the tree entries"""
  328. return [(mode, name, hexsha) for (name, (mode, hexsha)) in self._entries.iteritems()]
  329. def iteritems(self):
  330. for name in sorted(self._entries.keys()):
  331. yield name, self._entries[name][0], self._entries[name][1]
  332. def _parse_text(self):
  333. """Grab the entries in the tree"""
  334. self._entries = parse_tree(self._text)
  335. def serialize(self):
  336. self._text = ""
  337. for name, mode, hexsha in self.iteritems():
  338. self._text += "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  339. class Commit(ShaFile):
  340. """A git commit object"""
  341. _type = COMMIT_ID
  342. _num_type = 1
  343. def __init__(self):
  344. self._parents = []
  345. @classmethod
  346. def from_file(cls, filename):
  347. commit = ShaFile.from_file(filename)
  348. if commit._type != cls._type:
  349. raise NotCommitError(filename)
  350. return commit
  351. def _parse_text(self):
  352. text = self._text
  353. count = 0
  354. assert text.startswith(TREE_ID), "Invalid commit object, " \
  355. "must start with %s" % TREE_ID
  356. count += len(TREE_ID)
  357. assert text[count] == ' ', "Invalid commit object, " \
  358. "%s must be followed by space not %s" % (TREE_ID, text[count])
  359. count += 1
  360. self._tree = text[count:count+40]
  361. count = count + 40
  362. assert text[count] == "\n", "Invalid commit object, " \
  363. "tree sha must be followed by newline"
  364. count += 1
  365. self._parents = []
  366. while text[count:].startswith(PARENT_ID):
  367. count += len(PARENT_ID)
  368. assert text[count] == ' ', "Invalid commit object, " \
  369. "%s must be followed by space not %s" % (PARENT_ID, text[count])
  370. count += 1
  371. self._parents.append(text[count:count+40])
  372. count += 40
  373. assert text[count] == "\n", "Invalid commit object, " \
  374. "parent sha must be followed by newline"
  375. count += 1
  376. self._author = None
  377. if text[count:].startswith(AUTHOR_ID):
  378. count += len(AUTHOR_ID)
  379. assert text[count] == ' ', "Invalid commit object, " \
  380. "%s must be followed by space not %s" % (AUTHOR_ID, text[count])
  381. count += 1
  382. self._author = ''
  383. while text[count] != '>':
  384. assert text[count] != '\n', "Malformed author information"
  385. self._author += text[count]
  386. count += 1
  387. self._author += text[count]
  388. count += 1
  389. assert text[count] == ' ', "Invalid commit object, " \
  390. "author information must be followed by space not %s" % text[count]
  391. count += 1
  392. self._author_time = int(text[count:count+10])
  393. while text[count] != ' ':
  394. assert text[count] != '\n', "Malformed author information"
  395. count += 1
  396. self._author_timezone = int(text[count:count+6])
  397. count += 1
  398. while text[count] != '\n':
  399. count += 1
  400. count += 1
  401. self._committer = None
  402. if text[count:].startswith(COMMITTER_ID):
  403. count += len(COMMITTER_ID)
  404. assert text[count] == ' ', "Invalid commit object, " \
  405. "%s must be followed by space not %s" % (COMMITTER_ID, text[count])
  406. count += 1
  407. self._committer = ''
  408. while text[count] != '>':
  409. assert text[count] != '\n', "Malformed committer information"
  410. self._committer += text[count]
  411. count += 1
  412. self._committer += text[count]
  413. count += 1
  414. assert text[count] == ' ', "Invalid commit object, " \
  415. "commiter information must be followed by space not %s" % text[count]
  416. count += 1
  417. self._commit_time = int(text[count:count+10])
  418. while text[count] != ' ':
  419. assert text[count] != '\n', "Malformed committer information"
  420. count += 1
  421. self._commit_timezone = int(text[count:count+6])
  422. count += 1
  423. while text[count] != '\n':
  424. count += 1
  425. count += 1
  426. assert text[count] == '\n', "There must be a new line after the headers"
  427. count += 1
  428. # XXX: There can be an encoding field.
  429. self._message = text[count:]
  430. def serialize(self):
  431. self._text = ""
  432. self._text += "%s %s\n" % (TREE_ID, self._tree)
  433. for p in self._parents:
  434. self._text += "%s %s\n" % (PARENT_ID, p)
  435. self._text += "%s %s %s %+05d\n" % (AUTHOR_ID, self._author, str(self._author_time), self._author_timezone)
  436. self._text += "%s %s %s %+05d\n" % (COMMITTER_ID, self._committer, str(self._commit_time), self._commit_timezone)
  437. self._text += "\n" # There must be a new line after the headers
  438. self._text += self._message
  439. @property
  440. def tree(self):
  441. """Returns the tree that is the state of this commit"""
  442. return self._tree
  443. @property
  444. def parents(self):
  445. """Return a list of parents of this commit."""
  446. return self._parents
  447. @property
  448. def author(self):
  449. """Returns the name of the author of the commit"""
  450. return self._author
  451. @property
  452. def committer(self):
  453. """Returns the name of the committer of the commit"""
  454. return self._committer
  455. @property
  456. def message(self):
  457. """Returns the commit message"""
  458. return self._message
  459. @property
  460. def commit_time(self):
  461. """Returns the timestamp of the commit.
  462. Returns it as the number of seconds since the epoch.
  463. """
  464. return self._commit_time
  465. @property
  466. def commit_timezone(self):
  467. """Returns the zone the commit time is in
  468. """
  469. return self._commit_timezone
  470. @property
  471. def author_time(self):
  472. """Returns the timestamp the commit was written.
  473. Returns it as the number of seconds since the epoch.
  474. """
  475. return self._author_time
  476. @property
  477. def author_timezone(self):
  478. """Returns the zone the author time is in
  479. """
  480. return self._author_timezone
# Textual type name -> class; used when parsing legacy objects and when
# resolving the "type" field of a tag.
type_map = {
    BLOB_ID : Blob,
    TREE_ID : Tree,
    COMMIT_ID : Commit,
    TAG_ID: Tag,
}

# Numeric type -> class; used when parsing new style objects and by
# ShaFile.from_raw_string. Type 0 has no class.
num_type_map = {
    0: None,
    1: Commit,
    2: Tree,
    3: Blob,
    4: Tag,
    # 5 Is reserved for further expansion
}

try:
    # Try to import C versions
    # When the import succeeds these names rebind the pure-Python
    # functions defined above; otherwise those stay in use.
    from dulwich._objects import hex_to_sha, sha_to_hex, parse_tree
except ImportError:
    pass