objects.py 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) a later version of the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Access to base git objects."""
  20. import binascii
  21. from cStringIO import (
  22. StringIO,
  23. )
  24. import mmap
  25. import os
  26. import stat
  27. import zlib
  28. from dulwich.errors import (
  29. NotBlobError,
  30. NotCommitError,
  31. NotTagError,
  32. NotTreeError,
  33. ObjectFormatException,
  34. )
  35. from dulwich.file import GitFile
  36. from dulwich.misc import (
  37. make_sha,
  38. )
# Header fields for commits.  These are the field names that
# Commit._deserialize() recognises and Commit._serialize() writes.
_TREE_HEADER = "tree"
_PARENT_HEADER = "parent"
_AUTHOR_HEADER = "author"
_COMMITTER_HEADER = "committer"
_ENCODING_HEADER = "encoding"

# Header fields for objects.  These are the field names that
# Tag._deserialize() recognises and Tag._serialize() writes.
_OBJECT_HEADER = "object"
_TYPE_HEADER = "type"
_TAG_HEADER = "tag"
_TAGGER_HEADER = "tagger"

# File-type bits of a tree entry mode that mark a gitlink entry; compared
# against stat.S_IFMT(mode) by S_ISGITLINK() and accepted by Tree.check().
S_IFGITLINK = 0160000
  51. def S_ISGITLINK(m):
  52. return (stat.S_IFMT(m) == S_IFGITLINK)
  53. def _decompress(string):
  54. dcomp = zlib.decompressobj()
  55. dcomped = dcomp.decompress(string)
  56. dcomped += dcomp.flush()
  57. return dcomped
  58. def sha_to_hex(sha):
  59. """Takes a string and returns the hex of the sha within"""
  60. hexsha = binascii.hexlify(sha)
  61. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
  62. return hexsha
  63. def hex_to_sha(hex):
  64. """Takes a hex sha and returns a binary sha"""
  65. assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
  66. return binascii.unhexlify(hex)
  67. def serializable_property(name, docstring=None):
  68. def set(obj, value):
  69. obj._ensure_parsed()
  70. setattr(obj, "_"+name, value)
  71. obj._needs_serialization = True
  72. def get(obj):
  73. obj._ensure_parsed()
  74. return getattr(obj, "_"+name)
  75. return property(get, set, doc=docstring)
  76. def object_class(type):
  77. """Get the object class corresponding to the given type.
  78. :param type: Either a type name string or a numeric type.
  79. :return: The ShaFile subclass corresponding to the given type, or None if
  80. type is not a valid type name/number.
  81. """
  82. return _TYPE_MAP.get(type, None)
  83. def check_hexsha(hex, error_msg):
  84. try:
  85. hex_to_sha(hex)
  86. except (TypeError, AssertionError):
  87. raise ObjectFormatException("%s %s" % (error_msg, hex))
  88. def check_identity(identity, error_msg):
  89. email_start = identity.find("<")
  90. email_end = identity.find(">")
  91. if (email_start < 0 or email_end < 0 or email_end <= email_start
  92. or identity.find("<", email_start + 1) >= 0
  93. or identity.find(">", email_end + 1) >= 0
  94. or not identity.endswith(">")):
  95. raise ObjectFormatException(error_msg)
  96. class ShaFile(object):
  97. """A git SHA file."""
  98. @staticmethod
  99. def _parse_legacy_object_header(magic, f):
  100. """Parse a legacy object, creating it but not reading the file."""
  101. bufsize = 1024
  102. decomp = zlib.decompressobj()
  103. header = decomp.decompress(magic)
  104. start = 0
  105. end = -1
  106. while end < 0:
  107. header += decomp.decompress(f.read(bufsize))
  108. end = header.find("\0", start)
  109. start = len(header)
  110. header = header[:end]
  111. type_name, size = header.split(" ", 1)
  112. size = int(size) # sanity check
  113. obj_class = object_class(type_name)
  114. if not obj_class:
  115. raise ObjectFormatException("Not a known type: %s" % type_name)
  116. obj = obj_class()
  117. obj._filename = f.name
  118. return obj
  119. def _parse_legacy_object(self, f):
  120. """Parse a legacy object, setting the raw string."""
  121. size = os.path.getsize(f.name)
  122. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  123. try:
  124. text = _decompress(map)
  125. finally:
  126. map.close()
  127. header_end = text.find('\0')
  128. if header_end < 0:
  129. raise ObjectFormatException("Invalid object header")
  130. self.set_raw_string(text[header_end+1:])
  131. def as_legacy_object_chunks(self):
  132. compobj = zlib.compressobj()
  133. yield compobj.compress(self._header())
  134. for chunk in self.as_raw_chunks():
  135. yield compobj.compress(chunk)
  136. yield compobj.flush()
  137. def as_legacy_object(self):
  138. return "".join(self.as_legacy_object_chunks())
  139. def as_raw_chunks(self):
  140. if self._needs_parsing:
  141. self._ensure_parsed()
  142. else:
  143. self._chunked_text = self._serialize()
  144. return self._chunked_text
  145. def as_raw_string(self):
  146. return "".join(self.as_raw_chunks())
  147. def __str__(self):
  148. return self.as_raw_string()
  149. def __hash__(self):
  150. return hash(self.id)
  151. def as_pretty_string(self):
  152. return self.as_raw_string()
  153. def _ensure_parsed(self):
  154. if self._needs_parsing:
  155. if not self._chunked_text:
  156. assert self._filename, "ShaFile needs either text or filename"
  157. self._parse_file()
  158. self._deserialize(self._chunked_text)
  159. self._needs_parsing = False
  160. def set_raw_string(self, text):
  161. if type(text) != str:
  162. raise TypeError(text)
  163. self.set_raw_chunks([text])
  164. def set_raw_chunks(self, chunks):
  165. self._chunked_text = chunks
  166. self._sha = None
  167. self._needs_parsing = True
  168. self._needs_serialization = False
  169. @staticmethod
  170. def _parse_object_header(magic, f):
  171. """Parse a new style object, creating it but not reading the file."""
  172. num_type = (ord(magic[0]) >> 4) & 7
  173. obj_class = object_class(num_type)
  174. if not obj_class:
  175. raise ObjectFormatError("Not a known type: %d" % num_type)
  176. obj = obj_class()
  177. obj._filename = f.name
  178. return obj
  179. def _parse_object(self, f):
  180. """Parse a new style object, setting self._text."""
  181. size = os.path.getsize(f.name)
  182. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  183. try:
  184. # skip type and size; type must have already been determined, and we
  185. # trust zlib to fail if it's otherwise corrupted
  186. byte = ord(map[0])
  187. used = 1
  188. while (byte & 0x80) != 0:
  189. byte = ord(map[used])
  190. used += 1
  191. raw = map[used:]
  192. self.set_raw_string(_decompress(raw))
  193. finally:
  194. map.close()
  195. @classmethod
  196. def _is_legacy_object(cls, magic):
  197. b0, b1 = map(ord, magic)
  198. word = (b0 << 8) + b1
  199. return b0 == 0x78 and (word % 31) == 0
  200. @classmethod
  201. def _parse_file_header(cls, f):
  202. magic = f.read(2)
  203. if cls._is_legacy_object(magic):
  204. return cls._parse_legacy_object_header(magic, f)
  205. else:
  206. return cls._parse_object_header(magic, f)
  207. def __init__(self):
  208. """Don't call this directly"""
  209. self._sha = None
  210. self._filename = None
  211. self._chunked_text = []
  212. self._needs_parsing = False
  213. self._needs_serialization = True
  214. def _deserialize(self, chunks):
  215. raise NotImplementedError(self._deserialize)
  216. def _serialize(self):
  217. raise NotImplementedError(self._serialize)
  218. def _parse_file(self):
  219. f = GitFile(self._filename, 'rb')
  220. try:
  221. magic = f.read(2)
  222. if self._is_legacy_object(magic):
  223. self._parse_legacy_object(f)
  224. else:
  225. self._parse_object(f)
  226. finally:
  227. f.close()
  228. @classmethod
  229. def from_file(cls, filename):
  230. """Get the contents of a SHA file on disk."""
  231. f = GitFile(filename, 'rb')
  232. try:
  233. try:
  234. obj = cls._parse_file_header(f)
  235. obj._needs_parsing = True
  236. obj._needs_serialization = True
  237. return obj
  238. except (IndexError, ValueError), e:
  239. raise ObjectFormatException("invalid object header")
  240. finally:
  241. f.close()
  242. @staticmethod
  243. def from_raw_string(type_num, string):
  244. """Creates an object of the indicated type from the raw string given.
  245. :param type_num: The numeric type of the object.
  246. :param string: The raw uncompressed contents.
  247. """
  248. obj = object_class(type_num)()
  249. obj.set_raw_string(string)
  250. return obj
  251. @staticmethod
  252. def from_raw_chunks(type_num, chunks):
  253. """Creates an object of the indicated type from the raw chunks given.
  254. :param type_num: The numeric type of the object.
  255. :param chunks: An iterable of the raw uncompressed contents.
  256. """
  257. obj = object_class(type_num)()
  258. obj.set_raw_chunks(chunks)
  259. return obj
  260. @classmethod
  261. def from_string(cls, string):
  262. """Create a ShaFile from a string."""
  263. obj = cls()
  264. obj.set_raw_string(string)
  265. return obj
  266. def _check_has_member(self, member, error_msg):
  267. """Check that the object has a given member variable.
  268. :param member: the member variable to check for
  269. :param error_msg: the message for an error if the member is missing
  270. :raise ObjectFormatException: with the given error_msg if member is
  271. missing or is None
  272. """
  273. if getattr(self, member, None) is None:
  274. raise ObjectFormatException(error_msg)
  275. def check(self):
  276. """Check this object for internal consistency.
  277. :raise ObjectFormatException: if the object is malformed in some way
  278. """
  279. # TODO: if we find that error-checking during object parsing is a
  280. # performance bottleneck, those checks should be moved to the class's
  281. # check() method during optimization so we can still check the object
  282. # when necessary.
  283. try:
  284. self._deserialize(self.as_raw_chunks())
  285. except Exception, e:
  286. raise ObjectFormatException(e)
  287. def _header(self):
  288. return "%s %lu\0" % (self.type_name, self.raw_length())
  289. def raw_length(self):
  290. """Returns the length of the raw string of this object."""
  291. ret = 0
  292. for chunk in self.as_raw_chunks():
  293. ret += len(chunk)
  294. return ret
  295. def _make_sha(self):
  296. ret = make_sha()
  297. ret.update(self._header())
  298. for chunk in self.as_raw_chunks():
  299. ret.update(chunk)
  300. return ret
  301. def sha(self):
  302. """The SHA1 object that is the name of this object."""
  303. if self._needs_serialization or self._sha is None:
  304. self._sha = self._make_sha()
  305. return self._sha
  306. @property
  307. def id(self):
  308. return self.sha().hexdigest()
  309. def get_type(self):
  310. return self.type_num
  311. def set_type(self, type):
  312. self.type_num = type
  313. # DEPRECATED: use type_num or type_name as needed.
  314. type = property(get_type, set_type)
  315. def __repr__(self):
  316. return "<%s %s>" % (self.__class__.__name__, self.id)
  317. def __ne__(self, other):
  318. return self.id != other.id
  319. def __eq__(self, other):
  320. """Return true if the sha of the two objects match.
  321. The __le__ etc methods aren't overriden as they make no sense,
  322. certainly at this level.
  323. """
  324. return self.id == other.id
  325. class Blob(ShaFile):
  326. """A Git Blob object."""
  327. type_name = 'blob'
  328. type_num = 3
  329. def __init__(self):
  330. super(Blob, self).__init__()
  331. self._chunked_text = []
  332. self._needs_parsing = False
  333. self._needs_serialization = False
  334. def _get_data(self):
  335. return self.as_raw_string()
  336. def _set_data(self, data):
  337. self.set_raw_string(data)
  338. data = property(_get_data, _set_data,
  339. "The text contained within the blob object.")
  340. def _get_chunked(self):
  341. self._ensure_parsed()
  342. return self._chunked_text
  343. def _set_chunked(self, chunks):
  344. self._chunked_text = chunks
  345. def _serialize(self):
  346. if not self._chunked_text:
  347. self._ensure_parsed()
  348. self._needs_serialization = False
  349. return self._chunked_text
  350. def _deserialize(self, chunks):
  351. return "".join(chunks)
  352. chunked = property(_get_chunked, _set_chunked,
  353. "The text within the blob object, as chunks (not necessarily lines).")
  354. @classmethod
  355. def from_file(cls, filename):
  356. blob = ShaFile.from_file(filename)
  357. if not isinstance(blob, cls):
  358. raise NotBlobError(filename)
  359. return blob
  360. def check(self):
  361. """Check this object for internal consistency.
  362. :raise ObjectFormatException: if the object is malformed in some way
  363. """
  364. pass # it's impossible for raw data to be malformed
  365. def _parse_tag_or_commit(text):
  366. """Parse tag or commit text.
  367. :param text: the raw text of the tag or commit object.
  368. :yield: tuples of (field, value), one per header line, in the order read
  369. from the text, possibly including duplicates. Includes a field named
  370. None for the freeform tag/commit text.
  371. """
  372. f = StringIO(text)
  373. for l in f:
  374. l = l.rstrip("\n")
  375. if l == "":
  376. # Empty line indicates end of headers
  377. break
  378. yield l.split(" ", 1)
  379. yield (None, f.read())
  380. f.close()
  381. def parse_tag(text):
  382. return _parse_tag_or_commit(text)
  383. class Tag(ShaFile):
  384. """A Git Tag object."""
  385. type_name = 'tag'
  386. type_num = 4
  387. def __init__(self):
  388. super(Tag, self).__init__()
  389. self._tag_timezone_neg_utc = False
  390. @classmethod
  391. def from_file(cls, filename):
  392. tag = ShaFile.from_file(filename)
  393. if not isinstance(tag, cls):
  394. raise NotTagError(filename)
  395. return tag
  396. def check(self):
  397. """Check this object for internal consistency.
  398. :raise ObjectFormatException: if the object is malformed in some way
  399. """
  400. super(Tag, self).check()
  401. self._check_has_member("_object_sha", "missing object sha")
  402. self._check_has_member("_object_class", "missing object type")
  403. self._check_has_member("_name", "missing tag name")
  404. if not self._name:
  405. raise ObjectFormatException("empty tag name")
  406. check_hexsha(self._object_sha, "invalid object sha")
  407. if getattr(self, "_tagger", None):
  408. check_identity(self._tagger, "invalid tagger")
  409. last = None
  410. for field, _ in parse_tag("".join(self._chunked_text)):
  411. if field == _OBJECT_HEADER and last is not None:
  412. raise ObjectFormatException("unexpected object")
  413. elif field == _TYPE_HEADER and last != _OBJECT_HEADER:
  414. raise ObjectFormatException("unexpected type")
  415. elif field == _TAG_HEADER and last != _TYPE_HEADER:
  416. raise ObjectFormatException("unexpected tag name")
  417. elif field == _TAGGER_HEADER and last != _TAG_HEADER:
  418. raise ObjectFormatException("unexpected tagger")
  419. last = field
  420. def _serialize(self):
  421. chunks = []
  422. chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
  423. chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
  424. chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
  425. if self._tagger:
  426. if self._tag_time is None:
  427. chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
  428. else:
  429. chunks.append("%s %s %d %s\n" % (
  430. _TAGGER_HEADER, self._tagger, self._tag_time,
  431. format_timezone(self._tag_timezone,
  432. self._tag_timezone_neg_utc)))
  433. chunks.append("\n") # To close headers
  434. chunks.append(self._message)
  435. return chunks
  436. def _deserialize(self, chunks):
  437. """Grab the metadata attached to the tag"""
  438. self._tagger = None
  439. for field, value in parse_tag("".join(chunks)):
  440. if field == _OBJECT_HEADER:
  441. self._object_sha = value
  442. elif field == _TYPE_HEADER:
  443. self._object_class = object_class(value)
  444. elif field == _TAG_HEADER:
  445. self._name = value
  446. elif field == _TAGGER_HEADER:
  447. try:
  448. sep = value.index("> ")
  449. except ValueError:
  450. self._tagger = value
  451. self._tag_time = None
  452. self._tag_timezone = None
  453. self._tag_timezone_neg_utc = False
  454. else:
  455. self._tagger = value[0:sep+1]
  456. (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
  457. self._tag_time = int(timetext)
  458. self._tag_timezone, self._tag_timezone_neg_utc = \
  459. parse_timezone(timezonetext)
  460. elif field is None:
  461. self._message = value
  462. else:
  463. raise AssertionError("Unknown field %s" % field)
  464. def _get_object(self):
  465. """Get the object pointed to by this tag.
  466. :return: tuple of (object class, sha).
  467. """
  468. self._ensure_parsed()
  469. return (self._object_class, self._object_sha)
  470. def _set_object(self, value):
  471. self._ensure_parsed()
  472. (self._object_class, self._object_sha) = value
  473. self._needs_serialization = True
  474. object = property(_get_object, _set_object)
  475. name = serializable_property("name", "The name of this tag")
  476. tagger = serializable_property("tagger",
  477. "Returns the name of the person who created this tag")
  478. tag_time = serializable_property("tag_time",
  479. "The creation timestamp of the tag. As the number of seconds since the epoch")
  480. tag_timezone = serializable_property("tag_timezone",
  481. "The timezone that tag_time is in.")
  482. message = serializable_property("message", "The message attached to this tag")
  483. def parse_tree(text):
  484. """Parse a tree text.
  485. :param text: Serialized text to parse
  486. :yields: tuples of (name, mode, sha)
  487. """
  488. count = 0
  489. l = len(text)
  490. while count < l:
  491. mode_end = text.index(' ', count)
  492. mode = int(text[count:mode_end], 8)
  493. name_end = text.index('\0', mode_end)
  494. name = text[mode_end+1:name_end]
  495. count = name_end+21
  496. sha = text[name_end+1:count]
  497. yield (name, mode, sha_to_hex(sha))
  498. def serialize_tree(items):
  499. """Serialize the items in a tree to a text.
  500. :param items: Sorted iterable over (name, mode, sha) tuples
  501. :return: Serialized tree text as chunks
  502. """
  503. for name, mode, hexsha in items:
  504. yield "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  505. def sorted_tree_items(entries):
  506. """Iterate over a tree entries dictionary in the order in which
  507. the items would be serialized.
  508. :param entries: Dictionary mapping names to (mode, sha) tuples
  509. :return: Iterator over (name, mode, sha)
  510. """
  511. for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
  512. yield name, entry[0], entry[1]
  513. def cmp_entry((name1, value1), (name2, value2)):
  514. """Compare two tree entries."""
  515. if stat.S_ISDIR(value1[0]):
  516. name1 += "/"
  517. if stat.S_ISDIR(value2[0]):
  518. name2 += "/"
  519. return cmp(name1, name2)
  520. class Tree(ShaFile):
  521. """A Git tree object"""
  522. type_name = 'tree'
  523. type_num = 2
  524. def __init__(self):
  525. super(Tree, self).__init__()
  526. self._entries = {}
  527. @classmethod
  528. def from_file(cls, filename):
  529. tree = ShaFile.from_file(filename)
  530. if not isinstance(tree, cls):
  531. raise NotTreeError(filename)
  532. return tree
  533. def __contains__(self, name):
  534. self._ensure_parsed()
  535. return name in self._entries
  536. def __getitem__(self, name):
  537. self._ensure_parsed()
  538. return self._entries[name]
  539. def __setitem__(self, name, value):
  540. assert isinstance(value, tuple)
  541. assert len(value) == 2
  542. self._ensure_parsed()
  543. self._entries[name] = value
  544. self._needs_serialization = True
  545. def __delitem__(self, name):
  546. self._ensure_parsed()
  547. del self._entries[name]
  548. self._needs_serialization = True
  549. def __len__(self):
  550. self._ensure_parsed()
  551. return len(self._entries)
  552. def __iter__(self):
  553. self._ensure_parsed()
  554. return iter(self._entries)
  555. def add(self, mode, name, hexsha):
  556. assert type(mode) == int
  557. assert type(name) == str
  558. assert type(hexsha) == str
  559. self._ensure_parsed()
  560. self._entries[name] = mode, hexsha
  561. self._needs_serialization = True
  562. def entries(self):
  563. """Return a list of tuples describing the tree entries"""
  564. self._ensure_parsed()
  565. # The order of this is different from iteritems() for historical
  566. # reasons
  567. return [
  568. (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
  569. def iteritems(self):
  570. """Iterate over entries in the order in which they would be serialized.
  571. :return: Iterator over (name, mode, sha) tuples
  572. """
  573. self._ensure_parsed()
  574. return sorted_tree_items(self._entries)
  575. def _deserialize(self, chunks):
  576. """Grab the entries in the tree"""
  577. parsed_entries = parse_tree("".join(chunks))
  578. # TODO: list comprehension is for efficiency in the common (small) case;
  579. # if memory efficiency in the large case is a concern, use a genexp.
  580. self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
  581. def check(self):
  582. """Check this object for internal consistency.
  583. :raise ObjectFormatException: if the object is malformed in some way
  584. """
  585. super(Tree, self).check()
  586. last = None
  587. allowed_modes = (stat.S_IFREG | 0755, stat.S_IFREG | 0644,
  588. stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
  589. # TODO: optionally exclude as in git fsck --strict
  590. stat.S_IFREG | 0664)
  591. for name, mode, sha in parse_tree("".join(self._chunked_text)):
  592. check_hexsha(sha, 'invalid sha %s' % sha)
  593. if '/' in name or name in ('', '.', '..'):
  594. raise ObjectFormatException('invalid name %s' % name)
  595. if mode not in allowed_modes:
  596. raise ObjectFormatException('invalid mode %06o' % mode)
  597. entry = (name, (mode, sha))
  598. if last:
  599. if cmp_entry(last, entry) > 0:
  600. raise ObjectFormatException('entries not sorted')
  601. if name == last[0]:
  602. raise ObjectFormatException('duplicate entry %s' % name)
  603. last = entry
  604. def _serialize(self):
  605. return list(serialize_tree(self.iteritems()))
  606. def as_pretty_string(self):
  607. text = []
  608. for name, mode, hexsha in self.iteritems():
  609. if mode & stat.S_IFDIR:
  610. kind = "tree"
  611. else:
  612. kind = "blob"
  613. text.append("%04o %s %s\t%s\n" % (mode, kind, hexsha, name))
  614. return "".join(text)
  615. def parse_timezone(text):
  616. offset = int(text)
  617. negative_utc = (offset == 0 and text[0] == '-')
  618. signum = (offset < 0) and -1 or 1
  619. offset = abs(offset)
  620. hours = int(offset / 100)
  621. minutes = (offset % 100)
  622. return signum * (hours * 3600 + minutes * 60), negative_utc
  623. def format_timezone(offset, negative_utc=False):
  624. if offset % 60 != 0:
  625. raise ValueError("Unable to handle non-minute offset.")
  626. if offset < 0 or (offset == 0 and negative_utc):
  627. sign = '-'
  628. else:
  629. sign = '+'
  630. offset = abs(offset)
  631. return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
  632. def parse_commit(text):
  633. return _parse_tag_or_commit(text)
  634. class Commit(ShaFile):
  635. """A git commit object"""
  636. type_name = 'commit'
  637. type_num = 1
  638. def __init__(self):
  639. super(Commit, self).__init__()
  640. self._parents = []
  641. self._encoding = None
  642. self._extra = {}
  643. self._author_timezone_neg_utc = False
  644. self._commit_timezone_neg_utc = False
  645. @classmethod
  646. def from_file(cls, filename):
  647. commit = ShaFile.from_file(filename)
  648. if not isinstance(commit, cls):
  649. raise NotCommitError(filename)
  650. return commit
  651. def _deserialize(self, chunks):
  652. self._parents = []
  653. self._extra = []
  654. self._author = None
  655. for field, value in parse_commit("".join(self._chunked_text)):
  656. if field == _TREE_HEADER:
  657. self._tree = value
  658. elif field == _PARENT_HEADER:
  659. self._parents.append(value)
  660. elif field == _AUTHOR_HEADER:
  661. self._author, timetext, timezonetext = value.rsplit(" ", 2)
  662. self._author_time = int(timetext)
  663. self._author_timezone, self._author_timezone_neg_utc =\
  664. parse_timezone(timezonetext)
  665. elif field == _COMMITTER_HEADER:
  666. self._committer, timetext, timezonetext = value.rsplit(" ", 2)
  667. self._commit_time = int(timetext)
  668. self._commit_timezone, self._commit_timezone_neg_utc =\
  669. parse_timezone(timezonetext)
  670. elif field == _ENCODING_HEADER:
  671. self._encoding = value
  672. elif field is None:
  673. self._message = value
  674. else:
  675. self._extra.append((field, value))
  676. def check(self):
  677. """Check this object for internal consistency.
  678. :raise ObjectFormatException: if the object is malformed in some way
  679. """
  680. super(Commit, self).check()
  681. self._check_has_member("_tree", "missing tree")
  682. self._check_has_member("_author", "missing author")
  683. self._check_has_member("_committer", "missing committer")
  684. # times are currently checked when set
  685. for parent in self._parents:
  686. check_hexsha(parent, "invalid parent sha")
  687. check_hexsha(self._tree, "invalid tree sha")
  688. check_identity(self._author, "invalid author")
  689. check_identity(self._committer, "invalid committer")
  690. last = None
  691. for field, _ in parse_commit("".join(self._chunked_text)):
  692. if field == _TREE_HEADER and last is not None:
  693. raise ObjectFormatException("unexpected tree")
  694. elif field == _PARENT_HEADER and last not in (_PARENT_HEADER,
  695. _TREE_HEADER):
  696. raise ObjectFormatException("unexpected parent")
  697. elif field == _AUTHOR_HEADER and last not in (_TREE_HEADER,
  698. _PARENT_HEADER):
  699. raise ObjectFormatException("unexpected author")
  700. elif field == _COMMITTER_HEADER and last != _AUTHOR_HEADER:
  701. raise ObjectFormatException("unexpected committer")
  702. elif field == _ENCODING_HEADER and last != _COMMITTER_HEADER:
  703. raise ObjectFormatException("unexpected encoding")
  704. last = field
  705. # TODO: optionally check for duplicate parents
  706. def _serialize(self):
  707. chunks = []
  708. chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
  709. for p in self._parents:
  710. chunks.append("%s %s\n" % (_PARENT_HEADER, p))
  711. chunks.append("%s %s %s %s\n" % (
  712. _AUTHOR_HEADER, self._author, str(self._author_time),
  713. format_timezone(self._author_timezone,
  714. self._author_timezone_neg_utc)))
  715. chunks.append("%s %s %s %s\n" % (
  716. _COMMITTER_HEADER, self._committer, str(self._commit_time),
  717. format_timezone(self._commit_timezone,
  718. self._commit_timezone_neg_utc)))
  719. if self.encoding:
  720. chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
  721. for k, v in self.extra:
  722. if "\n" in k or "\n" in v:
  723. raise AssertionError("newline in extra data: %r -> %r" % (k, v))
  724. chunks.append("%s %s\n" % (k, v))
  725. chunks.append("\n") # There must be a new line after the headers
  726. chunks.append(self._message)
  727. return chunks
  728. tree = serializable_property("tree", "Tree that is the state of this commit")
  729. def _get_parents(self):
  730. """Return a list of parents of this commit."""
  731. self._ensure_parsed()
  732. return self._parents
  733. def _set_parents(self, value):
  734. """Set a list of parents of this commit."""
  735. self._ensure_parsed()
  736. self._needs_serialization = True
  737. self._parents = value
  738. parents = property(_get_parents, _set_parents)
  739. def _get_extra(self):
  740. """Return extra settings of this commit."""
  741. self._ensure_parsed()
  742. return self._extra
  743. extra = property(_get_extra)
  744. author = serializable_property("author",
  745. "The name of the author of the commit")
  746. committer = serializable_property("committer",
  747. "The name of the committer of the commit")
  748. message = serializable_property("message",
  749. "The commit message")
  750. commit_time = serializable_property("commit_time",
  751. "The timestamp of the commit. As the number of seconds since the epoch.")
  752. commit_timezone = serializable_property("commit_timezone",
  753. "The zone the commit time is in")
  754. author_time = serializable_property("author_time",
  755. "The timestamp the commit was written. as the number of seconds since the epoch.")
  756. author_timezone = serializable_property("author_timezone",
  757. "Returns the zone the author time is in.")
  758. encoding = serializable_property("encoding",
  759. "Encoding of the commit message.")
# The object classes that get registered in _TYPE_MAP below.
OBJECT_CLASSES = (
    Commit,
    Tree,
    Blob,
    Tag,
    )

# Lookup table behind object_class(): every class is registered twice, once
# under its type_name string and once under its numeric type_num.
_TYPE_MAP = {}

for cls in OBJECT_CLASSES:
    _TYPE_MAP[cls.type_name] = cls
    _TYPE_MAP[cls.type_num] = cls


# Hold on to the pure-python implementations for testing
_parse_tree_py = parse_tree
_sorted_tree_items_py = sorted_tree_items
try:
    # Try to import C versions; when the import succeeds the names
    # parse_tree and sorted_tree_items are rebound to them.
    from dulwich._objects import parse_tree, sorted_tree_items
except ImportError:
    # No C extension available: keep the pure-python implementations
    pass