objects.py 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) a later version of the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Access to base git objects."""
  20. import binascii
  21. from cStringIO import (
  22. StringIO,
  23. )
  24. import mmap
  25. import os
  26. import stat
  27. import zlib
  28. from dulwich.errors import (
  29. NotBlobError,
  30. NotCommitError,
  31. NotTagError,
  32. NotTreeError,
  33. ObjectFormatException,
  34. )
  35. from dulwich.file import GitFile
  36. from dulwich.misc import (
  37. make_sha,
  38. )
# Header fields for commits (the names Commit._serialize/_deserialize use
# as the first word of each header line).
_TREE_HEADER = "tree"
_PARENT_HEADER = "parent"
_AUTHOR_HEADER = "author"
_COMMITTER_HEADER = "committer"
_ENCODING_HEADER = "encoding"

# Header fields for tags (the names Tag._serialize/_deserialize use as the
# first word of each header line).
_OBJECT_HEADER = "object"
_TYPE_HEADER = "type"
_TAG_HEADER = "tag"
_TAGGER_HEADER = "tagger"
  50. S_IFGITLINK = 0160000
  51. def S_ISGITLINK(m):
  52. return (stat.S_IFMT(m) == S_IFGITLINK)
  53. def _decompress(string):
  54. dcomp = zlib.decompressobj()
  55. dcomped = dcomp.decompress(string)
  56. dcomped += dcomp.flush()
  57. return dcomped
  58. def sha_to_hex(sha):
  59. """Takes a string and returns the hex of the sha within"""
  60. hexsha = binascii.hexlify(sha)
  61. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
  62. return hexsha
  63. def hex_to_sha(hex):
  64. """Takes a hex sha and returns a binary sha"""
  65. assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
  66. return binascii.unhexlify(hex)
  67. def hex_to_filename(path, hex):
  68. """Takes a hex sha and returns its filename relative to the given path."""
  69. dir = hex[:2]
  70. file = hex[2:]
  71. # Check from object dir
  72. return os.path.join(path, dir, file)
  73. def filename_to_hex(filename):
  74. """Takes an object filename and returns its corresponding hex sha."""
  75. # grab the last (up to) two path components
  76. names = filename.rsplit(os.path.sep, 2)[-2:]
  77. errmsg = "Invalid object filename: %s" % filename
  78. assert len(names) == 2, errmsg
  79. base, rest = names
  80. assert len(base) == 2 and len(rest) == 38, errmsg
  81. hex = base + rest
  82. hex_to_sha(hex)
  83. return hex
  84. def serializable_property(name, docstring=None):
  85. def set(obj, value):
  86. obj._ensure_parsed()
  87. setattr(obj, "_"+name, value)
  88. obj._needs_serialization = True
  89. def get(obj):
  90. obj._ensure_parsed()
  91. return getattr(obj, "_"+name)
  92. return property(get, set, doc=docstring)
  93. def object_class(type):
  94. """Get the object class corresponding to the given type.
  95. :param type: Either a type name string or a numeric type.
  96. :return: The ShaFile subclass corresponding to the given type, or None if
  97. type is not a valid type name/number.
  98. """
  99. return _TYPE_MAP.get(type, None)
  100. def check_hexsha(hex, error_msg):
  101. try:
  102. hex_to_sha(hex)
  103. except (TypeError, AssertionError):
  104. raise ObjectFormatException("%s %s" % (error_msg, hex))
  105. def check_identity(identity, error_msg):
  106. email_start = identity.find("<")
  107. email_end = identity.find(">")
  108. if (email_start < 0 or email_end < 0 or email_end <= email_start
  109. or identity.find("<", email_start + 1) >= 0
  110. or identity.find(">", email_end + 1) >= 0
  111. or not identity.endswith(">")):
  112. raise ObjectFormatException(error_msg)
  113. class FixedSha(object):
  114. """SHA object that behaves like hashlib's but is given a fixed value."""
  115. def __init__(self, hexsha):
  116. self._hexsha = hexsha
  117. self._sha = hex_to_sha(hexsha)
  118. def digest(self):
  119. return self._sha
  120. def hexdigest(self):
  121. return self._hexsha
  122. class ShaFile(object):
  123. """A git SHA file."""
  124. @staticmethod
  125. def _parse_legacy_object_header(magic, f):
  126. """Parse a legacy object, creating it but not reading the file."""
  127. bufsize = 1024
  128. decomp = zlib.decompressobj()
  129. header = decomp.decompress(magic)
  130. start = 0
  131. end = -1
  132. while end < 0:
  133. header += decomp.decompress(f.read(bufsize))
  134. end = header.find("\0", start)
  135. start = len(header)
  136. header = header[:end]
  137. type_name, size = header.split(" ", 1)
  138. size = int(size) # sanity check
  139. obj_class = object_class(type_name)
  140. if not obj_class:
  141. raise ObjectFormatException("Not a known type: %s" % type_name)
  142. obj = obj_class()
  143. obj._filename = f.name
  144. return obj
  145. def _parse_legacy_object(self, f):
  146. """Parse a legacy object, setting the raw string."""
  147. size = os.path.getsize(f.name)
  148. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  149. try:
  150. text = _decompress(map)
  151. finally:
  152. map.close()
  153. header_end = text.find('\0')
  154. if header_end < 0:
  155. raise ObjectFormatException("Invalid object header")
  156. self.set_raw_string(text[header_end+1:])
  157. def as_legacy_object_chunks(self):
  158. compobj = zlib.compressobj()
  159. yield compobj.compress(self._header())
  160. for chunk in self.as_raw_chunks():
  161. yield compobj.compress(chunk)
  162. yield compobj.flush()
  163. def as_legacy_object(self):
  164. return "".join(self.as_legacy_object_chunks())
  165. def as_raw_chunks(self):
  166. if self._needs_parsing:
  167. self._ensure_parsed()
  168. else:
  169. self._chunked_text = self._serialize()
  170. return self._chunked_text
  171. def as_raw_string(self):
  172. return "".join(self.as_raw_chunks())
  173. def __str__(self):
  174. return self.as_raw_string()
  175. def __hash__(self):
  176. return hash(self.id)
  177. def as_pretty_string(self):
  178. return self.as_raw_string()
  179. def _ensure_parsed(self):
  180. if self._needs_parsing:
  181. if not self._chunked_text:
  182. assert self._filename, "ShaFile needs either text or filename"
  183. self._parse_file()
  184. self._deserialize(self._chunked_text)
  185. self._needs_parsing = False
  186. def set_raw_string(self, text):
  187. if type(text) != str:
  188. raise TypeError(text)
  189. self.set_raw_chunks([text])
  190. def set_raw_chunks(self, chunks):
  191. self._chunked_text = chunks
  192. self._sha = None
  193. self._needs_parsing = True
  194. self._needs_serialization = False
  195. @staticmethod
  196. def _parse_object_header(magic, f):
  197. """Parse a new style object, creating it but not reading the file."""
  198. num_type = (ord(magic[0]) >> 4) & 7
  199. obj_class = object_class(num_type)
  200. if not obj_class:
  201. raise ObjectFormatError("Not a known type: %d" % num_type)
  202. obj = obj_class()
  203. obj._filename = f.name
  204. return obj
  205. def _parse_object(self, f):
  206. """Parse a new style object, setting self._text."""
  207. size = os.path.getsize(f.name)
  208. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  209. try:
  210. # skip type and size; type must have already been determined, and we
  211. # trust zlib to fail if it's otherwise corrupted
  212. byte = ord(map[0])
  213. used = 1
  214. while (byte & 0x80) != 0:
  215. byte = ord(map[used])
  216. used += 1
  217. raw = map[used:]
  218. self.set_raw_string(_decompress(raw))
  219. finally:
  220. map.close()
  221. @classmethod
  222. def _is_legacy_object(cls, magic):
  223. b0, b1 = map(ord, magic)
  224. word = (b0 << 8) + b1
  225. return b0 == 0x78 and (word % 31) == 0
  226. @classmethod
  227. def _parse_file_header(cls, f):
  228. magic = f.read(2)
  229. if cls._is_legacy_object(magic):
  230. return cls._parse_legacy_object_header(magic, f)
  231. else:
  232. return cls._parse_object_header(magic, f)
  233. def __init__(self):
  234. """Don't call this directly"""
  235. self._sha = None
  236. self._filename = None
  237. self._chunked_text = []
  238. self._needs_parsing = False
  239. self._needs_serialization = True
  240. def _deserialize(self, chunks):
  241. raise NotImplementedError(self._deserialize)
  242. def _serialize(self):
  243. raise NotImplementedError(self._serialize)
  244. def _parse_file(self):
  245. f = GitFile(self._filename, 'rb')
  246. try:
  247. magic = f.read(2)
  248. if self._is_legacy_object(magic):
  249. self._parse_legacy_object(f)
  250. else:
  251. self._parse_object(f)
  252. finally:
  253. f.close()
  254. @classmethod
  255. def from_file(cls, filename):
  256. """Get the contents of a SHA file on disk."""
  257. f = GitFile(filename, 'rb')
  258. try:
  259. try:
  260. obj = cls._parse_file_header(f)
  261. obj._sha = FixedSha(filename_to_hex(filename))
  262. obj._needs_parsing = True
  263. obj._needs_serialization = True
  264. return obj
  265. except (IndexError, ValueError), e:
  266. raise ObjectFormatException("invalid object header")
  267. finally:
  268. f.close()
  269. @staticmethod
  270. def from_raw_string(type_num, string):
  271. """Creates an object of the indicated type from the raw string given.
  272. :param type_num: The numeric type of the object.
  273. :param string: The raw uncompressed contents.
  274. """
  275. obj = object_class(type_num)()
  276. obj.set_raw_string(string)
  277. return obj
  278. @staticmethod
  279. def from_raw_chunks(type_num, chunks):
  280. """Creates an object of the indicated type from the raw chunks given.
  281. :param type_num: The numeric type of the object.
  282. :param chunks: An iterable of the raw uncompressed contents.
  283. """
  284. obj = object_class(type_num)()
  285. obj.set_raw_chunks(chunks)
  286. return obj
  287. @classmethod
  288. def from_string(cls, string):
  289. """Create a ShaFile from a string."""
  290. obj = cls()
  291. obj.set_raw_string(string)
  292. return obj
  293. def _check_has_member(self, member, error_msg):
  294. """Check that the object has a given member variable.
  295. :param member: the member variable to check for
  296. :param error_msg: the message for an error if the member is missing
  297. :raise ObjectFormatException: with the given error_msg if member is
  298. missing or is None
  299. """
  300. if getattr(self, member, None) is None:
  301. raise ObjectFormatException(error_msg)
  302. def check(self):
  303. """Check this object for internal consistency.
  304. :raise ObjectFormatException: if the object is malformed in some way
  305. """
  306. # TODO: if we find that error-checking during object parsing is a
  307. # performance bottleneck, those checks should be moved to the class's
  308. # check() method during optimization so we can still check the object
  309. # when necessary.
  310. try:
  311. self._deserialize(self.as_raw_chunks())
  312. except Exception, e:
  313. raise ObjectFormatException(e)
  314. def _header(self):
  315. return "%s %lu\0" % (self.type_name, self.raw_length())
  316. def raw_length(self):
  317. """Returns the length of the raw string of this object."""
  318. ret = 0
  319. for chunk in self.as_raw_chunks():
  320. ret += len(chunk)
  321. return ret
  322. def _make_sha(self):
  323. ret = make_sha()
  324. ret.update(self._header())
  325. for chunk in self.as_raw_chunks():
  326. ret.update(chunk)
  327. return ret
  328. def sha(self):
  329. """The SHA1 object that is the name of this object."""
  330. if self._needs_serialization or self._sha is None:
  331. self._sha = self._make_sha()
  332. return self._sha
  333. @property
  334. def id(self):
  335. return self.sha().hexdigest()
  336. def get_type(self):
  337. return self.type_num
  338. def set_type(self, type):
  339. self.type_num = type
  340. # DEPRECATED: use type_num or type_name as needed.
  341. type = property(get_type, set_type)
  342. def __repr__(self):
  343. return "<%s %s>" % (self.__class__.__name__, self.id)
  344. def __ne__(self, other):
  345. return self.id != other.id
  346. def __eq__(self, other):
  347. """Return true if the sha of the two objects match.
  348. The __le__ etc methods aren't overriden as they make no sense,
  349. certainly at this level.
  350. """
  351. return self.id == other.id
  352. class Blob(ShaFile):
  353. """A Git Blob object."""
  354. type_name = 'blob'
  355. type_num = 3
  356. def __init__(self):
  357. super(Blob, self).__init__()
  358. self._chunked_text = []
  359. self._needs_parsing = False
  360. self._needs_serialization = False
  361. def _get_data(self):
  362. return self.as_raw_string()
  363. def _set_data(self, data):
  364. self.set_raw_string(data)
  365. data = property(_get_data, _set_data,
  366. "The text contained within the blob object.")
  367. def _get_chunked(self):
  368. self._ensure_parsed()
  369. return self._chunked_text
  370. def _set_chunked(self, chunks):
  371. self._chunked_text = chunks
  372. def _serialize(self):
  373. if not self._chunked_text:
  374. self._ensure_parsed()
  375. self._needs_serialization = False
  376. return self._chunked_text
  377. def _deserialize(self, chunks):
  378. self._chunked_text = chunks
  379. chunked = property(_get_chunked, _set_chunked,
  380. "The text within the blob object, as chunks (not necessarily lines).")
  381. @classmethod
  382. def from_file(cls, filename):
  383. blob = ShaFile.from_file(filename)
  384. if not isinstance(blob, cls):
  385. raise NotBlobError(filename)
  386. return blob
  387. def check(self):
  388. """Check this object for internal consistency.
  389. :raise ObjectFormatException: if the object is malformed in some way
  390. """
  391. pass # it's impossible for raw data to be malformed
  392. def _parse_tag_or_commit(text):
  393. """Parse tag or commit text.
  394. :param text: the raw text of the tag or commit object.
  395. :yield: tuples of (field, value), one per header line, in the order read
  396. from the text, possibly including duplicates. Includes a field named
  397. None for the freeform tag/commit text.
  398. """
  399. f = StringIO(text)
  400. for l in f:
  401. l = l.rstrip("\n")
  402. if l == "":
  403. # Empty line indicates end of headers
  404. break
  405. yield l.split(" ", 1)
  406. yield (None, f.read())
  407. f.close()
  408. def parse_tag(text):
  409. return _parse_tag_or_commit(text)
  410. class Tag(ShaFile):
  411. """A Git Tag object."""
  412. type_name = 'tag'
  413. type_num = 4
  414. def __init__(self):
  415. super(Tag, self).__init__()
  416. self._tag_timezone_neg_utc = False
  417. @classmethod
  418. def from_file(cls, filename):
  419. tag = ShaFile.from_file(filename)
  420. if not isinstance(tag, cls):
  421. raise NotTagError(filename)
  422. return tag
  423. def check(self):
  424. """Check this object for internal consistency.
  425. :raise ObjectFormatException: if the object is malformed in some way
  426. """
  427. super(Tag, self).check()
  428. self._check_has_member("_object_sha", "missing object sha")
  429. self._check_has_member("_object_class", "missing object type")
  430. self._check_has_member("_name", "missing tag name")
  431. if not self._name:
  432. raise ObjectFormatException("empty tag name")
  433. check_hexsha(self._object_sha, "invalid object sha")
  434. if getattr(self, "_tagger", None):
  435. check_identity(self._tagger, "invalid tagger")
  436. last = None
  437. for field, _ in parse_tag("".join(self._chunked_text)):
  438. if field == _OBJECT_HEADER and last is not None:
  439. raise ObjectFormatException("unexpected object")
  440. elif field == _TYPE_HEADER and last != _OBJECT_HEADER:
  441. raise ObjectFormatException("unexpected type")
  442. elif field == _TAG_HEADER and last != _TYPE_HEADER:
  443. raise ObjectFormatException("unexpected tag name")
  444. elif field == _TAGGER_HEADER and last != _TAG_HEADER:
  445. raise ObjectFormatException("unexpected tagger")
  446. last = field
  447. def _serialize(self):
  448. chunks = []
  449. chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
  450. chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
  451. chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
  452. if self._tagger:
  453. if self._tag_time is None:
  454. chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
  455. else:
  456. chunks.append("%s %s %d %s\n" % (
  457. _TAGGER_HEADER, self._tagger, self._tag_time,
  458. format_timezone(self._tag_timezone,
  459. self._tag_timezone_neg_utc)))
  460. chunks.append("\n") # To close headers
  461. chunks.append(self._message)
  462. return chunks
  463. def _deserialize(self, chunks):
  464. """Grab the metadata attached to the tag"""
  465. self._tagger = None
  466. for field, value in parse_tag("".join(chunks)):
  467. if field == _OBJECT_HEADER:
  468. self._object_sha = value
  469. elif field == _TYPE_HEADER:
  470. self._object_class = object_class(value)
  471. elif field == _TAG_HEADER:
  472. self._name = value
  473. elif field == _TAGGER_HEADER:
  474. try:
  475. sep = value.index("> ")
  476. except ValueError:
  477. self._tagger = value
  478. self._tag_time = None
  479. self._tag_timezone = None
  480. self._tag_timezone_neg_utc = False
  481. else:
  482. self._tagger = value[0:sep+1]
  483. (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
  484. self._tag_time = int(timetext)
  485. self._tag_timezone, self._tag_timezone_neg_utc = \
  486. parse_timezone(timezonetext)
  487. elif field is None:
  488. self._message = value
  489. else:
  490. raise AssertionError("Unknown field %s" % field)
  491. def _get_object(self):
  492. """Get the object pointed to by this tag.
  493. :return: tuple of (object class, sha).
  494. """
  495. self._ensure_parsed()
  496. return (self._object_class, self._object_sha)
  497. def _set_object(self, value):
  498. self._ensure_parsed()
  499. (self._object_class, self._object_sha) = value
  500. self._needs_serialization = True
  501. object = property(_get_object, _set_object)
  502. name = serializable_property("name", "The name of this tag")
  503. tagger = serializable_property("tagger",
  504. "Returns the name of the person who created this tag")
  505. tag_time = serializable_property("tag_time",
  506. "The creation timestamp of the tag. As the number of seconds since the epoch")
  507. tag_timezone = serializable_property("tag_timezone",
  508. "The timezone that tag_time is in.")
  509. message = serializable_property("message", "The message attached to this tag")
  510. def parse_tree(text):
  511. """Parse a tree text.
  512. :param text: Serialized text to parse
  513. :yields: tuples of (name, mode, sha)
  514. """
  515. count = 0
  516. l = len(text)
  517. while count < l:
  518. mode_end = text.index(' ', count)
  519. mode = int(text[count:mode_end], 8)
  520. name_end = text.index('\0', mode_end)
  521. name = text[mode_end+1:name_end]
  522. count = name_end+21
  523. sha = text[name_end+1:count]
  524. yield (name, mode, sha_to_hex(sha))
  525. def serialize_tree(items):
  526. """Serialize the items in a tree to a text.
  527. :param items: Sorted iterable over (name, mode, sha) tuples
  528. :return: Serialized tree text as chunks
  529. """
  530. for name, mode, hexsha in items:
  531. yield "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  532. def sorted_tree_items(entries):
  533. """Iterate over a tree entries dictionary in the order in which
  534. the items would be serialized.
  535. :param entries: Dictionary mapping names to (mode, sha) tuples
  536. :return: Iterator over (name, mode, sha)
  537. """
  538. for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
  539. yield name, entry[0], entry[1]
  540. def cmp_entry((name1, value1), (name2, value2)):
  541. """Compare two tree entries."""
  542. if stat.S_ISDIR(value1[0]):
  543. name1 += "/"
  544. if stat.S_ISDIR(value2[0]):
  545. name2 += "/"
  546. return cmp(name1, name2)
  547. class Tree(ShaFile):
  548. """A Git tree object"""
  549. type_name = 'tree'
  550. type_num = 2
  551. def __init__(self):
  552. super(Tree, self).__init__()
  553. self._entries = {}
  554. @classmethod
  555. def from_file(cls, filename):
  556. tree = ShaFile.from_file(filename)
  557. if not isinstance(tree, cls):
  558. raise NotTreeError(filename)
  559. return tree
  560. def __contains__(self, name):
  561. self._ensure_parsed()
  562. return name in self._entries
  563. def __getitem__(self, name):
  564. self._ensure_parsed()
  565. return self._entries[name]
  566. def __setitem__(self, name, value):
  567. assert isinstance(value, tuple)
  568. assert len(value) == 2
  569. self._ensure_parsed()
  570. self._entries[name] = value
  571. self._needs_serialization = True
  572. def __delitem__(self, name):
  573. self._ensure_parsed()
  574. del self._entries[name]
  575. self._needs_serialization = True
  576. def __len__(self):
  577. self._ensure_parsed()
  578. return len(self._entries)
  579. def __iter__(self):
  580. self._ensure_parsed()
  581. return iter(self._entries)
  582. def add(self, mode, name, hexsha):
  583. assert type(mode) == int
  584. assert type(name) == str
  585. assert type(hexsha) == str
  586. self._ensure_parsed()
  587. self._entries[name] = mode, hexsha
  588. self._needs_serialization = True
  589. def entries(self):
  590. """Return a list of tuples describing the tree entries"""
  591. self._ensure_parsed()
  592. # The order of this is different from iteritems() for historical
  593. # reasons
  594. return [
  595. (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
  596. def iteritems(self):
  597. """Iterate over entries in the order in which they would be serialized.
  598. :return: Iterator over (name, mode, sha) tuples
  599. """
  600. self._ensure_parsed()
  601. return sorted_tree_items(self._entries)
  602. def _deserialize(self, chunks):
  603. """Grab the entries in the tree"""
  604. parsed_entries = parse_tree("".join(chunks))
  605. # TODO: list comprehension is for efficiency in the common (small) case;
  606. # if memory efficiency in the large case is a concern, use a genexp.
  607. self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
  608. def check(self):
  609. """Check this object for internal consistency.
  610. :raise ObjectFormatException: if the object is malformed in some way
  611. """
  612. super(Tree, self).check()
  613. last = None
  614. allowed_modes = (stat.S_IFREG | 0755, stat.S_IFREG | 0644,
  615. stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
  616. # TODO: optionally exclude as in git fsck --strict
  617. stat.S_IFREG | 0664)
  618. for name, mode, sha in parse_tree("".join(self._chunked_text)):
  619. check_hexsha(sha, 'invalid sha %s' % sha)
  620. if '/' in name or name in ('', '.', '..'):
  621. raise ObjectFormatException('invalid name %s' % name)
  622. if mode not in allowed_modes:
  623. raise ObjectFormatException('invalid mode %06o' % mode)
  624. entry = (name, (mode, sha))
  625. if last:
  626. if cmp_entry(last, entry) > 0:
  627. raise ObjectFormatException('entries not sorted')
  628. if name == last[0]:
  629. raise ObjectFormatException('duplicate entry %s' % name)
  630. last = entry
  631. def _serialize(self):
  632. return list(serialize_tree(self.iteritems()))
  633. def as_pretty_string(self):
  634. text = []
  635. for name, mode, hexsha in self.iteritems():
  636. if mode & stat.S_IFDIR:
  637. kind = "tree"
  638. else:
  639. kind = "blob"
  640. text.append("%04o %s %s\t%s\n" % (mode, kind, hexsha, name))
  641. return "".join(text)
  642. def parse_timezone(text):
  643. offset = int(text)
  644. negative_utc = (offset == 0 and text[0] == '-')
  645. signum = (offset < 0) and -1 or 1
  646. offset = abs(offset)
  647. hours = int(offset / 100)
  648. minutes = (offset % 100)
  649. return signum * (hours * 3600 + minutes * 60), negative_utc
  650. def format_timezone(offset, negative_utc=False):
  651. if offset % 60 != 0:
  652. raise ValueError("Unable to handle non-minute offset.")
  653. if offset < 0 or (offset == 0 and negative_utc):
  654. sign = '-'
  655. else:
  656. sign = '+'
  657. offset = abs(offset)
  658. return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
  659. def parse_commit(text):
  660. return _parse_tag_or_commit(text)
  661. class Commit(ShaFile):
  662. """A git commit object"""
  663. type_name = 'commit'
  664. type_num = 1
  665. def __init__(self):
  666. super(Commit, self).__init__()
  667. self._parents = []
  668. self._encoding = None
  669. self._extra = {}
  670. self._author_timezone_neg_utc = False
  671. self._commit_timezone_neg_utc = False
  672. @classmethod
  673. def from_file(cls, filename):
  674. commit = ShaFile.from_file(filename)
  675. if not isinstance(commit, cls):
  676. raise NotCommitError(filename)
  677. return commit
  678. def _deserialize(self, chunks):
  679. self._parents = []
  680. self._extra = []
  681. self._author = None
  682. for field, value in parse_commit("".join(self._chunked_text)):
  683. if field == _TREE_HEADER:
  684. self._tree = value
  685. elif field == _PARENT_HEADER:
  686. self._parents.append(value)
  687. elif field == _AUTHOR_HEADER:
  688. self._author, timetext, timezonetext = value.rsplit(" ", 2)
  689. self._author_time = int(timetext)
  690. self._author_timezone, self._author_timezone_neg_utc =\
  691. parse_timezone(timezonetext)
  692. elif field == _COMMITTER_HEADER:
  693. self._committer, timetext, timezonetext = value.rsplit(" ", 2)
  694. self._commit_time = int(timetext)
  695. self._commit_timezone, self._commit_timezone_neg_utc =\
  696. parse_timezone(timezonetext)
  697. elif field == _ENCODING_HEADER:
  698. self._encoding = value
  699. elif field is None:
  700. self._message = value
  701. else:
  702. self._extra.append((field, value))
  703. def check(self):
  704. """Check this object for internal consistency.
  705. :raise ObjectFormatException: if the object is malformed in some way
  706. """
  707. super(Commit, self).check()
  708. self._check_has_member("_tree", "missing tree")
  709. self._check_has_member("_author", "missing author")
  710. self._check_has_member("_committer", "missing committer")
  711. # times are currently checked when set
  712. for parent in self._parents:
  713. check_hexsha(parent, "invalid parent sha")
  714. check_hexsha(self._tree, "invalid tree sha")
  715. check_identity(self._author, "invalid author")
  716. check_identity(self._committer, "invalid committer")
  717. last = None
  718. for field, _ in parse_commit("".join(self._chunked_text)):
  719. if field == _TREE_HEADER and last is not None:
  720. raise ObjectFormatException("unexpected tree")
  721. elif field == _PARENT_HEADER and last not in (_PARENT_HEADER,
  722. _TREE_HEADER):
  723. raise ObjectFormatException("unexpected parent")
  724. elif field == _AUTHOR_HEADER and last not in (_TREE_HEADER,
  725. _PARENT_HEADER):
  726. raise ObjectFormatException("unexpected author")
  727. elif field == _COMMITTER_HEADER and last != _AUTHOR_HEADER:
  728. raise ObjectFormatException("unexpected committer")
  729. elif field == _ENCODING_HEADER and last != _COMMITTER_HEADER:
  730. raise ObjectFormatException("unexpected encoding")
  731. last = field
  732. # TODO: optionally check for duplicate parents
  733. def _serialize(self):
  734. chunks = []
  735. chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
  736. for p in self._parents:
  737. chunks.append("%s %s\n" % (_PARENT_HEADER, p))
  738. chunks.append("%s %s %s %s\n" % (
  739. _AUTHOR_HEADER, self._author, str(self._author_time),
  740. format_timezone(self._author_timezone,
  741. self._author_timezone_neg_utc)))
  742. chunks.append("%s %s %s %s\n" % (
  743. _COMMITTER_HEADER, self._committer, str(self._commit_time),
  744. format_timezone(self._commit_timezone,
  745. self._commit_timezone_neg_utc)))
  746. if self.encoding:
  747. chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
  748. for k, v in self.extra:
  749. if "\n" in k or "\n" in v:
  750. raise AssertionError("newline in extra data: %r -> %r" % (k, v))
  751. chunks.append("%s %s\n" % (k, v))
  752. chunks.append("\n") # There must be a new line after the headers
  753. chunks.append(self._message)
  754. return chunks
  755. tree = serializable_property("tree", "Tree that is the state of this commit")
  756. def _get_parents(self):
  757. """Return a list of parents of this commit."""
  758. self._ensure_parsed()
  759. return self._parents
  760. def _set_parents(self, value):
  761. """Set a list of parents of this commit."""
  762. self._ensure_parsed()
  763. self._needs_serialization = True
  764. self._parents = value
  765. parents = property(_get_parents, _set_parents)
  766. def _get_extra(self):
  767. """Return extra settings of this commit."""
  768. self._ensure_parsed()
  769. return self._extra
  770. extra = property(_get_extra)
  771. author = serializable_property("author",
  772. "The name of the author of the commit")
  773. committer = serializable_property("committer",
  774. "The name of the committer of the commit")
  775. message = serializable_property("message",
  776. "The commit message")
  777. commit_time = serializable_property("commit_time",
  778. "The timestamp of the commit. As the number of seconds since the epoch.")
  779. commit_timezone = serializable_property("commit_timezone",
  780. "The zone the commit time is in")
  781. author_time = serializable_property("author_time",
  782. "The timestamp the commit was written. as the number of seconds since the epoch.")
  783. author_timezone = serializable_property("author_timezone",
  784. "Returns the zone the author time is in.")
  785. encoding = serializable_property("encoding",
  786. "Encoding of the commit message.")
  787. OBJECT_CLASSES = (
  788. Commit,
  789. Tree,
  790. Blob,
  791. Tag,
  792. )
  793. _TYPE_MAP = {}
  794. for cls in OBJECT_CLASSES:
  795. _TYPE_MAP[cls.type_name] = cls
  796. _TYPE_MAP[cls.type_num] = cls
  797. # Hold on to the pure-python implementations for testing
  798. _parse_tree_py = parse_tree
  799. _sorted_tree_items_py = sorted_tree_items
  800. try:
  801. # Try to import C versions
  802. from dulwich._objects import parse_tree, sorted_tree_items
  803. except ImportError:
  804. pass