2
0

objects.py 32 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018
  1. # objects.py -- Access to base git objects
  2. # Copyright (C) 2007 James Westby <jw+debian@jameswestby.net>
  3. # Copyright (C) 2008-2009 Jelmer Vernooij <jelmer@samba.org>
  4. #
  5. # This program is free software; you can redistribute it and/or
  6. # modify it under the terms of the GNU General Public License
  7. # as published by the Free Software Foundation; version 2
  8. # of the License or (at your option) a later version of the License.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the Free Software
  17. # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  18. # MA 02110-1301, USA.
  19. """Access to base git objects."""
  20. import binascii
  21. from cStringIO import (
  22. StringIO,
  23. )
  24. import mmap
  25. import os
  26. import stat
  27. import zlib
  28. from dulwich.errors import (
  29. ChecksumMismatch,
  30. NotBlobError,
  31. NotCommitError,
  32. NotTagError,
  33. NotTreeError,
  34. ObjectFormatException,
  35. )
  36. from dulwich.file import GitFile
  37. from dulwich.misc import (
  38. make_sha,
  39. )
  40. # Header fields for commits
  41. _TREE_HEADER = "tree"
  42. _PARENT_HEADER = "parent"
  43. _AUTHOR_HEADER = "author"
  44. _COMMITTER_HEADER = "committer"
  45. _ENCODING_HEADER = "encoding"
  46. # Header fields for objects
  47. _OBJECT_HEADER = "object"
  48. _TYPE_HEADER = "type"
  49. _TAG_HEADER = "tag"
  50. _TAGGER_HEADER = "tagger"
  51. S_IFGITLINK = 0160000
  52. def S_ISGITLINK(m):
  53. return (stat.S_IFMT(m) == S_IFGITLINK)
  54. def _decompress(string):
  55. dcomp = zlib.decompressobj()
  56. dcomped = dcomp.decompress(string)
  57. dcomped += dcomp.flush()
  58. return dcomped
  59. def sha_to_hex(sha):
  60. """Takes a string and returns the hex of the sha within"""
  61. hexsha = binascii.hexlify(sha)
  62. assert len(hexsha) == 40, "Incorrect length of sha1 string: %d" % hexsha
  63. return hexsha
  64. def hex_to_sha(hex):
  65. """Takes a hex sha and returns a binary sha"""
  66. assert len(hex) == 40, "Incorrent length of hexsha: %s" % hex
  67. return binascii.unhexlify(hex)
  68. def hex_to_filename(path, hex):
  69. """Takes a hex sha and returns its filename relative to the given path."""
  70. dir = hex[:2]
  71. file = hex[2:]
  72. # Check from object dir
  73. return os.path.join(path, dir, file)
  74. def filename_to_hex(filename):
  75. """Takes an object filename and returns its corresponding hex sha."""
  76. # grab the last (up to) two path components
  77. names = filename.rsplit(os.path.sep, 2)[-2:]
  78. errmsg = "Invalid object filename: %s" % filename
  79. assert len(names) == 2, errmsg
  80. base, rest = names
  81. assert len(base) == 2 and len(rest) == 38, errmsg
  82. hex = base + rest
  83. hex_to_sha(hex)
  84. return hex
  85. def serializable_property(name, docstring=None):
  86. def set(obj, value):
  87. obj._ensure_parsed()
  88. setattr(obj, "_"+name, value)
  89. obj._needs_serialization = True
  90. def get(obj):
  91. obj._ensure_parsed()
  92. return getattr(obj, "_"+name)
  93. return property(get, set, doc=docstring)
  94. def object_class(type):
  95. """Get the object class corresponding to the given type.
  96. :param type: Either a type name string or a numeric type.
  97. :return: The ShaFile subclass corresponding to the given type, or None if
  98. type is not a valid type name/number.
  99. """
  100. return _TYPE_MAP.get(type, None)
  101. def check_hexsha(hex, error_msg):
  102. try:
  103. hex_to_sha(hex)
  104. except (TypeError, AssertionError):
  105. raise ObjectFormatException("%s %s" % (error_msg, hex))
  106. def check_identity(identity, error_msg):
  107. email_start = identity.find("<")
  108. email_end = identity.find(">")
  109. if (email_start < 0 or email_end < 0 or email_end <= email_start
  110. or identity.find("<", email_start + 1) >= 0
  111. or identity.find(">", email_end + 1) >= 0
  112. or not identity.endswith(">")):
  113. raise ObjectFormatException(error_msg)
  114. class FixedSha(object):
  115. """SHA object that behaves like hashlib's but is given a fixed value."""
  116. def __init__(self, hexsha):
  117. self._hexsha = hexsha
  118. self._sha = hex_to_sha(hexsha)
  119. def digest(self):
  120. return self._sha
  121. def hexdigest(self):
  122. return self._hexsha
  123. class ShaFile(object):
  124. """A git SHA file."""
  125. @staticmethod
  126. def _parse_legacy_object_header(magic, f):
  127. """Parse a legacy object, creating it but not reading the file."""
  128. bufsize = 1024
  129. decomp = zlib.decompressobj()
  130. header = decomp.decompress(magic)
  131. start = 0
  132. end = -1
  133. while end < 0:
  134. header += decomp.decompress(f.read(bufsize))
  135. end = header.find("\0", start)
  136. start = len(header)
  137. header = header[:end]
  138. type_name, size = header.split(" ", 1)
  139. size = int(size) # sanity check
  140. obj_class = object_class(type_name)
  141. if not obj_class:
  142. raise ObjectFormatException("Not a known type: %s" % type_name)
  143. obj = obj_class()
  144. obj._filename = f.name
  145. return obj
  146. def _parse_legacy_object(self, f):
  147. """Parse a legacy object, setting the raw string."""
  148. size = os.path.getsize(f.name)
  149. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  150. try:
  151. text = _decompress(map)
  152. finally:
  153. map.close()
  154. header_end = text.find('\0')
  155. if header_end < 0:
  156. raise ObjectFormatException("Invalid object header")
  157. self.set_raw_string(text[header_end+1:])
  158. def as_legacy_object_chunks(self):
  159. compobj = zlib.compressobj()
  160. yield compobj.compress(self._header())
  161. for chunk in self.as_raw_chunks():
  162. yield compobj.compress(chunk)
  163. yield compobj.flush()
  164. def as_legacy_object(self):
  165. return "".join(self.as_legacy_object_chunks())
  166. def as_raw_chunks(self):
  167. if self._needs_parsing:
  168. self._ensure_parsed()
  169. elif self._needs_serialization:
  170. self._chunked_text = self._serialize()
  171. return self._chunked_text
  172. def as_raw_string(self):
  173. return "".join(self.as_raw_chunks())
  174. def __str__(self):
  175. return self.as_raw_string()
  176. def __hash__(self):
  177. return hash(self.id)
  178. def as_pretty_string(self):
  179. return self.as_raw_string()
  180. def _ensure_parsed(self):
  181. if self._needs_parsing:
  182. if not self._chunked_text:
  183. assert self._filename, "ShaFile needs either text or filename"
  184. self._parse_file()
  185. self._deserialize(self._chunked_text)
  186. self._needs_parsing = False
  187. def set_raw_string(self, text):
  188. if type(text) != str:
  189. raise TypeError(text)
  190. self.set_raw_chunks([text])
  191. def set_raw_chunks(self, chunks):
  192. self._chunked_text = chunks
  193. self._deserialize(chunks)
  194. self._sha = None
  195. self._needs_parsing = False
  196. self._needs_serialization = False
  197. @staticmethod
  198. def _parse_object_header(magic, f):
  199. """Parse a new style object, creating it but not reading the file."""
  200. num_type = (ord(magic[0]) >> 4) & 7
  201. obj_class = object_class(num_type)
  202. if not obj_class:
  203. raise ObjectFormatError("Not a known type: %d" % num_type)
  204. obj = obj_class()
  205. obj._filename = f.name
  206. return obj
  207. def _parse_object(self, f):
  208. """Parse a new style object, setting self._text."""
  209. size = os.path.getsize(f.name)
  210. map = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
  211. try:
  212. # skip type and size; type must have already been determined, and we
  213. # trust zlib to fail if it's otherwise corrupted
  214. byte = ord(map[0])
  215. used = 1
  216. while (byte & 0x80) != 0:
  217. byte = ord(map[used])
  218. used += 1
  219. raw = map[used:]
  220. self.set_raw_string(_decompress(raw))
  221. finally:
  222. map.close()
  223. @classmethod
  224. def _is_legacy_object(cls, magic):
  225. b0, b1 = map(ord, magic)
  226. word = (b0 << 8) + b1
  227. return b0 == 0x78 and (word % 31) == 0
  228. @classmethod
  229. def _parse_file_header(cls, f):
  230. magic = f.read(2)
  231. if cls._is_legacy_object(magic):
  232. return cls._parse_legacy_object_header(magic, f)
  233. else:
  234. return cls._parse_object_header(magic, f)
  235. def __init__(self):
  236. """Don't call this directly"""
  237. self._sha = None
  238. self._filename = None
  239. self._chunked_text = []
  240. self._needs_parsing = False
  241. self._needs_serialization = True
  242. def _deserialize(self, chunks):
  243. raise NotImplementedError(self._deserialize)
  244. def _serialize(self):
  245. raise NotImplementedError(self._serialize)
  246. def _parse_file(self):
  247. f = GitFile(self._filename, 'rb')
  248. try:
  249. magic = f.read(2)
  250. if self._is_legacy_object(magic):
  251. self._parse_legacy_object(f)
  252. else:
  253. self._parse_object(f)
  254. finally:
  255. f.close()
  256. @classmethod
  257. def from_file(cls, filename):
  258. """Get the contents of a SHA file on disk."""
  259. f = GitFile(filename, 'rb')
  260. try:
  261. try:
  262. obj = cls._parse_file_header(f)
  263. obj._sha = FixedSha(filename_to_hex(filename))
  264. obj._needs_parsing = True
  265. obj._needs_serialization = True
  266. return obj
  267. except (IndexError, ValueError), e:
  268. raise ObjectFormatException("invalid object header")
  269. finally:
  270. f.close()
  271. @staticmethod
  272. def from_raw_string(type_num, string):
  273. """Creates an object of the indicated type from the raw string given.
  274. :param type_num: The numeric type of the object.
  275. :param string: The raw uncompressed contents.
  276. """
  277. obj = object_class(type_num)()
  278. obj.set_raw_string(string)
  279. return obj
  280. @staticmethod
  281. def from_raw_chunks(type_num, chunks):
  282. """Creates an object of the indicated type from the raw chunks given.
  283. :param type_num: The numeric type of the object.
  284. :param chunks: An iterable of the raw uncompressed contents.
  285. """
  286. obj = object_class(type_num)()
  287. obj.set_raw_chunks(chunks)
  288. return obj
  289. @classmethod
  290. def from_string(cls, string):
  291. """Create a ShaFile from a string."""
  292. obj = cls()
  293. obj.set_raw_string(string)
  294. return obj
  295. def _check_has_member(self, member, error_msg):
  296. """Check that the object has a given member variable.
  297. :param member: the member variable to check for
  298. :param error_msg: the message for an error if the member is missing
  299. :raise ObjectFormatException: with the given error_msg if member is
  300. missing or is None
  301. """
  302. if getattr(self, member, None) is None:
  303. raise ObjectFormatException(error_msg)
  304. def check(self):
  305. """Check this object for internal consistency.
  306. :raise ObjectFormatException: if the object is malformed in some way
  307. :raise ChecksumMismatch: if the object was created with a SHA that does
  308. not match its contents
  309. """
  310. # TODO: if we find that error-checking during object parsing is a
  311. # performance bottleneck, those checks should be moved to the class's
  312. # check() method during optimization so we can still check the object
  313. # when necessary.
  314. old_sha = self.id
  315. try:
  316. self._deserialize(self.as_raw_chunks())
  317. self._sha = None
  318. new_sha = self.id
  319. except Exception, e:
  320. raise ObjectFormatException(e)
  321. if old_sha != new_sha:
  322. raise ChecksumMismatch(new_sha, old_sha)
  323. def _header(self):
  324. return "%s %lu\0" % (self.type_name, self.raw_length())
  325. def raw_length(self):
  326. """Returns the length of the raw string of this object."""
  327. ret = 0
  328. for chunk in self.as_raw_chunks():
  329. ret += len(chunk)
  330. return ret
  331. def _make_sha(self):
  332. ret = make_sha()
  333. ret.update(self._header())
  334. for chunk in self.as_raw_chunks():
  335. ret.update(chunk)
  336. return ret
  337. def sha(self):
  338. """The SHA1 object that is the name of this object."""
  339. if self._sha is None:
  340. # this is a local because as_raw_chunks() overwrites self._sha
  341. new_sha = make_sha()
  342. new_sha.update(self._header())
  343. for chunk in self.as_raw_chunks():
  344. new_sha.update(chunk)
  345. self._sha = new_sha
  346. return self._sha
  347. @property
  348. def id(self):
  349. return self.sha().hexdigest()
  350. def get_type(self):
  351. return self.type_num
  352. def set_type(self, type):
  353. self.type_num = type
  354. # DEPRECATED: use type_num or type_name as needed.
  355. type = property(get_type, set_type)
  356. def __repr__(self):
  357. return "<%s %s>" % (self.__class__.__name__, self.id)
  358. def __ne__(self, other):
  359. return self.id != other.id
  360. def __eq__(self, other):
  361. """Return true if the sha of the two objects match.
  362. The __le__ etc methods aren't overriden as they make no sense,
  363. certainly at this level.
  364. """
  365. return self.id == other.id
  366. class Blob(ShaFile):
  367. """A Git Blob object."""
  368. type_name = 'blob'
  369. type_num = 3
  370. def __init__(self):
  371. super(Blob, self).__init__()
  372. self._chunked_text = []
  373. self._needs_parsing = False
  374. self._needs_serialization = False
  375. def _get_data(self):
  376. return self.as_raw_string()
  377. def _set_data(self, data):
  378. self.set_raw_string(data)
  379. data = property(_get_data, _set_data,
  380. "The text contained within the blob object.")
  381. def _get_chunked(self):
  382. self._ensure_parsed()
  383. return self._chunked_text
  384. def _set_chunked(self, chunks):
  385. self._chunked_text = chunks
  386. def _serialize(self):
  387. if not self._chunked_text:
  388. self._ensure_parsed()
  389. self._needs_serialization = False
  390. return self._chunked_text
  391. def _deserialize(self, chunks):
  392. self._chunked_text = chunks
  393. chunked = property(_get_chunked, _set_chunked,
  394. "The text within the blob object, as chunks (not necessarily lines).")
  395. @classmethod
  396. def from_file(cls, filename):
  397. blob = ShaFile.from_file(filename)
  398. if not isinstance(blob, cls):
  399. raise NotBlobError(filename)
  400. return blob
  401. def check(self):
  402. """Check this object for internal consistency.
  403. :raise ObjectFormatException: if the object is malformed in some way
  404. """
  405. super(Blob, self).check()
  406. def _parse_tag_or_commit(text):
  407. """Parse tag or commit text.
  408. :param text: the raw text of the tag or commit object.
  409. :yield: tuples of (field, value), one per header line, in the order read
  410. from the text, possibly including duplicates. Includes a field named
  411. None for the freeform tag/commit text.
  412. """
  413. f = StringIO(text)
  414. for l in f:
  415. l = l.rstrip("\n")
  416. if l == "":
  417. # Empty line indicates end of headers
  418. break
  419. yield l.split(" ", 1)
  420. yield (None, f.read())
  421. f.close()
  422. def parse_tag(text):
  423. return _parse_tag_or_commit(text)
  424. class Tag(ShaFile):
  425. """A Git Tag object."""
  426. type_name = 'tag'
  427. type_num = 4
  428. def __init__(self):
  429. super(Tag, self).__init__()
  430. self._tag_timezone_neg_utc = False
  431. @classmethod
  432. def from_file(cls, filename):
  433. tag = ShaFile.from_file(filename)
  434. if not isinstance(tag, cls):
  435. raise NotTagError(filename)
  436. return tag
  437. def check(self):
  438. """Check this object for internal consistency.
  439. :raise ObjectFormatException: if the object is malformed in some way
  440. """
  441. super(Tag, self).check()
  442. self._check_has_member("_object_sha", "missing object sha")
  443. self._check_has_member("_object_class", "missing object type")
  444. self._check_has_member("_name", "missing tag name")
  445. if not self._name:
  446. raise ObjectFormatException("empty tag name")
  447. check_hexsha(self._object_sha, "invalid object sha")
  448. if getattr(self, "_tagger", None):
  449. check_identity(self._tagger, "invalid tagger")
  450. last = None
  451. for field, _ in parse_tag("".join(self._chunked_text)):
  452. if field == _OBJECT_HEADER and last is not None:
  453. raise ObjectFormatException("unexpected object")
  454. elif field == _TYPE_HEADER and last != _OBJECT_HEADER:
  455. raise ObjectFormatException("unexpected type")
  456. elif field == _TAG_HEADER and last != _TYPE_HEADER:
  457. raise ObjectFormatException("unexpected tag name")
  458. elif field == _TAGGER_HEADER and last != _TAG_HEADER:
  459. raise ObjectFormatException("unexpected tagger")
  460. last = field
  461. def _serialize(self):
  462. chunks = []
  463. chunks.append("%s %s\n" % (_OBJECT_HEADER, self._object_sha))
  464. chunks.append("%s %s\n" % (_TYPE_HEADER, self._object_class.type_name))
  465. chunks.append("%s %s\n" % (_TAG_HEADER, self._name))
  466. if self._tagger:
  467. if self._tag_time is None:
  468. chunks.append("%s %s\n" % (_TAGGER_HEADER, self._tagger))
  469. else:
  470. chunks.append("%s %s %d %s\n" % (
  471. _TAGGER_HEADER, self._tagger, self._tag_time,
  472. format_timezone(self._tag_timezone,
  473. self._tag_timezone_neg_utc)))
  474. chunks.append("\n") # To close headers
  475. chunks.append(self._message)
  476. return chunks
  477. def _deserialize(self, chunks):
  478. """Grab the metadata attached to the tag"""
  479. self._tagger = None
  480. for field, value in parse_tag("".join(chunks)):
  481. if field == _OBJECT_HEADER:
  482. self._object_sha = value
  483. elif field == _TYPE_HEADER:
  484. obj_class = object_class(value)
  485. if not obj_class:
  486. raise ObjectFormatException("Not a known type: %s" % value)
  487. self._object_class = obj_class
  488. elif field == _TAG_HEADER:
  489. self._name = value
  490. elif field == _TAGGER_HEADER:
  491. try:
  492. sep = value.index("> ")
  493. except ValueError:
  494. self._tagger = value
  495. self._tag_time = None
  496. self._tag_timezone = None
  497. self._tag_timezone_neg_utc = False
  498. else:
  499. self._tagger = value[0:sep+1]
  500. try:
  501. (timetext, timezonetext) = value[sep+2:].rsplit(" ", 1)
  502. self._tag_time = int(timetext)
  503. self._tag_timezone, self._tag_timezone_neg_utc = \
  504. parse_timezone(timezonetext)
  505. except ValueError, e:
  506. raise ObjectFormatException(e)
  507. elif field is None:
  508. self._message = value
  509. else:
  510. raise ObjectFormatError("Unknown field %s" % field)
  511. def _get_object(self):
  512. """Get the object pointed to by this tag.
  513. :return: tuple of (object class, sha).
  514. """
  515. self._ensure_parsed()
  516. return (self._object_class, self._object_sha)
  517. def _set_object(self, value):
  518. self._ensure_parsed()
  519. (self._object_class, self._object_sha) = value
  520. self._needs_serialization = True
  521. object = property(_get_object, _set_object)
  522. name = serializable_property("name", "The name of this tag")
  523. tagger = serializable_property("tagger",
  524. "Returns the name of the person who created this tag")
  525. tag_time = serializable_property("tag_time",
  526. "The creation timestamp of the tag. As the number of seconds since the epoch")
  527. tag_timezone = serializable_property("tag_timezone",
  528. "The timezone that tag_time is in.")
  529. message = serializable_property("message", "The message attached to this tag")
  530. def parse_tree(text):
  531. """Parse a tree text.
  532. :param text: Serialized text to parse
  533. :yields: tuples of (name, mode, sha)
  534. """
  535. count = 0
  536. l = len(text)
  537. while count < l:
  538. mode_end = text.index(' ', count)
  539. mode = int(text[count:mode_end], 8)
  540. name_end = text.index('\0', mode_end)
  541. name = text[mode_end+1:name_end]
  542. count = name_end+21
  543. sha = text[name_end+1:count]
  544. yield (name, mode, sha_to_hex(sha))
  545. def serialize_tree(items):
  546. """Serialize the items in a tree to a text.
  547. :param items: Sorted iterable over (name, mode, sha) tuples
  548. :return: Serialized tree text as chunks
  549. """
  550. for name, mode, hexsha in items:
  551. yield "%04o %s\0%s" % (mode, name, hex_to_sha(hexsha))
  552. def sorted_tree_items(entries):
  553. """Iterate over a tree entries dictionary in the order in which
  554. the items would be serialized.
  555. :param entries: Dictionary mapping names to (mode, sha) tuples
  556. :return: Iterator over (name, mode, sha)
  557. """
  558. for name, entry in sorted(entries.iteritems(), cmp=cmp_entry):
  559. yield name, entry[0], entry[1]
  560. def cmp_entry((name1, value1), (name2, value2)):
  561. """Compare two tree entries."""
  562. if stat.S_ISDIR(value1[0]):
  563. name1 += "/"
  564. if stat.S_ISDIR(value2[0]):
  565. name2 += "/"
  566. return cmp(name1, name2)
  567. class Tree(ShaFile):
  568. """A Git tree object"""
  569. type_name = 'tree'
  570. type_num = 2
  571. def __init__(self):
  572. super(Tree, self).__init__()
  573. self._entries = {}
  574. @classmethod
  575. def from_file(cls, filename):
  576. tree = ShaFile.from_file(filename)
  577. if not isinstance(tree, cls):
  578. raise NotTreeError(filename)
  579. return tree
  580. def __contains__(self, name):
  581. self._ensure_parsed()
  582. return name in self._entries
  583. def __getitem__(self, name):
  584. self._ensure_parsed()
  585. return self._entries[name]
  586. def __setitem__(self, name, value):
  587. assert isinstance(value, tuple)
  588. assert len(value) == 2
  589. self._ensure_parsed()
  590. self._entries[name] = value
  591. self._needs_serialization = True
  592. def __delitem__(self, name):
  593. self._ensure_parsed()
  594. del self._entries[name]
  595. self._needs_serialization = True
  596. def __len__(self):
  597. self._ensure_parsed()
  598. return len(self._entries)
  599. def __iter__(self):
  600. self._ensure_parsed()
  601. return iter(self._entries)
  602. def add(self, mode, name, hexsha):
  603. assert type(mode) == int
  604. assert type(name) == str
  605. assert type(hexsha) == str
  606. self._ensure_parsed()
  607. self._entries[name] = mode, hexsha
  608. self._needs_serialization = True
  609. def entries(self):
  610. """Return a list of tuples describing the tree entries"""
  611. self._ensure_parsed()
  612. # The order of this is different from iteritems() for historical
  613. # reasons
  614. return [
  615. (mode, name, hexsha) for (name, mode, hexsha) in self.iteritems()]
  616. def iteritems(self):
  617. """Iterate over entries in the order in which they would be serialized.
  618. :return: Iterator over (name, mode, sha) tuples
  619. """
  620. self._ensure_parsed()
  621. return sorted_tree_items(self._entries)
  622. def _deserialize(self, chunks):
  623. """Grab the entries in the tree"""
  624. try:
  625. parsed_entries = parse_tree("".join(chunks))
  626. except ValueError, e:
  627. raise ObjectFormatException(e)
  628. # TODO: list comprehension is for efficiency in the common (small) case;
  629. # if memory efficiency in the large case is a concern, use a genexp.
  630. self._entries = dict([(n, (m, s)) for n, m, s in parsed_entries])
  631. def check(self):
  632. """Check this object for internal consistency.
  633. :raise ObjectFormatException: if the object is malformed in some way
  634. """
  635. super(Tree, self).check()
  636. last = None
  637. allowed_modes = (stat.S_IFREG | 0755, stat.S_IFREG | 0644,
  638. stat.S_IFLNK, stat.S_IFDIR, S_IFGITLINK,
  639. # TODO: optionally exclude as in git fsck --strict
  640. stat.S_IFREG | 0664)
  641. for name, mode, sha in parse_tree("".join(self._chunked_text)):
  642. check_hexsha(sha, 'invalid sha %s' % sha)
  643. if '/' in name or name in ('', '.', '..'):
  644. raise ObjectFormatException('invalid name %s' % name)
  645. if mode not in allowed_modes:
  646. raise ObjectFormatException('invalid mode %06o' % mode)
  647. entry = (name, (mode, sha))
  648. if last:
  649. if cmp_entry(last, entry) > 0:
  650. raise ObjectFormatException('entries not sorted')
  651. if name == last[0]:
  652. raise ObjectFormatException('duplicate entry %s' % name)
  653. last = entry
  654. def _serialize(self):
  655. return list(serialize_tree(self.iteritems()))
  656. def as_pretty_string(self):
  657. text = []
  658. for name, mode, hexsha in self.iteritems():
  659. if mode & stat.S_IFDIR:
  660. kind = "tree"
  661. else:
  662. kind = "blob"
  663. text.append("%04o %s %s\t%s\n" % (mode, kind, hexsha, name))
  664. return "".join(text)
  665. def parse_timezone(text):
  666. offset = int(text)
  667. negative_utc = (offset == 0 and text[0] == '-')
  668. signum = (offset < 0) and -1 or 1
  669. offset = abs(offset)
  670. hours = int(offset / 100)
  671. minutes = (offset % 100)
  672. return signum * (hours * 3600 + minutes * 60), negative_utc
  673. def format_timezone(offset, negative_utc=False):
  674. if offset % 60 != 0:
  675. raise ValueError("Unable to handle non-minute offset.")
  676. if offset < 0 or (offset == 0 and negative_utc):
  677. sign = '-'
  678. else:
  679. sign = '+'
  680. offset = abs(offset)
  681. return '%c%02d%02d' % (sign, offset / 3600, (offset / 60) % 60)
  682. def parse_commit(text):
  683. return _parse_tag_or_commit(text)
  684. class Commit(ShaFile):
  685. """A git commit object"""
  686. type_name = 'commit'
  687. type_num = 1
  688. def __init__(self):
  689. super(Commit, self).__init__()
  690. self._parents = []
  691. self._encoding = None
  692. self._extra = {}
  693. self._author_timezone_neg_utc = False
  694. self._commit_timezone_neg_utc = False
  695. @classmethod
  696. def from_file(cls, filename):
  697. commit = ShaFile.from_file(filename)
  698. if not isinstance(commit, cls):
  699. raise NotCommitError(filename)
  700. return commit
  701. def _deserialize(self, chunks):
  702. self._parents = []
  703. self._extra = []
  704. self._author = None
  705. for field, value in parse_commit("".join(self._chunked_text)):
  706. if field == _TREE_HEADER:
  707. self._tree = value
  708. elif field == _PARENT_HEADER:
  709. self._parents.append(value)
  710. elif field == _AUTHOR_HEADER:
  711. self._author, timetext, timezonetext = value.rsplit(" ", 2)
  712. self._author_time = int(timetext)
  713. self._author_timezone, self._author_timezone_neg_utc =\
  714. parse_timezone(timezonetext)
  715. elif field == _COMMITTER_HEADER:
  716. self._committer, timetext, timezonetext = value.rsplit(" ", 2)
  717. self._commit_time = int(timetext)
  718. self._commit_timezone, self._commit_timezone_neg_utc =\
  719. parse_timezone(timezonetext)
  720. elif field == _ENCODING_HEADER:
  721. self._encoding = value
  722. elif field is None:
  723. self._message = value
  724. else:
  725. self._extra.append((field, value))
  726. def check(self):
  727. """Check this object for internal consistency.
  728. :raise ObjectFormatException: if the object is malformed in some way
  729. """
  730. super(Commit, self).check()
  731. self._check_has_member("_tree", "missing tree")
  732. self._check_has_member("_author", "missing author")
  733. self._check_has_member("_committer", "missing committer")
  734. # times are currently checked when set
  735. for parent in self._parents:
  736. check_hexsha(parent, "invalid parent sha")
  737. check_hexsha(self._tree, "invalid tree sha")
  738. check_identity(self._author, "invalid author")
  739. check_identity(self._committer, "invalid committer")
  740. last = None
  741. for field, _ in parse_commit("".join(self._chunked_text)):
  742. if field == _TREE_HEADER and last is not None:
  743. raise ObjectFormatException("unexpected tree")
  744. elif field == _PARENT_HEADER and last not in (_PARENT_HEADER,
  745. _TREE_HEADER):
  746. raise ObjectFormatException("unexpected parent")
  747. elif field == _AUTHOR_HEADER and last not in (_TREE_HEADER,
  748. _PARENT_HEADER):
  749. raise ObjectFormatException("unexpected author")
  750. elif field == _COMMITTER_HEADER and last != _AUTHOR_HEADER:
  751. raise ObjectFormatException("unexpected committer")
  752. elif field == _ENCODING_HEADER and last != _COMMITTER_HEADER:
  753. raise ObjectFormatException("unexpected encoding")
  754. last = field
  755. # TODO: optionally check for duplicate parents
  756. def _serialize(self):
  757. chunks = []
  758. chunks.append("%s %s\n" % (_TREE_HEADER, self._tree))
  759. for p in self._parents:
  760. chunks.append("%s %s\n" % (_PARENT_HEADER, p))
  761. chunks.append("%s %s %s %s\n" % (
  762. _AUTHOR_HEADER, self._author, str(self._author_time),
  763. format_timezone(self._author_timezone,
  764. self._author_timezone_neg_utc)))
  765. chunks.append("%s %s %s %s\n" % (
  766. _COMMITTER_HEADER, self._committer, str(self._commit_time),
  767. format_timezone(self._commit_timezone,
  768. self._commit_timezone_neg_utc)))
  769. if self.encoding:
  770. chunks.append("%s %s\n" % (_ENCODING_HEADER, self.encoding))
  771. for k, v in self.extra:
  772. if "\n" in k or "\n" in v:
  773. raise AssertionError("newline in extra data: %r -> %r" % (k, v))
  774. chunks.append("%s %s\n" % (k, v))
  775. chunks.append("\n") # There must be a new line after the headers
  776. chunks.append(self._message)
  777. return chunks
  778. tree = serializable_property("tree", "Tree that is the state of this commit")
  779. def _get_parents(self):
  780. """Return a list of parents of this commit."""
  781. self._ensure_parsed()
  782. return self._parents
  783. def _set_parents(self, value):
  784. """Set a list of parents of this commit."""
  785. self._ensure_parsed()
  786. self._needs_serialization = True
  787. self._parents = value
  788. parents = property(_get_parents, _set_parents)
  789. def _get_extra(self):
  790. """Return extra settings of this commit."""
  791. self._ensure_parsed()
  792. return self._extra
  793. extra = property(_get_extra)
  794. author = serializable_property("author",
  795. "The name of the author of the commit")
  796. committer = serializable_property("committer",
  797. "The name of the committer of the commit")
  798. message = serializable_property("message",
  799. "The commit message")
  800. commit_time = serializable_property("commit_time",
  801. "The timestamp of the commit. As the number of seconds since the epoch.")
  802. commit_timezone = serializable_property("commit_timezone",
  803. "The zone the commit time is in")
  804. author_time = serializable_property("author_time",
  805. "The timestamp the commit was written. as the number of seconds since the epoch.")
  806. author_timezone = serializable_property("author_timezone",
  807. "Returns the zone the author time is in.")
  808. encoding = serializable_property("encoding",
  809. "Encoding of the commit message.")
  810. OBJECT_CLASSES = (
  811. Commit,
  812. Tree,
  813. Blob,
  814. Tag,
  815. )
  816. _TYPE_MAP = {}
  817. for cls in OBJECT_CLASSES:
  818. _TYPE_MAP[cls.type_name] = cls
  819. _TYPE_MAP[cls.type_num] = cls
  820. # Hold on to the pure-python implementations for testing
  821. _parse_tree_py = parse_tree
  822. _sorted_tree_items_py = sorted_tree_items
  823. try:
  824. # Try to import C versions
  825. from dulwich._objects import parse_tree, sorted_tree_items
  826. except ImportError:
  827. pass