archive.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
# archive.py -- Creating an archive from a tarball
# Copyright (C) 2015 Jonas Haag <jonas@lophus.org>
# Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# or (at your option) a later version of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.
  19. """Generates tarballs for Git trees.
  20. """
  21. import posixpath
  22. import stat
  23. import tarfile
  24. from io import BytesIO
  25. from contextlib import closing
  26. class ChunkedBytesIO(object):
  27. """Turn a list of bytestrings into a file-like object.
  28. This is similar to creating a `BytesIO` from a concatenation of the
  29. bytestring list, but saves memory by NOT creating one giant bytestring first::
  30. BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(list_of_bytestrings)
  31. """
  32. def __init__(self, contents):
  33. self.contents = contents
  34. self.pos = (0, 0)
  35. def read(self, maxbytes=None):
  36. if maxbytes < 0:
  37. maxbytes = float('inf')
  38. buf = []
  39. chunk, cursor = self.pos
  40. while chunk < len(self.contents):
  41. if maxbytes < len(self.contents[chunk]) - cursor:
  42. buf.append(self.contents[chunk][cursor:cursor+maxbytes])
  43. cursor += maxbytes
  44. self.pos = (chunk, cursor)
  45. break
  46. else:
  47. buf.append(self.contents[chunk][cursor:])
  48. maxbytes -= len(self.contents[chunk]) - cursor
  49. chunk += 1
  50. cursor = 0
  51. self.pos = (chunk, cursor)
  52. return b''.join(buf)
  53. def tar_stream(store, tree, mtime, format=''):
  54. """Generate a tar stream for the contents of a Git tree.
  55. Returns a generator that lazily assembles a .tar.gz archive, yielding it in
  56. pieces (bytestrings). To obtain the complete .tar.gz binary file, simply
  57. concatenate these chunks.
  58. :param store: Object store to retrieve objects from
  59. :param tree: Tree object for the tree root
  60. :param mtime: UNIX timestamp that is assigned as the modification time for
  61. all files
  62. :param format: Optional compression format for tarball
  63. :return: Bytestrings
  64. """
  65. buf = BytesIO()
  66. with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
  67. for entry_abspath, entry in _walk_tree(store, tree):
  68. try:
  69. blob = store[entry.sha]
  70. except KeyError:
  71. # Entry probably refers to a submodule, which we don't yet support.
  72. continue
  73. data = ChunkedBytesIO(blob.chunked)
  74. info = tarfile.TarInfo()
  75. info.name = entry_abspath.decode('ascii') # tarfile only works with ascii.
  76. info.size = blob.raw_length()
  77. info.mode = entry.mode
  78. info.mtime = mtime
  79. tar.addfile(info, data)
  80. yield buf.getvalue()
  81. buf.truncate(0)
  82. buf.seek(0)
  83. yield buf.getvalue()
  84. def _walk_tree(store, tree, root=b''):
  85. """Recursively walk a dulwich Tree, yielding tuples of
  86. (absolute path, TreeEntry) along the way.
  87. """
  88. for entry in tree.iteritems():
  89. entry_abspath = posixpath.join(root, entry.path)
  90. if stat.S_ISDIR(entry.mode):
  91. for _ in _walk_tree(store, store[entry.sha], entry_abspath):
  92. yield _
  93. else:
  94. yield (entry_abspath, entry)