archive.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. # archive.py -- Creating an archive from a tarball
  2. # Copyright (C) 2015 Jonas Haag <jonas@lophus.org>
  3. # Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Generates tarballs for Git trees.
  22. """
  23. import posixpath
  24. import stat
  25. import tarfile
  26. from io import BytesIO
  27. from contextlib import closing
  28. class ChunkedBytesIO(object):
  29. """Turn a list of bytestrings into a file-like object.
  30. This is similar to creating a `BytesIO` from a concatenation of the
  31. bytestring list, but saves memory by NOT creating one giant bytestring
  32. first::
  33. BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(
  34. list_of_bytestrings)
  35. """
  36. def __init__(self, contents):
  37. self.contents = contents
  38. self.pos = (0, 0)
  39. def read(self, maxbytes=None):
  40. if maxbytes < 0:
  41. maxbytes = float('inf')
  42. buf = []
  43. chunk, cursor = self.pos
  44. while chunk < len(self.contents):
  45. if maxbytes < len(self.contents[chunk]) - cursor:
  46. buf.append(self.contents[chunk][cursor:cursor+maxbytes])
  47. cursor += maxbytes
  48. self.pos = (chunk, cursor)
  49. break
  50. else:
  51. buf.append(self.contents[chunk][cursor:])
  52. maxbytes -= len(self.contents[chunk]) - cursor
  53. chunk += 1
  54. cursor = 0
  55. self.pos = (chunk, cursor)
  56. return b''.join(buf)
  57. def tar_stream(store, tree, mtime, format=''):
  58. """Generate a tar stream for the contents of a Git tree.
  59. Returns a generator that lazily assembles a .tar.gz archive, yielding it in
  60. pieces (bytestrings). To obtain the complete .tar.gz binary file, simply
  61. concatenate these chunks.
  62. :param store: Object store to retrieve objects from
  63. :param tree: Tree object for the tree root
  64. :param mtime: UNIX timestamp that is assigned as the modification time for
  65. all files
  66. :param format: Optional compression format for tarball
  67. :return: Bytestrings
  68. """
  69. buf = BytesIO()
  70. with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
  71. for entry_abspath, entry in _walk_tree(store, tree):
  72. try:
  73. blob = store[entry.sha]
  74. except KeyError:
  75. # Entry probably refers to a submodule, which we don't yet
  76. # support.
  77. continue
  78. data = ChunkedBytesIO(blob.chunked)
  79. info = tarfile.TarInfo()
  80. # tarfile only works with ascii.
  81. info.name = entry_abspath.decode('ascii')
  82. info.size = blob.raw_length()
  83. info.mode = entry.mode
  84. info.mtime = mtime
  85. tar.addfile(info, data)
  86. yield buf.getvalue()
  87. buf.truncate(0)
  88. buf.seek(0)
  89. yield buf.getvalue()
  90. def _walk_tree(store, tree, root=b''):
  91. """Recursively walk a dulwich Tree, yielding tuples of
  92. (absolute path, TreeEntry) along the way.
  93. """
  94. for entry in tree.iteritems():
  95. entry_abspath = posixpath.join(root, entry.path)
  96. if stat.S_ISDIR(entry.mode):
  97. for _ in _walk_tree(store, store[entry.sha], entry_abspath):
  98. yield _
  99. else:
  100. yield (entry_abspath, entry)