2
0

archive.py 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. # archive.py -- Creating an archive from a tarball
  2. # Copyright (C) 2015 Jonas Haag <jonas@lophus.org>
  3. # Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
  4. #
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Generates tarballs for Git trees.
  22. """
  23. import posixpath
  24. import stat
  25. import tarfile
  26. from io import BytesIO
  27. from contextlib import closing
  28. class ChunkedBytesIO(object):
  29. """Turn a list of bytestrings into a file-like object.
  30. This is similar to creating a `BytesIO` from a concatenation of the
  31. bytestring list, but saves memory by NOT creating one giant bytestring first::
  32. BytesIO(b''.join(list_of_bytestrings)) =~= ChunkedBytesIO(list_of_bytestrings)
  33. """
  34. def __init__(self, contents):
  35. self.contents = contents
  36. self.pos = (0, 0)
  37. def read(self, maxbytes=None):
  38. if maxbytes < 0:
  39. maxbytes = float('inf')
  40. buf = []
  41. chunk, cursor = self.pos
  42. while chunk < len(self.contents):
  43. if maxbytes < len(self.contents[chunk]) - cursor:
  44. buf.append(self.contents[chunk][cursor:cursor+maxbytes])
  45. cursor += maxbytes
  46. self.pos = (chunk, cursor)
  47. break
  48. else:
  49. buf.append(self.contents[chunk][cursor:])
  50. maxbytes -= len(self.contents[chunk]) - cursor
  51. chunk += 1
  52. cursor = 0
  53. self.pos = (chunk, cursor)
  54. return b''.join(buf)
  55. def tar_stream(store, tree, mtime, format=''):
  56. """Generate a tar stream for the contents of a Git tree.
  57. Returns a generator that lazily assembles a .tar.gz archive, yielding it in
  58. pieces (bytestrings). To obtain the complete .tar.gz binary file, simply
  59. concatenate these chunks.
  60. :param store: Object store to retrieve objects from
  61. :param tree: Tree object for the tree root
  62. :param mtime: UNIX timestamp that is assigned as the modification time for
  63. all files
  64. :param format: Optional compression format for tarball
  65. :return: Bytestrings
  66. """
  67. buf = BytesIO()
  68. with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
  69. for entry_abspath, entry in _walk_tree(store, tree):
  70. try:
  71. blob = store[entry.sha]
  72. except KeyError:
  73. # Entry probably refers to a submodule, which we don't yet support.
  74. continue
  75. data = ChunkedBytesIO(blob.chunked)
  76. info = tarfile.TarInfo()
  77. info.name = entry_abspath.decode('ascii') # tarfile only works with ascii.
  78. info.size = blob.raw_length()
  79. info.mode = entry.mode
  80. info.mtime = mtime
  81. tar.addfile(info, data)
  82. yield buf.getvalue()
  83. buf.truncate(0)
  84. buf.seek(0)
  85. yield buf.getvalue()
  86. def _walk_tree(store, tree, root=b''):
  87. """Recursively walk a dulwich Tree, yielding tuples of
  88. (absolute path, TreeEntry) along the way.
  89. """
  90. for entry in tree.iteritems():
  91. entry_abspath = posixpath.join(root, entry.path)
  92. if stat.S_ISDIR(entry.mode):
  93. for _ in _walk_tree(store, store[entry.sha], entry_abspath):
  94. yield _
  95. else:
  96. yield (entry_abspath, entry)