Kaynağa Gözat

Add dulwich.archive module.

Jelmer Vernooij 9 yıl önce
ebeveyn
işleme
234e8a1649

+ 5 - 0
NEWS

@@ -1,5 +1,10 @@
 0.11.3	UNRELEASED
 
+ IMPROVEMENTS
+
+  * Add a `dulwich.archive` module that can create tarballs.
+    Based on code from Jonas Haag in klaus.
+
  BUG FIXES
 
   * Simplify handling of SSH command invocation.

+ 112 - 0
dulwich/archive.py

@@ -0,0 +1,112 @@
+# archive.py -- Creating a tarball archive from a Git tree
+# Copyright (C) 2015 Jonas Haag <jonas@lophus.org>
+# Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# or (at your option) a later version of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Generates tarballs for Git trees.
+
+"""
+
+import posixpath
+import stat
+import tarfile
+from io import BytesIO
+from contextlib import closing
+
+
+class ListBytesIO(object):
+    """Turn a list of bytestrings into a read-only file-like object.
+
+    This is similar to creating a `BytesIO` from a concatenation of the
+    bytestring list, but saves memory by NOT creating one giant bytestring
+    first::
+
+        BytesIO(b''.join(list_of_bytestrings)) =~= ListBytesIO(list_of_bytestrings)
+
+    Only sequential `read()` is implemented. The read position is kept in
+    `self.pos` as a `(chunk index, offset within that chunk)` tuple.
+    """
+    def __init__(self, contents):
+        self.contents = contents
+        self.pos = (0, 0)
+
+    def read(self, maxbytes=None):
+        """Read up to `maxbytes` bytes starting at the current position.
+
+        :param maxbytes: Maximum number of bytes to return; `None` or a
+            negative value reads everything that is left
+        :return: A bytestring, empty once all chunks have been consumed
+        """
+        # `None < 0` raises TypeError on Python 3, so check for None first.
+        if maxbytes is None or maxbytes < 0:
+            maxbytes = float('inf')
+
+        buf = []
+        chunk, cursor = self.pos
+
+        while chunk < len(self.contents):
+            remaining = len(self.contents[chunk]) - cursor
+            if maxbytes < remaining:
+                # The request ends inside this chunk: take a slice and stop.
+                buf.append(self.contents[chunk][cursor:cursor+maxbytes])
+                cursor += maxbytes
+                self.pos = (chunk, cursor)
+                break
+            else:
+                # Consume the rest of this chunk and move on to the next one.
+                buf.append(self.contents[chunk][cursor:])
+                maxbytes -= remaining
+                chunk += 1
+                cursor = 0
+                self.pos = (chunk, cursor)
+        return b''.join(buf)
+
+
+def tar_stream(store, tree, mtime, format=''):
+    """Generate a tar stream for the contents of a Git tree.
+
+    Returns a generator that lazily assembles a tar archive, yielding it in
+    pieces (bytestrings). To obtain the complete archive, simply concatenate
+    these chunks. The archive is uncompressed unless `format` names a
+    compression supported by `tarfile` (e.g. ``'gz'`` for a .tar.gz).
+
+    :param store: Object store to retrieve objects from
+    :param tree: Tree object for the tree root
+    :param mtime: UNIX timestamp that is assigned as the modification time for
+        all files
+    :param format: Optional compression format for tarball
+    :return: Bytestrings
+    """
+    buf = BytesIO()
+    with closing(tarfile.open(None, "w:%s" % format, buf)) as tar:
+        for entry_abspath, entry in _walk_tree(store, tree):
+            try:
+                blob = store[entry.sha]
+            except KeyError:
+                # Entry probably refers to a submodule, which we don't yet support.
+                continue
+            data = ListBytesIO(blob.chunked)
+
+            name = entry_abspath
+            if bytes is not str and isinstance(name, bytes):
+                # On Python 3 tree paths are bytes, but TarInfo.name must be
+                # text; surrogateescape keeps non-UTF-8 bytes round-trippable.
+                name = name.decode('utf-8', 'surrogateescape')
+
+            info = tarfile.TarInfo()
+            info.name = name
+            info.size = blob.raw_length()
+            info.mode = entry.mode
+            info.mtime = mtime
+
+            tar.addfile(info, data)
+            # Hand out what tarfile has written so far, then reset the
+            # buffer so the whole archive is never held in memory at once.
+            yield buf.getvalue()
+            buf.truncate(0)
+            buf.seek(0)
+    # Closing the tarfile writes the end-of-archive data; emit that too.
+    yield buf.getvalue()
+
+
+def _walk_tree(store, tree, root=b''):
+    """Recursively walk a dulwich Tree, yielding tuples of
+    (absolute path, TreeEntry) along the way.
+
+    Directory entries are descended into and are not yielded themselves.
+
+    :param store: Object store used to look up sub-trees by SHA
+    :param tree: Tree object to walk
+    :param root: Path prefix of `tree`; a bytestring, because
+        `posixpath.join` cannot mix text and bytes on Python 3
+    :return: Iterator over (absolute path, TreeEntry) tuples
+    """
+    for entry in tree.iteritems():
+        entry_abspath = posixpath.join(root, entry.path)
+        if stat.S_ISDIR(entry.mode):
+            subtree = store[entry.sha]
+            for item in _walk_tree(store, subtree, entry_abspath):
+                yield item
+        else:
+            yield (entry_abspath, entry)

+ 11 - 10
dulwich/porcelain.py

@@ -57,9 +57,11 @@ import os
 import sys
 import time
 
+from dulwich.archive import (
+    tar_stream,
+    )
 from dulwich.client import (
     get_transport_and_path,
-    SubprocessGitClient,
     )
 from dulwich.errors import (
     SendPackError,
@@ -126,25 +128,24 @@ def open_repo_closing(path_or_repo):
     return closing(Repo(path_or_repo))
 
 
-def archive(path, committish=None, outstream=sys.stdout,
+def archive(repo, committish=None, outstream=sys.stdout,
             errstream=sys.stderr):
     """Create an archive.
 
-    :param path: Path of repository for which to generate an archive.
+    :param repo: Path of repository for which to generate an archive.
     :param committish: Commit SHA1 or ref to use
     :param outstream: Output stream (defaults to stdout)
     :param errstream: Error stream (defaults to stderr)
+        (still accepted, but this implementation never writes to it)
     """
 
-    client = SubprocessGitClient()
     if committish is None:
         committish = "HEAD"
-    if not isinstance(path, bytes):
-        path = path.encode(sys.getfilesystemencoding())
-    # TODO(jelmer): This invokes C git; this introduces a dependency.
-    # Instead, dulwich should have its own archiver implementation.
-    client.archive(path, committish, outstream.write, errstream.write,
-                   errstream.write)
+    with open_repo_closing(repo) as repo_obj:
+        c = repo_obj[committish]
+        # Resolve the commit's root tree once; it becomes the archive root.
+        tree = repo_obj.object_store[c.tree]
+        # Every archive member gets the commit time as its mtime.
+        for chunk in tar_stream(repo_obj.object_store, tree, c.commit_time):
+            outstream.write(chunk)
 
 
 def update_server_info(repo="."):

+ 1 - 0
dulwich/tests/__init__.py

@@ -115,6 +115,7 @@ class BlackboxTestCase(TestCase):
 
 def self_test_suite():
     names = [
+        'archive',
         'blackbox',
         'client',
         'config',

+ 63 - 0
dulwich/tests/test_archive.py

@@ -0,0 +1,63 @@
+# test_archive.py -- tests for archive
+# Copyright (C) 2015 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License or (at your option) a later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+# MA  02110-1301, USA.
+
+"""Tests for archive support."""
+
+from io import BytesIO
+import tarfile
+
+from dulwich.archive import tar_stream
+from dulwich.object_store import (
+    MemoryObjectStore,
+    )
+from dulwich.objects import (
+    Blob,
+    Tree,
+    )
+from dulwich.tests import (
+    TestCase,
+    )
+from dulwich.tests.utils import (
+    build_commit_graph,
+    )
+
+
+class ArchiveTests(TestCase):
+    """Tests for `dulwich.archive.tar_stream`."""
+
+    def test_empty(self):
+        """The tree of a generated commit yields a tarball with no members."""
+        store = MemoryObjectStore()
+        c1, c2, c3 = build_commit_graph(store, [[1], [2, 1], [3, 1, 2]])
+        tree = store[c3.tree]
+        # tar_stream yields bytestrings, so they must be joined as bytes.
+        stream = b''.join(tar_stream(store, tree, 10))
+        out = BytesIO(stream)
+        tf = tarfile.TarFile(fileobj=out)
+        self.addCleanup(tf.close)
+        self.assertEqual([], tf.getnames())
+
+    def test_simple(self):
+        """A tree holding a single blob yields a tarball with that one name."""
+        store = MemoryObjectStore()
+        # Blob contents are bytes, like the tree entry name below.
+        b1 = Blob.from_string(b"somedata")
+        store.add_object(b1)
+        t1 = Tree()
+        t1.add(b"somename", 0o100644, b1.id)
+        store.add_object(t1)
+        stream = b''.join(tar_stream(store, t1, 10))
+        out = BytesIO(stream)
+        tf = tarfile.TarFile(fileobj=out)
+        self.addCleanup(tf.close)
+        self.assertEqual(["somename"], tf.getnames())

+ 0 - 1
dulwich/tests/test_client.py

@@ -31,7 +31,6 @@ from dulwich.client import (
     LocalGitClient,
     TraditionalGitClient,
     TCPGitClient,
-    SubprocessGitClient,
     SSHGitClient,
     HttpGitClient,
     ReportStatusParser,

+ 0 - 3
dulwich/tests/test_porcelain.py

@@ -40,7 +40,6 @@ from dulwich.repo import Repo
 from dulwich.tests import (
     TestCase,
     )
-from dulwich.tests.compat.utils import require_git_version
 from dulwich.tests.utils import (
     build_commit_graph,
     make_object,
@@ -64,8 +63,6 @@ class ArchiveTests(PorcelainTestCase):
     """Tests for the archive command."""
 
     def test_simple(self):
-        # TODO(jelmer): Remove this once dulwich has its own implementation of archive.
-        require_git_version((1, 5, 0))
         c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
         self.repo.refs[b"refs/heads/master"] = c3.id
         out = BytesIO()