Sfoglia il codice sorgente

Add a GcsObjectStore.

Jelmer Vernooij 4 anni fa
parent
commit
e59f22c1d3
6 changed files with 199 additions and 29 deletions
  1. 3 0
      NEWS
  2. 1 1
      dulwich/client.py
  3. 0 0
      dulwich/cloud/__init__.py
  4. 80 0
      dulwich/cloud/gcs.py
  5. 82 0
      dulwich/object_store.py
  6. 33 28
      dulwich/repo.py

+ 3 - 0
NEWS

@@ -1,5 +1,8 @@
 0.20.21	UNRELEASED
 
+ * Add basic support for a GcsObjectStore that stores
+   pack files in GCS. (Jelmer Vernooij)
+
  * In porcelain.push, default to local active branch.
    (Jelmer Vernooij, #846)
 

+ 1 - 1
dulwich/client.py

@@ -873,7 +873,7 @@ class TraditionalGitClient(GitClient):
         self._remote_path_encoding = path_encoding
         super(TraditionalGitClient, self).__init__(**kwargs)
 
-    def _connect(self, cmd, path):
+    async def _connect(self, cmd, path):
         """Create a connection to the server.
 
         This method is abstract - concrete implementations should

+ 0 - 0
dulwich/cloud/__init__.py


+ 80 - 0
dulwich/cloud/gcs.py

@@ -0,0 +1,80 @@
+# gcs.py -- Object store for git objects stored in Google Cloud Storage
+# Copyright (C) 2021 Jelmer Vernooij <jelmer@jelmer.uk>
+#
+# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
+# General Public License as published by the Free Software Foundation; version 2.0
+# or (at your option) any later version. You can redistribute it and/or
+# modify it under the terms of either of these two licenses.
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# You should have received a copy of the licenses; if not, see
+# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
+# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
+# License, Version 2.0.
+#
+
+
+"""Storage of repositories on GCS."""
+
+from io import BytesIO
+import posixpath
+import tempfile
+
+from ..object_store import BucketBasedObjectStore
+from ..pack import PackData, Pack, load_pack_index_file
+
+
class GcsObjectStore(BucketBasedObjectStore):
    """Object store backed by a Google Cloud Storage bucket.

    Pack data (``.pack``) and index (``.idx``) files live as blobs under
    ``subpath``; loose objects are not supported (see BucketBasedObjectStore).
    """

    def __init__(self, bucket, subpath=''):
        super(GcsObjectStore, self).__init__()
        # google.cloud.storage Bucket — presumably; verify against callers.
        self.bucket = bucket
        self.subpath = subpath

    def __repr__(self):
        return "%s(%r, subpath=%r)" % (
            type(self).__name__, self.bucket, self.subpath)

    def _blob_path(self, filename):
        # Blob name for a pack-related file under this store's prefix.
        return posixpath.join(self.subpath, filename)

    def _remove_pack(self, name):
        # Delete both halves of the pack in a single batch request.
        doomed = [self._blob_path(name + '.' + ext) for ext in ('pack', 'idx')]
        self.bucket.delete_blobs(doomed)

    def _iter_pack_names(self):
        # Group blob basenames by extension; only yield packs for which
        # both the .pack and the .idx blob are present.
        seen = {}
        for blob in self.bucket.list_blobs(prefix=self.subpath):
            stem, ext = posixpath.splitext(posixpath.basename(blob.name))
            seen.setdefault(stem, set()).add(ext)
        for stem, exts in seen.items():
            if exts == {'.pack', '.idx'}:
                yield stem

    def _fetch_blob(self, filename):
        # Download a blob into a spooled temporary file, rewound for reading.
        blob = self.bucket.blob(self._blob_path(filename))
        fileobj = tempfile.SpooledTemporaryFile()
        blob.download_to_file(fileobj)
        fileobj.seek(0)
        return fileobj

    def _load_pack_data(self, name):
        filename = name + '.pack'
        return PackData(filename, self._fetch_blob(filename))

    def _load_pack_index(self, name):
        filename = name + '.idx'
        return load_pack_index_file(filename, self._fetch_blob(filename))

    def _get_pack(self, name):
        # Defer the (potentially large) downloads until the pack is used.
        return Pack.from_lazy_objects(
            lambda: self._load_pack_data(name),
            lambda: self._load_pack_index(name))

    def _upload_pack(self, basename, pack_file, index_file):
        idx_blob = self.bucket.blob(self._blob_path(basename + '.idx'))
        data_blob = self.bucket.blob(self._blob_path(basename + '.pack'))
        # Upload the index first so a pack never appears complete
        # (per _iter_pack_names) before its data has been stored.
        idx_blob.upload_from_file(index_file)
        data_blob.upload_from_file(pack_file)

+ 82 - 0
dulwich/object_store.py

@@ -53,6 +53,7 @@ from dulwich.pack import (
     PackData,
     PackInflater,
     PackFileDisappeared,
+    load_pack_index_file,
     iter_sha1,
     pack_objects_to_data,
     write_pack_header,
@@ -1471,3 +1472,84 @@ def read_packs_file(f):
         if kind != b"P":
             continue
         yield os.fsdecode(name)
+
+
class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Subclasses supply the blob-level operations (``_iter_pack_names``,
    ``_get_pack``, ``_remove_pack``, ``_upload_pack``); this base class
    provides pack-cache management and the ``add_pack`` protocol on top.
    Loose objects are not supported: every object lives in a pack.
    """

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        return iter([])

    def _get_loose_object(self, sha):
        return None

    def _remove_loose_object(self, sha):
        # Doesn't exist..
        pass

    def _remove_pack(self, name):
        raise NotImplementedError(self._remove_pack)

    def _iter_pack_names(self):
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name):
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self):
        """Sync the pack cache with the bucket; return newly seen packs."""
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(self, basename, pack_file, index_file):
        # Name the missing method, consistent with the other abstract
        # hooks above (was a bare NotImplementedError).
        raise NotImplementedError(self._upload_pack)

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        pf = tempfile.SpooledTemporaryFile()

        def commit():
            if pf.tell() == 0:
                # Nothing was written, so there is no pack to store.
                pf.close()
                return None

            pf.seek(0)
            # SpooledTemporaryFile has no usable .name until it rolls over
            # to disk, and with the default max_size=0 it never does, so
            # accessing pf.name would raise AttributeError.  Pass a
            # synthetic label instead; PackData reads from the open file
            # object when one is supplied.
            p = PackData("incoming.pack", pf)
            entries = p.sorted_entries()
            # Packs are named after the SHA1 over their sorted object ids.
            basename = iter_sha1(entry[0] for entry in entries).decode('ascii')
            idxf = tempfile.SpooledTemporaryFile()
            checksum = p.get_stored_checksum()
            write_pack_index_v2(idxf, entries, checksum)
            idxf.seek(0)
            idx = load_pack_index_file(basename + '.idx', idxf)
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    # Identical pack already present: skip the upload.
                    p.close()
                    idx.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            return final_pack

        return pf, commit, pf.close

+ 33 - 28
dulwich/repo.py

@@ -1034,26 +1034,30 @@ class Repo(BaseRepo):
     To create a new repository, use the Repo.init class method.
     """
 
-    def __init__(self, root):
+    def __init__(self, root, object_store=None, bare=None):
         hidden_path = os.path.join(root, CONTROLDIR)
-        if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)):
-            self.bare = False
-            self._controldir = hidden_path
-        elif os.path.isdir(os.path.join(root, OBJECTDIR)) and os.path.isdir(
-            os.path.join(root, REFSDIR)
-        ):
-            self.bare = True
-            self._controldir = root
-        elif os.path.isfile(hidden_path):
-            self.bare = False
-            with open(hidden_path, "r") as f:
-                path = read_gitfile(f)
-            self.bare = False
-            self._controldir = os.path.join(root, path)
+        if bare is None:
+            if os.path.isdir(os.path.join(hidden_path, OBJECTDIR)):
+                bare = False
+            elif (os.path.isdir(os.path.join(root, OBJECTDIR)) and
+                    os.path.isdir(os.path.join(root, REFSDIR))):
+                bare = True
+            else:
+                raise NotGitRepository(
+                    "No git repository was found at %(path)s" % dict(path=root)
+                )
+
+        self.bare = bare
+        if bare is False:
+            if os.path.isfile(hidden_path):
+                with open(hidden_path, "r") as f:
+                    path = read_gitfile(f)
+                self.bare = False
+                self._controldir = os.path.join(root, path)
+            else:
+                self._controldir = hidden_path
         else:
-            raise NotGitRepository(
-                "No git repository was found at %(path)s" % dict(path=root)
-            )
+            self._controldir = root
         commondir = self.get_named_file(COMMONDIR)
         if commondir is not None:
             with commondir:
@@ -1071,9 +1075,10 @@ class Repo(BaseRepo):
             format_version = 0
         if format_version != 0:
             raise UnsupportedVersion(format_version)
-        object_store = DiskObjectStore.from_config(
-            os.path.join(self.commondir(), OBJECTDIR), config
-        )
+        if object_store is None:
+            object_store = DiskObjectStore.from_config(
+                os.path.join(self.commondir(), OBJECTDIR), config
+            )
         refs = DiskRefsContainer(
             self.commondir(), self._controldir, logger=self._write_reflog
         )
@@ -1428,11 +1433,12 @@ class Repo(BaseRepo):
         self._put_named_file("description", description)
 
     @classmethod
-    def _init_maybe_bare(cls, path, bare):
+    def _init_maybe_bare(cls, path, bare, object_store=None):
         for d in BASE_DIRECTORIES:
             os.mkdir(os.path.join(path, *d))
-        DiskObjectStore.init(os.path.join(path, OBJECTDIR))
-        ret = cls(path)
+        if object_store is None:
+            object_store = DiskObjectStore.init(os.path.join(path, OBJECTDIR))
+        ret = cls(path, bare=bare, object_store=object_store)
         ret.refs.set_symbolic_ref(b"HEAD", DEFAULT_REF)
         ret._init_files(bare)
         return ret
@@ -1451,8 +1457,7 @@ class Repo(BaseRepo):
         controldir = os.path.join(path, CONTROLDIR)
         os.mkdir(controldir)
         _set_filesystem_hidden(controldir)
-        cls._init_maybe_bare(controldir, False)
-        return cls(path)
+        return cls._init_maybe_bare(controldir, False)
 
     @classmethod
     def _init_new_working_directory(cls, path, main_repo, identifier=None, mkdir=False):
@@ -1493,7 +1498,7 @@ class Repo(BaseRepo):
         return r
 
     @classmethod
-    def init_bare(cls, path, mkdir=False):
+    def init_bare(cls, path, mkdir=False, object_store=None):
         """Create a new bare repository.
 
         ``path`` should already exist and be an empty directory.
@@ -1504,7 +1509,7 @@ class Repo(BaseRepo):
         """
         if mkdir:
             os.mkdir(path)
-        return cls._init_maybe_bare(path, True)
+        return cls._init_maybe_bare(path, True, object_store=object_store)
 
     create = init_bare