# gcs.py
# object_store.py -- Object store for git objects
# Copyright (C) 2021 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#
  21. """Storage of repositories on GCS."""
  22. __all__ = [
  23. "GcsObjectStore",
  24. ]
  25. import posixpath
  26. import tempfile
  27. from collections.abc import Iterator
  28. from typing import TYPE_CHECKING, BinaryIO
  29. from ..object_store import BucketBasedObjectStore
  30. from ..pack import (
  31. PACK_SPOOL_FILE_MAX_SIZE,
  32. Pack,
  33. PackData,
  34. PackIndex,
  35. load_pack_index_file,
  36. )
  37. if TYPE_CHECKING:
  38. from google.cloud.storage import Bucket
  39. # TODO(jelmer): For performance, read ranges?
  40. class GcsObjectStore(BucketBasedObjectStore):
  41. """Object store implementation using Google Cloud Storage."""
  42. def __init__(self, bucket: "Bucket", subpath: str = "") -> None:
  43. """Initialize GCS object store.
  44. Args:
  45. bucket: GCS bucket instance
  46. subpath: Optional subpath within the bucket
  47. """
  48. super().__init__()
  49. self.bucket = bucket
  50. self.subpath = subpath
  51. def __repr__(self) -> str:
  52. """Return string representation of GcsObjectStore."""
  53. return f"{type(self).__name__}({self.bucket!r}, subpath={self.subpath!r})"
  54. def _remove_pack_by_name(self, name: str) -> None:
  55. self.bucket.delete_blobs(
  56. [posixpath.join(self.subpath, name) + "." + ext for ext in ["pack", "idx"]]
  57. )
  58. def _iter_pack_names(self) -> Iterator[str]:
  59. packs: dict[str, set[str]] = {}
  60. for blob in self.bucket.list_blobs(prefix=self.subpath):
  61. name, ext = posixpath.splitext(posixpath.basename(blob.name))
  62. packs.setdefault(name, set()).add(ext)
  63. for name, exts in packs.items():
  64. if exts == {".pack", ".idx"}:
  65. yield name
  66. def _load_pack_data(self, name: str) -> PackData:
  67. b = self.bucket.blob(posixpath.join(self.subpath, name + ".pack"))
  68. from typing import cast
  69. from ..file import _GitFile
  70. with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
  71. b.download_to_file(f)
  72. f.seek(0)
  73. return PackData(name + ".pack", cast(_GitFile, f))
  74. def _load_pack_index(self, name: str) -> PackIndex:
  75. b = self.bucket.blob(posixpath.join(self.subpath, name + ".idx"))
  76. with tempfile.SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE) as f:
  77. b.download_to_file(f)
  78. f.seek(0)
  79. return load_pack_index_file(name + ".idx", f)
  80. def _get_pack(self, name: str) -> Pack:
  81. return Pack.from_lazy_objects(
  82. lambda: self._load_pack_data(name), lambda: self._load_pack_index(name)
  83. )
  84. def _upload_pack(
  85. self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
  86. ) -> None:
  87. idxblob = self.bucket.blob(posixpath.join(self.subpath, basename + ".idx"))
  88. datablob = self.bucket.blob(posixpath.join(self.subpath, basename + ".pack"))
  89. idxblob.upload_from_file(index_file)
  90. datablob.upload_from_file(pack_file)