file.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. # file.py -- Safe access to git files
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Safe access to git files."""
# Names exported by ``from <this module> import *``. The underscore-prefixed
# helpers defined below (``_fancy_rename``, ``_GitFile``) are not listed.
__all__ = [
    "FileLocked",
    "GitFile",
    "ensure_dir_exists",
]
  27. import os
  28. import sys
  29. import warnings
  30. from collections.abc import Iterable, Iterator
  31. from types import TracebackType
  32. from typing import IO, Any, ClassVar, Literal, overload
  33. from ._typing import Buffer
  34. def ensure_dir_exists(
  35. dirname: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  36. ) -> None:
  37. """Ensure a directory exists, creating if necessary."""
  38. try:
  39. os.makedirs(dirname)
  40. except FileExistsError:
  41. pass
  42. def _fancy_rename(oldname: str | bytes, newname: str | bytes) -> None:
  43. """Rename file with temporary backup file to rollback if rename fails."""
  44. if not os.path.exists(newname):
  45. os.rename(oldname, newname)
  46. return
  47. # Defer the tempfile import since it pulls in a lot of other things.
  48. import tempfile
  49. # destination file exists
  50. (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=str(oldname), dir=".")
  51. os.close(fd)
  52. os.remove(tmpfile)
  53. os.rename(newname, tmpfile)
  54. try:
  55. os.rename(oldname, newname)
  56. except OSError:
  57. os.rename(tmpfile, newname)
  58. raise
  59. os.remove(tmpfile)
  60. @overload
  61. def GitFile(
  62. filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  63. mode: Literal["wb"],
  64. bufsize: int = -1,
  65. mask: int = 0o644,
  66. fsync: bool = True,
  67. ) -> "_GitFile": ...
  68. @overload
  69. def GitFile(
  70. filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  71. mode: Literal["rb"] = "rb",
  72. bufsize: int = -1,
  73. mask: int = 0o644,
  74. fsync: bool = True,
  75. ) -> IO[bytes]: ...
  76. @overload
  77. def GitFile(
  78. filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  79. mode: str = "rb",
  80. bufsize: int = -1,
  81. mask: int = 0o644,
  82. fsync: bool = True,
  83. ) -> "IO[bytes] | _GitFile": ...
  84. def GitFile(
  85. filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  86. mode: str = "rb",
  87. bufsize: int = -1,
  88. mask: int = 0o644,
  89. fsync: bool = True,
  90. ) -> "IO[bytes] | _GitFile":
  91. """Create a file object that obeys the git file locking protocol.
  92. Returns: a builtin file object or a _GitFile object
  93. Note: See _GitFile for a description of the file locking protocol.
  94. Only read-only and write-only (binary) modes are supported; r+, w+, and a
  95. are not. To read and write from the same file, you can take advantage of
  96. the fact that opening a file for write does not actually open the file you
  97. request.
  98. The default file mask makes any created files user-writable and
  99. world-readable.
  100. Args:
  101. filename: Path to the file
  102. mode: File mode (only 'rb' and 'wb' are supported)
  103. bufsize: Buffer size for file operations
  104. mask: File mask for created files
  105. fsync: Whether to call fsync() before closing (default: True)
  106. """
  107. if "a" in mode:
  108. raise OSError("append mode not supported for Git files")
  109. if "+" in mode:
  110. raise OSError("read/write mode not supported for Git files")
  111. if "b" not in mode:
  112. raise OSError("text mode not supported for Git files")
  113. if "w" in mode:
  114. return _GitFile(filename, mode, bufsize, mask, fsync)
  115. else:
  116. return open(filename, mode, bufsize)
  117. class FileLocked(Exception):
  118. """File is already locked."""
  119. def __init__(
  120. self,
  121. filename: str | bytes,
  122. lockfilename: str | bytes,
  123. ) -> None:
  124. """Initialize FileLocked.
  125. Args:
  126. filename: Name of the file that is locked
  127. lockfilename: Name of the lock file
  128. """
  129. self.filename = filename
  130. self.lockfilename = lockfilename
  131. super().__init__(filename, lockfilename)
  132. class _GitFile(IO[bytes]):
  133. """File that follows the git locking protocol for writes.
  134. All writes to a file foo will be written into foo.lock in the same
  135. directory, and the lockfile will be renamed to overwrite the original file
  136. on close.
  137. Note: You *must* call close() or abort() on a _GitFile for the lock to be
  138. released. Typically this will happen in a finally block.
  139. """
  140. _file: IO[bytes]
  141. _filename: str | bytes
  142. _lockfilename: str | bytes
  143. _closed: bool
  144. PROXY_PROPERTIES: ClassVar[set[str]] = {
  145. "encoding",
  146. "errors",
  147. "mode",
  148. "name",
  149. "newlines",
  150. "softspace",
  151. }
  152. PROXY_METHODS: ClassVar[set[str]] = {
  153. "__iter__",
  154. "__next__",
  155. "flush",
  156. "fileno",
  157. "isatty",
  158. "read",
  159. "readable",
  160. "readline",
  161. "readlines",
  162. "seek",
  163. "seekable",
  164. "tell",
  165. "truncate",
  166. "writable",
  167. "write",
  168. "writelines",
  169. }
  170. def __init__(
  171. self,
  172. filename: str | bytes | os.PathLike[str] | os.PathLike[bytes],
  173. mode: str,
  174. bufsize: int,
  175. mask: int,
  176. fsync: bool = True,
  177. ) -> None:
  178. # Convert PathLike to str/bytes for our internal use
  179. self._filename: str | bytes = os.fspath(filename)
  180. self._fsync = fsync
  181. if isinstance(self._filename, bytes):
  182. self._lockfilename: str | bytes = self._filename + b".lock"
  183. else:
  184. self._lockfilename = self._filename + ".lock"
  185. try:
  186. fd = os.open(
  187. self._lockfilename,
  188. os.O_RDWR | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0),
  189. mask,
  190. )
  191. except FileExistsError as exc:
  192. raise FileLocked(self._filename, self._lockfilename) from exc
  193. self._file = os.fdopen(fd, mode, bufsize)
  194. self._closed = False
  195. def __iter__(self) -> Iterator[bytes]:
  196. """Iterate over lines in the file."""
  197. return iter(self._file)
  198. def abort(self) -> None:
  199. """Close and discard the lockfile without overwriting the target.
  200. If the file is already closed, this is a no-op.
  201. """
  202. if self._closed:
  203. return
  204. self._file.close()
  205. try:
  206. os.remove(self._lockfilename)
  207. self._closed = True
  208. except FileNotFoundError:
  209. # The file may have been removed already, which is ok.
  210. self._closed = True
  211. def close(self) -> None:
  212. """Close this file, saving the lockfile over the original.
  213. Note: If this method fails, it will attempt to delete the lockfile.
  214. However, it is not guaranteed to do so (e.g. if a filesystem
  215. becomes suddenly read-only), which will prevent future writes to
  216. this file until the lockfile is removed manually.
  217. Raises:
  218. OSError: if the original file could not be overwritten. The
  219. lock file is still closed, so further attempts to write to the same
  220. file object will raise ValueError.
  221. """
  222. if self._closed:
  223. return
  224. self._file.flush()
  225. if self._fsync:
  226. os.fsync(self._file.fileno())
  227. self._file.close()
  228. try:
  229. if getattr(os, "replace", None) is not None:
  230. os.replace(self._lockfilename, self._filename)
  231. else:
  232. if sys.platform != "win32":
  233. os.rename(self._lockfilename, self._filename)
  234. else:
  235. # Windows versions prior to Vista don't support atomic
  236. # renames
  237. _fancy_rename(self._lockfilename, self._filename)
  238. finally:
  239. self.abort()
  240. def __del__(self) -> None:
  241. if not getattr(self, "_closed", True):
  242. warnings.warn(f"unclosed {self!r}", ResourceWarning, stacklevel=2)
  243. self.abort()
  244. def __enter__(self) -> "_GitFile":
  245. return self
  246. def __exit__(
  247. self,
  248. exc_type: type[BaseException] | None,
  249. exc_val: BaseException | None,
  250. exc_tb: TracebackType | None,
  251. ) -> None:
  252. if exc_type is not None:
  253. self.abort()
  254. else:
  255. self.close()
  256. def __fspath__(self) -> str | bytes:
  257. """Return the file path for os.fspath() compatibility."""
  258. return self._filename
  259. @property
  260. def closed(self) -> bool:
  261. """Return whether the file is closed."""
  262. return self._closed
  263. def __getattr__(self, name: str) -> Any: # noqa: ANN401
  264. """Proxy property calls to the underlying file."""
  265. if name in self.PROXY_PROPERTIES:
  266. return getattr(self._file, name)
  267. raise AttributeError(name)
  268. # Implement IO[bytes] methods by delegating to the underlying file
  269. def read(self, size: int = -1) -> bytes:
  270. return self._file.read(size)
  271. # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
  272. # Python 3.10 has issues with IO[bytes] overload signatures
  273. def write(self, data: Buffer, /) -> int: # type: ignore[override,unused-ignore]
  274. return self._file.write(data)
  275. def readline(self, size: int = -1) -> bytes:
  276. return self._file.readline(size)
  277. def readlines(self, hint: int = -1) -> list[bytes]:
  278. return self._file.readlines(hint)
  279. # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
  280. # Python 3.10 has issues with IO[bytes] overload signatures
  281. def writelines(self, lines: Iterable[Buffer], /) -> None: # type: ignore[override,unused-ignore]
  282. return self._file.writelines(lines)
  283. def seek(self, offset: int, whence: int = 0) -> int:
  284. return self._file.seek(offset, whence)
  285. def tell(self) -> int:
  286. return self._file.tell()
  287. def flush(self) -> None:
  288. return self._file.flush()
  289. def truncate(self, size: int | None = None) -> int:
  290. return self._file.truncate(size)
  291. def fileno(self) -> int:
  292. return self._file.fileno()
  293. def isatty(self) -> bool:
  294. return self._file.isatty()
  295. def readable(self) -> bool:
  296. return self._file.readable()
  297. def writable(self) -> bool:
  298. return self._file.writable()
  299. def seekable(self) -> bool:
  300. return self._file.seekable()
  301. def __next__(self) -> bytes:
  302. return next(iter(self._file))