file.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. # file.py -- Safe access to git files
  2. # Copyright (C) 2010 Google, Inc.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Safe access to git files."""
  22. import os
  23. import sys
  24. import warnings
  25. from collections.abc import Iterable, Iterator
  26. from types import TracebackType
  27. from typing import IO, Any, ClassVar, Literal, Optional, Union, overload
  28. if sys.version_info >= (3, 12):
  29. from collections.abc import Buffer
  30. else:
  31. Buffer = Union[bytes, bytearray, memoryview]
  32. def ensure_dir_exists(
  33. dirname: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  34. ) -> None:
  35. """Ensure a directory exists, creating if necessary."""
  36. try:
  37. os.makedirs(dirname)
  38. except FileExistsError:
  39. pass
  40. def _fancy_rename(oldname: Union[str, bytes], newname: Union[str, bytes]) -> None:
  41. """Rename file with temporary backup file to rollback if rename fails."""
  42. if not os.path.exists(newname):
  43. os.rename(oldname, newname)
  44. return
  45. # Defer the tempfile import since it pulls in a lot of other things.
  46. import tempfile
  47. # destination file exists
  48. (fd, tmpfile) = tempfile.mkstemp(".tmp", prefix=str(oldname), dir=".")
  49. os.close(fd)
  50. os.remove(tmpfile)
  51. os.rename(newname, tmpfile)
  52. try:
  53. os.rename(oldname, newname)
  54. except OSError:
  55. os.rename(tmpfile, newname)
  56. raise
  57. os.remove(tmpfile)
  58. @overload
  59. def GitFile(
  60. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  61. mode: Literal["wb"],
  62. bufsize: int = -1,
  63. mask: int = 0o644,
  64. fsync: bool = True,
  65. ) -> "_GitFile": ...
  66. @overload
  67. def GitFile(
  68. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  69. mode: Literal["rb"] = "rb",
  70. bufsize: int = -1,
  71. mask: int = 0o644,
  72. fsync: bool = True,
  73. ) -> IO[bytes]: ...
  74. @overload
  75. def GitFile(
  76. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  77. mode: str = "rb",
  78. bufsize: int = -1,
  79. mask: int = 0o644,
  80. fsync: bool = True,
  81. ) -> Union[IO[bytes], "_GitFile"]: ...
  82. def GitFile(
  83. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  84. mode: str = "rb",
  85. bufsize: int = -1,
  86. mask: int = 0o644,
  87. fsync: bool = True,
  88. ) -> Union[IO[bytes], "_GitFile"]:
  89. """Create a file object that obeys the git file locking protocol.
  90. Returns: a builtin file object or a _GitFile object
  91. Note: See _GitFile for a description of the file locking protocol.
  92. Only read-only and write-only (binary) modes are supported; r+, w+, and a
  93. are not. To read and write from the same file, you can take advantage of
  94. the fact that opening a file for write does not actually open the file you
  95. request.
  96. The default file mask makes any created files user-writable and
  97. world-readable.
  98. Args:
  99. filename: Path to the file
  100. mode: File mode (only 'rb' and 'wb' are supported)
  101. bufsize: Buffer size for file operations
  102. mask: File mask for created files
  103. fsync: Whether to call fsync() before closing (default: True)
  104. """
  105. if "a" in mode:
  106. raise OSError("append mode not supported for Git files")
  107. if "+" in mode:
  108. raise OSError("read/write mode not supported for Git files")
  109. if "b" not in mode:
  110. raise OSError("text mode not supported for Git files")
  111. if "w" in mode:
  112. return _GitFile(filename, mode, bufsize, mask, fsync)
  113. else:
  114. return open(filename, mode, bufsize)
  115. class FileLocked(Exception):
  116. """File is already locked."""
  117. def __init__(
  118. self,
  119. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  120. lockfilename: Union[str, bytes],
  121. ) -> None:
  122. """Initialize FileLocked.
  123. Args:
  124. filename: Name of the file that is locked
  125. lockfilename: Name of the lock file
  126. """
  127. self.filename = filename
  128. self.lockfilename = lockfilename
  129. super().__init__(filename, lockfilename)
  130. class _GitFile(IO[bytes]):
  131. """File that follows the git locking protocol for writes.
  132. All writes to a file foo will be written into foo.lock in the same
  133. directory, and the lockfile will be renamed to overwrite the original file
  134. on close.
  135. Note: You *must* call close() or abort() on a _GitFile for the lock to be
  136. released. Typically this will happen in a finally block.
  137. """
  138. _file: IO[bytes]
  139. _filename: Union[str, bytes]
  140. _lockfilename: Union[str, bytes]
  141. _closed: bool
  142. PROXY_PROPERTIES: ClassVar[set[str]] = {
  143. "encoding",
  144. "errors",
  145. "mode",
  146. "name",
  147. "newlines",
  148. "softspace",
  149. }
  150. PROXY_METHODS: ClassVar[set[str]] = {
  151. "__iter__",
  152. "__next__",
  153. "flush",
  154. "fileno",
  155. "isatty",
  156. "read",
  157. "readable",
  158. "readline",
  159. "readlines",
  160. "seek",
  161. "seekable",
  162. "tell",
  163. "truncate",
  164. "writable",
  165. "write",
  166. "writelines",
  167. }
  168. def __init__(
  169. self,
  170. filename: Union[str, bytes, os.PathLike[str], os.PathLike[bytes]],
  171. mode: str,
  172. bufsize: int,
  173. mask: int,
  174. fsync: bool = True,
  175. ) -> None:
  176. # Convert PathLike to str/bytes for our internal use
  177. self._filename: Union[str, bytes] = os.fspath(filename)
  178. self._fsync = fsync
  179. if isinstance(self._filename, bytes):
  180. self._lockfilename: Union[str, bytes] = self._filename + b".lock"
  181. else:
  182. self._lockfilename = self._filename + ".lock"
  183. try:
  184. fd = os.open(
  185. self._lockfilename,
  186. os.O_RDWR | os.O_CREAT | os.O_EXCL | getattr(os, "O_BINARY", 0),
  187. mask,
  188. )
  189. except FileExistsError as exc:
  190. raise FileLocked(filename, self._lockfilename) from exc
  191. self._file = os.fdopen(fd, mode, bufsize)
  192. self._closed = False
  193. def __iter__(self) -> Iterator[bytes]:
  194. """Iterate over lines in the file."""
  195. return iter(self._file)
  196. def abort(self) -> None:
  197. """Close and discard the lockfile without overwriting the target.
  198. If the file is already closed, this is a no-op.
  199. """
  200. if self._closed:
  201. return
  202. self._file.close()
  203. try:
  204. os.remove(self._lockfilename)
  205. self._closed = True
  206. except FileNotFoundError:
  207. # The file may have been removed already, which is ok.
  208. self._closed = True
  209. def close(self) -> None:
  210. """Close this file, saving the lockfile over the original.
  211. Note: If this method fails, it will attempt to delete the lockfile.
  212. However, it is not guaranteed to do so (e.g. if a filesystem
  213. becomes suddenly read-only), which will prevent future writes to
  214. this file until the lockfile is removed manually.
  215. Raises:
  216. OSError: if the original file could not be overwritten. The
  217. lock file is still closed, so further attempts to write to the same
  218. file object will raise ValueError.
  219. """
  220. if self._closed:
  221. return
  222. self._file.flush()
  223. if self._fsync:
  224. os.fsync(self._file.fileno())
  225. self._file.close()
  226. try:
  227. if getattr(os, "replace", None) is not None:
  228. os.replace(self._lockfilename, self._filename)
  229. else:
  230. if sys.platform != "win32":
  231. os.rename(self._lockfilename, self._filename)
  232. else:
  233. # Windows versions prior to Vista don't support atomic
  234. # renames
  235. _fancy_rename(self._lockfilename, self._filename)
  236. finally:
  237. self.abort()
  238. def __del__(self) -> None:
  239. if not getattr(self, "_closed", True):
  240. warnings.warn(f"unclosed {self!r}", ResourceWarning, stacklevel=2)
  241. self.abort()
  242. def __enter__(self) -> "_GitFile":
  243. return self
  244. def __exit__(
  245. self,
  246. exc_type: Optional[type[BaseException]],
  247. exc_val: Optional[BaseException],
  248. exc_tb: Optional[TracebackType],
  249. ) -> None:
  250. if exc_type is not None:
  251. self.abort()
  252. else:
  253. self.close()
  254. def __fspath__(self) -> Union[str, bytes]:
  255. """Return the file path for os.fspath() compatibility."""
  256. return self._filename
  257. @property
  258. def closed(self) -> bool:
  259. """Return whether the file is closed."""
  260. return self._closed
  261. def __getattr__(self, name: str) -> Any: # noqa: ANN401
  262. """Proxy property calls to the underlying file."""
  263. if name in self.PROXY_PROPERTIES:
  264. return getattr(self._file, name)
  265. raise AttributeError(name)
  266. # Implement IO[bytes] methods by delegating to the underlying file
  267. def read(self, size: int = -1) -> bytes:
  268. return self._file.read(size)
  269. # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
  270. # Python 3.9/3.10 have issues with IO[bytes] overload signatures
  271. def write(self, data: Buffer, /) -> int: # type: ignore[override,unused-ignore]
  272. return self._file.write(data)
  273. def readline(self, size: int = -1) -> bytes:
  274. return self._file.readline(size)
  275. def readlines(self, hint: int = -1) -> list[bytes]:
  276. return self._file.readlines(hint)
  277. # TODO: Remove type: ignore when Python 3.10 support is dropped (Oct 2026)
  278. # Python 3.9/3.10 have issues with IO[bytes] overload signatures
  279. def writelines(self, lines: Iterable[Buffer], /) -> None: # type: ignore[override,unused-ignore]
  280. return self._file.writelines(lines)
  281. def seek(self, offset: int, whence: int = 0) -> int:
  282. return self._file.seek(offset, whence)
  283. def tell(self) -> int:
  284. return self._file.tell()
  285. def flush(self) -> None:
  286. return self._file.flush()
  287. def truncate(self, size: Optional[int] = None) -> int:
  288. return self._file.truncate(size)
  289. def fileno(self) -> int:
  290. return self._file.fileno()
  291. def isatty(self) -> bool:
  292. return self._file.isatty()
  293. def readable(self) -> bool:
  294. return self._file.readable()
  295. def writable(self) -> bool:
  296. return self._file.writable()
  297. def seekable(self) -> bool:
  298. return self._file.seekable()
  299. def __next__(self) -> bytes:
  300. return next(iter(self._file))