reflog.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. # reflog.py -- Parsing and writing reflog files
  2. # Copyright (C) 2015 Jelmer Vernooij and others.
  3. #
  4. # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  5. # Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
  6. # General Public License as published by the Free Software Foundation; version 2.0
  7. # or (at your option) any later version. You can redistribute it and/or
  8. # modify it under the terms of either of these two licenses.
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. # You should have received a copy of the licenses; if not, see
  17. # <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
  18. # and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
  19. # License, Version 2.0.
  20. #
  21. """Utilities for reading and generating reflogs."""
  22. __all__ = [
  23. "drop_reflog_entry",
  24. "expire_reflog",
  25. "format_reflog_line",
  26. "iter_reflogs",
  27. "parse_reflog_line",
  28. "parse_reflog_spec",
  29. "read_reflog",
  30. ]
  31. import collections
  32. from collections.abc import Callable, Generator
  33. from typing import IO, BinaryIO
  34. from .file import _GitFile
  35. from .objects import ZERO_SHA, format_timezone, parse_timezone
  36. Entry = collections.namedtuple(
  37. "Entry",
  38. ["old_sha", "new_sha", "committer", "timestamp", "timezone", "message"],
  39. )
  40. def parse_reflog_spec(refspec: str | bytes) -> tuple[bytes, int]:
  41. """Parse a reflog specification like 'HEAD@{1}' or 'refs/heads/master@{2}'.
  42. Args:
  43. refspec: Reflog specification (e.g., 'HEAD@{1}', 'master@{0}')
  44. Returns:
  45. Tuple of (ref_name, index) where index is in Git reflog order (0 = newest)
  46. Raises:
  47. ValueError: If the refspec is not a valid reflog specification
  48. """
  49. if isinstance(refspec, str):
  50. refspec = refspec.encode("utf-8")
  51. if b"@{" not in refspec:
  52. raise ValueError(
  53. f"Invalid reflog spec: {refspec!r}. Expected format: ref@{{n}}"
  54. )
  55. ref, rest = refspec.split(b"@{", 1)
  56. if not rest.endswith(b"}"):
  57. raise ValueError(
  58. f"Invalid reflog spec: {refspec!r}. Expected format: ref@{{n}}"
  59. )
  60. index_str = rest[:-1]
  61. if not index_str.isdigit():
  62. raise ValueError(
  63. f"Invalid reflog index: {index_str!r}. Expected integer in ref@{{n}}"
  64. )
  65. # Use HEAD if no ref specified (e.g., "@{1}")
  66. if not ref:
  67. ref = b"HEAD"
  68. return ref, int(index_str)
  69. def format_reflog_line(
  70. old_sha: bytes | None,
  71. new_sha: bytes,
  72. committer: bytes,
  73. timestamp: int | float,
  74. timezone: int,
  75. message: bytes,
  76. ) -> bytes:
  77. """Generate a single reflog line.
  78. Args:
  79. old_sha: Old Commit SHA
  80. new_sha: New Commit SHA
  81. committer: Committer name and e-mail
  82. timestamp: Timestamp
  83. timezone: Timezone
  84. message: Message
  85. """
  86. if old_sha is None:
  87. old_sha = ZERO_SHA
  88. return (
  89. old_sha
  90. + b" "
  91. + new_sha
  92. + b" "
  93. + committer
  94. + b" "
  95. + str(int(timestamp)).encode("ascii")
  96. + b" "
  97. + format_timezone(timezone)
  98. + b"\t"
  99. + message
  100. )
  101. def parse_reflog_line(line: bytes) -> Entry:
  102. """Parse a reflog line.
  103. Args:
  104. line: Line to parse
  105. Returns: Tuple of (old_sha, new_sha, committer, timestamp, timezone,
  106. message)
  107. """
  108. (begin, message) = line.split(b"\t", 1)
  109. (old_sha, new_sha, rest) = begin.split(b" ", 2)
  110. (committer, timestamp_str, timezone_str) = rest.rsplit(b" ", 2)
  111. return Entry(
  112. old_sha,
  113. new_sha,
  114. committer,
  115. int(timestamp_str),
  116. parse_timezone(timezone_str)[0],
  117. message,
  118. )
  119. def read_reflog(
  120. f: BinaryIO | IO[bytes] | _GitFile,
  121. ) -> Generator[Entry, None, None]:
  122. """Read reflog.
  123. Args:
  124. f: File-like object
  125. Returns: Iterator over Entry objects
  126. """
  127. for line in f:
  128. yield parse_reflog_line(line.rstrip(b"\n"))
  129. def drop_reflog_entry(f: BinaryIO, index: int, rewrite: bool = False) -> None:
  130. """Drop the specified reflog entry.
  131. Args:
  132. f: File-like object
  133. index: Reflog entry index (in Git reflog reverse 0-indexed order)
  134. rewrite: If a reflog entry's predecessor is removed, set its
  135. old SHA to the new SHA of the entry that now precedes it
  136. """
  137. if index < 0:
  138. raise ValueError(f"Invalid reflog index {index}")
  139. log = []
  140. offset = f.tell()
  141. for line in f:
  142. log.append((offset, parse_reflog_line(line)))
  143. offset = f.tell()
  144. inverse_index = len(log) - index - 1
  145. write_offset = log[inverse_index][0]
  146. f.seek(write_offset)
  147. if index == 0:
  148. f.truncate()
  149. return
  150. del log[inverse_index]
  151. if rewrite and index > 0 and log:
  152. if inverse_index == 0:
  153. previous_new = ZERO_SHA
  154. else:
  155. previous_new = log[inverse_index - 1][1].new_sha
  156. offset, entry = log[inverse_index]
  157. log[inverse_index] = (
  158. offset,
  159. Entry(
  160. previous_new,
  161. entry.new_sha,
  162. entry.committer,
  163. entry.timestamp,
  164. entry.timezone,
  165. entry.message,
  166. ),
  167. )
  168. for _, entry in log[inverse_index:]:
  169. f.write(
  170. format_reflog_line(
  171. entry.old_sha,
  172. entry.new_sha,
  173. entry.committer,
  174. entry.timestamp,
  175. entry.timezone,
  176. entry.message,
  177. )
  178. )
  179. f.truncate()
  180. def expire_reflog(
  181. f: BinaryIO,
  182. expire_time: int | None = None,
  183. expire_unreachable_time: int | None = None,
  184. reachable_checker: Callable[[bytes], bool] | None = None,
  185. ) -> int:
  186. """Expire reflog entries based on age and reachability.
  187. Args:
  188. f: File-like object for the reflog
  189. expire_time: Expire entries older than this timestamp (seconds since epoch).
  190. If None, entries are not expired based on age alone.
  191. expire_unreachable_time: Expire unreachable entries older than this
  192. timestamp. If None, unreachable entries are not expired.
  193. reachable_checker: Optional callable that takes a SHA and returns True
  194. if the commit is reachable. If None, all entries are considered
  195. reachable.
  196. Returns:
  197. Number of entries expired
  198. """
  199. if expire_time is None and expire_unreachable_time is None:
  200. return 0
  201. entries = []
  202. offset = f.tell()
  203. for line in f:
  204. entries.append((offset, parse_reflog_line(line)))
  205. offset = f.tell()
  206. # Filter entries that should be kept
  207. kept_entries = []
  208. expired_count = 0
  209. for offset, entry in entries:
  210. should_expire = False
  211. # Check if entry is reachable
  212. is_reachable = True
  213. if reachable_checker is not None:
  214. is_reachable = reachable_checker(entry.new_sha)
  215. # Apply expiration rules
  216. # Check the appropriate expiration time based on reachability
  217. if is_reachable:
  218. if expire_time is not None and entry.timestamp < expire_time:
  219. should_expire = True
  220. else:
  221. if (
  222. expire_unreachable_time is not None
  223. and entry.timestamp < expire_unreachable_time
  224. ):
  225. should_expire = True
  226. if should_expire:
  227. expired_count += 1
  228. else:
  229. kept_entries.append((offset, entry))
  230. # Write back the kept entries
  231. if expired_count > 0:
  232. f.seek(0)
  233. for _, entry in kept_entries:
  234. f.write(
  235. format_reflog_line(
  236. entry.old_sha,
  237. entry.new_sha,
  238. entry.committer,
  239. entry.timestamp,
  240. entry.timezone,
  241. entry.message,
  242. )
  243. )
  244. f.truncate()
  245. return expired_count
  246. def iter_reflogs(logs_dir: str) -> Generator[bytes, None, None]:
  247. """Iterate over all reflogs in a repository.
  248. Args:
  249. logs_dir: Path to the logs directory (e.g., .git/logs)
  250. Yields:
  251. Reference names (as bytes) that have reflogs
  252. """
  253. import os
  254. from pathlib import Path
  255. if not os.path.exists(logs_dir):
  256. return
  257. logs_path = Path(logs_dir)
  258. for log_file in logs_path.rglob("*"):
  259. if log_file.is_file():
  260. # Get the ref name by removing the logs_dir prefix
  261. ref_name = str(log_file.relative_to(logs_path))
  262. # Convert path separators to / for refs
  263. ref_name = ref_name.replace(os.sep, "/")
  264. yield ref_name.encode("utf-8")