maintenance.py 15 KB


# maintenance.py -- Git maintenance implementation
# Copyright (C) 2025 Jelmer Vernooij <jelmer@jelmer.uk>
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Git maintenance implementation.

This module provides the git maintenance functionality for optimizing
and maintaining Git repositories.
"""
  25. __all__ = [
  26. "CommitGraphTask",
  27. "GcTask",
  28. "IncrementalRepackTask",
  29. "LooseObjectsTask",
  30. "MaintenanceResult",
  31. "MaintenanceSchedule",
  32. "MaintenanceTask",
  33. "PackRefsTask",
  34. "PrefetchTask",
  35. "get_enabled_tasks",
  36. "register_repository",
  37. "run_maintenance",
  38. "unregister_repository",
  39. ]
  40. import logging
  41. import os
  42. from abc import ABC, abstractmethod
  43. from collections.abc import Callable
  44. from dataclasses import dataclass, field
  45. from enum import Enum
  46. from typing import TYPE_CHECKING
  47. if TYPE_CHECKING:
  48. from .repo import BaseRepo, Repo
  49. logger = logging.getLogger(__name__)
  50. class MaintenanceSchedule(str, Enum):
  51. """Maintenance schedule types."""
  52. HOURLY = "hourly"
  53. DAILY = "daily"
  54. WEEKLY = "weekly"
  55. @dataclass
  56. class MaintenanceResult:
  57. """Result from running maintenance tasks."""
  58. tasks_run: list[str] = field(default_factory=list)
  59. tasks_succeeded: list[str] = field(default_factory=list)
  60. tasks_failed: list[str] = field(default_factory=list)
  61. errors: dict[str, str] = field(default_factory=dict)
  62. class MaintenanceTask(ABC):
  63. """Base class for maintenance tasks."""
  64. name: str = ""
  65. def __init__(
  66. self,
  67. repo: "BaseRepo",
  68. auto: bool = False,
  69. progress: Callable[[str], None] | None = None,
  70. ) -> None:
  71. """Initialize maintenance task.
  72. Args:
  73. repo: Repository object
  74. auto: If True, only run if needed
  75. progress: Optional progress callback
  76. """
  77. self.repo = repo
  78. self.auto = auto
  79. self.progress = progress
  80. @abstractmethod
  81. def run(self) -> bool:
  82. """Run the maintenance task.
  83. Returns:
  84. True if successful, False otherwise
  85. """
  86. def is_enabled(self) -> bool:
  87. """Check if task is enabled in repository configuration.
  88. Returns:
  89. True if task is enabled
  90. """
  91. if not self.name:
  92. return False
  93. config = self.repo.get_config()
  94. try:
  95. enabled = config.get_boolean(
  96. (b"maintenance", self.name.encode()), b"enabled"
  97. )
  98. return enabled if enabled is not None else self.default_enabled()
  99. except KeyError:
  100. # Return default enabled state
  101. return self.default_enabled()
  102. def default_enabled(self) -> bool:
  103. """Return default enabled state for this task.
  104. Returns:
  105. True if task should be enabled by default
  106. """
  107. return False
  108. class GcTask(MaintenanceTask):
  109. """Garbage collection maintenance task."""
  110. name = "gc"
  111. def default_enabled(self) -> bool:
  112. """GC is enabled by default."""
  113. return True
  114. def run(self) -> bool:
  115. """Run garbage collection.
  116. Returns:
  117. True if successful, False otherwise
  118. """
  119. from .gc import garbage_collect
  120. from .repo import Repo
  121. if self.progress:
  122. self.progress("Running gc task")
  123. assert isinstance(self.repo, Repo)
  124. garbage_collect(self.repo, auto=self.auto, progress=self.progress)
  125. return True
  126. class CommitGraphTask(MaintenanceTask):
  127. """Commit-graph maintenance task."""
  128. name = "commit-graph"
  129. def default_enabled(self) -> bool:
  130. """Commit-graph is enabled by default."""
  131. return True
  132. def run(self) -> bool:
  133. """Update commit-graph file.
  134. Returns:
  135. True if successful, False otherwise
  136. """
  137. if self.progress:
  138. self.progress("Running commit-graph task")
  139. # Get all refs
  140. refs = list(self.repo.refs.as_dict().values())
  141. if refs:
  142. self.repo.object_store.write_commit_graph(refs, reachable=True)
  143. return True
  144. class LooseObjectsTask(MaintenanceTask):
  145. """Loose-objects maintenance task.
  146. This packs loose objects that are not already packed.
  147. """
  148. name = "loose-objects"
  149. def run(self) -> bool:
  150. """Pack loose objects.
  151. Returns:
  152. True if successful, False otherwise
  153. """
  154. from .object_store import PackBasedObjectStore
  155. if self.progress:
  156. self.progress("Running loose-objects task")
  157. # Pack loose objects using the object store's method
  158. assert isinstance(self.repo.object_store, PackBasedObjectStore)
  159. count = self.repo.object_store.pack_loose_objects(progress=self.progress)
  160. if self.progress and count > 0:
  161. self.progress(f"Packed {count} loose objects")
  162. return True
  163. class IncrementalRepackTask(MaintenanceTask):
  164. """Incremental-repack maintenance task.
  165. This consolidates pack files incrementally.
  166. """
  167. name = "incremental-repack"
  168. def run(self) -> bool:
  169. """Consolidate pack files incrementally.
  170. Returns:
  171. True if successful, False otherwise
  172. """
  173. from .object_store import PackBasedObjectStore
  174. if self.progress:
  175. self.progress("Running incremental-repack task")
  176. # Get all packs sorted by size
  177. assert isinstance(self.repo.object_store, PackBasedObjectStore)
  178. packs = self.repo.object_store.packs
  179. if len(packs) <= 1:
  180. # Nothing to consolidate
  181. if self.progress:
  182. self.progress("No packs to consolidate")
  183. return True
  184. # In auto mode, only repack if there are many small packs
  185. # This is a heuristic similar to git's auto gc behavior
  186. if self.auto:
  187. # Only repack if we have more than 50 packs
  188. # (matching git's gc.autoPackLimit default)
  189. if len(packs) < 50:
  190. if self.progress:
  191. self.progress(
  192. f"Skipping incremental repack: only {len(packs)} packs"
  193. )
  194. return True
  195. # Perform a full repack to consolidate all packs
  196. if self.progress:
  197. self.progress(f"Consolidating {len(packs)} pack files")
  198. count = self.repo.object_store.repack(progress=self.progress)
  199. if self.progress:
  200. self.progress(f"Repacked {count} objects")
  201. return True
  202. class PackRefsTask(MaintenanceTask):
  203. """Pack-refs maintenance task."""
  204. name = "pack-refs"
  205. def run(self) -> bool:
  206. """Pack loose references.
  207. Returns:
  208. True if successful, False otherwise
  209. """
  210. if self.progress:
  211. self.progress("Running pack-refs task")
  212. self.repo.refs.pack_refs(all=True)
  213. return True
  214. class PrefetchTask(MaintenanceTask):
  215. """Prefetch maintenance task.
  216. This prefetches remote refs to keep the object database up-to-date.
  217. """
  218. name = "prefetch"
  219. def run(self) -> bool:
  220. """Prefetch remote refs.
  221. Returns:
  222. True if successful, False otherwise
  223. """
  224. from .porcelain import fetch
  225. from .repo import Repo
  226. if self.progress:
  227. self.progress("Running prefetch task")
  228. config = self.repo.get_config()
  229. # Get all configured remotes
  230. remotes = set()
  231. for section in config.sections():
  232. if len(section) == 2 and section[0] == b"remote":
  233. remotes.add(section[1].decode())
  234. if not remotes:
  235. if self.progress:
  236. self.progress("No remotes configured, skipping prefetch")
  237. return True
  238. # Fetch from each remote
  239. success = True
  240. for remote_name in sorted(remotes):
  241. try:
  242. if self.progress:
  243. self.progress(f"Fetching from {remote_name}")
  244. # Fetch quietly without updating working tree
  245. # The fetch operation will update refs under refs/remotes/
  246. assert isinstance(self.repo, Repo)
  247. fetch(
  248. self.repo,
  249. remote_location=remote_name,
  250. quiet=True,
  251. )
  252. except Exception as e:
  253. # Log error and mark as failed
  254. logger.error(f"Failed to fetch from {remote_name}: {e}")
  255. success = False
  256. return success
  257. # Registry of available maintenance tasks
  258. MAINTENANCE_TASKS: dict[str, type[MaintenanceTask]] = {
  259. "gc": GcTask,
  260. "commit-graph": CommitGraphTask,
  261. "loose-objects": LooseObjectsTask,
  262. "incremental-repack": IncrementalRepackTask,
  263. "pack-refs": PackRefsTask,
  264. "prefetch": PrefetchTask,
  265. }
  266. def get_enabled_tasks(
  267. repo: "BaseRepo",
  268. task_filter: list[str] | None = None,
  269. ) -> list[str]:
  270. """Get list of enabled maintenance tasks.
  271. Args:
  272. repo: Repository object
  273. task_filter: Optional list of specific task names to run
  274. Returns:
  275. List of enabled task names
  276. """
  277. if task_filter:
  278. # Validate requested tasks exist
  279. return [name for name in task_filter if name in MAINTENANCE_TASKS]
  280. enabled_tasks = []
  281. # Check each task to see if it's enabled
  282. for task_name, task_class in MAINTENANCE_TASKS.items():
  283. # Create temporary task instance to check if enabled
  284. task = task_class(repo, auto=False, progress=None)
  285. if task.is_enabled():
  286. enabled_tasks.append(task_name)
  287. return enabled_tasks
  288. def run_maintenance(
  289. repo: "BaseRepo",
  290. tasks: list[str] | None = None,
  291. auto: bool = False,
  292. progress: Callable[[str], None] | None = None,
  293. ) -> MaintenanceResult:
  294. """Run maintenance tasks on a repository.
  295. Args:
  296. repo: Repository object
  297. tasks: Optional list of specific task names to run
  298. auto: If True, only run tasks if needed
  299. progress: Optional progress callback
  300. Returns:
  301. MaintenanceResult with task execution results
  302. """
  303. result = MaintenanceResult()
  304. enabled_tasks = get_enabled_tasks(repo, tasks)
  305. for task_name in enabled_tasks:
  306. result.tasks_run.append(task_name)
  307. task_class = MAINTENANCE_TASKS.get(task_name)
  308. if not task_class:
  309. result.tasks_failed.append(task_name)
  310. result.errors[task_name] = "Unknown task"
  311. continue
  312. try:
  313. task = task_class(repo, auto=auto, progress=progress)
  314. success = task.run()
  315. if success:
  316. result.tasks_succeeded.append(task_name)
  317. else:
  318. result.tasks_failed.append(task_name)
  319. except Exception as e:
  320. result.tasks_failed.append(task_name)
  321. result.errors[task_name] = str(e)
  322. logger.error(f"Task {task_name} failed: {e}")
  323. return result
def register_repository(repo: "Repo") -> None:
    """Register a repository for background maintenance.

    This adds the repository's absolute path to ``maintenance.repo`` in the
    user's ``~/.gitconfig`` and sets up recommended configuration for
    scheduled maintenance. Afterwards ``maintenance.auto`` is set to false in
    the repository's own configuration.

    If the path is already listed under ``maintenance.repo`` the function
    returns immediately and writes nothing, including the repository-local
    ``maintenance.auto`` setting.

    Args:
        repo: Repository to register
    """
    from .config import ConfigFile

    repo_path = os.path.abspath(repo.path)

    # Get global config path
    global_config_path = os.path.expanduser("~/.gitconfig")
    try:
        global_config = ConfigFile.from_path(global_config_path)
    except FileNotFoundError:
        # Create new config file if it doesn't exist; remember where it
        # should be written
        global_config = ConfigFile()
        global_config.path = global_config_path

    # Add repository to maintenance.repo list
    # Check if already registered
    repo_path_bytes = repo_path.encode()
    try:
        existing_repos = list(global_config.get_multivar((b"maintenance",), b"repo"))
    except KeyError:
        # No maintenance.repo entries yet
        existing_repos = []

    if repo_path_bytes in existing_repos:
        # Already registered
        return

    # Add to global config
    # NOTE(review): maintenance.repo is read back as a multivar above; this
    # relies on set() keeping previously registered paths -- confirm against
    # the ConfigFile API.
    global_config.set((b"maintenance",), b"repo", repo_path_bytes)

    # Set up incremental strategy in global config if not already set
    try:
        global_config.get((b"maintenance",), b"strategy")
    except KeyError:
        global_config.set((b"maintenance",), b"strategy", b"incremental")

        # Configure task schedules for incremental strategy
        # NOTE(review): the nesting of this schedule block under the
        # ``except`` was reconstructed from a copy of the file that had lost
        # its indentation -- confirm against the canonical source.
        schedule_config = {
            b"commit-graph": b"hourly",
            b"prefetch": b"hourly",
            b"loose-objects": b"daily",
            b"incremental-repack": b"daily",
        }

        for task, schedule in schedule_config.items():
            try:
                # Leave a schedule the user already chose untouched
                global_config.get((b"maintenance", task), b"schedule")
            except KeyError:
                global_config.set((b"maintenance", task), b"schedule", schedule)

    global_config.write_to_path()

    # Disable foreground auto maintenance in the repository
    repo_config = repo.get_config()
    repo_config.set((b"maintenance",), b"auto", False)
    repo_config.write_to_path()
  375. def unregister_repository(repo: "Repo", force: bool = False) -> None:
  376. """Unregister a repository from background maintenance.
  377. This removes the repository from the global maintenance.repo config.
  378. Args:
  379. repo: Repository to unregister
  380. force: If True, don't error if repository is not registered
  381. Raises:
  382. ValueError: If repository is not registered and force is False
  383. """
  384. from .config import ConfigFile
  385. repo_path = os.path.abspath(repo.path)
  386. # Get global config
  387. global_config_path = os.path.expanduser("~/.gitconfig")
  388. try:
  389. global_config = ConfigFile.from_path(global_config_path)
  390. except FileNotFoundError:
  391. if not force:
  392. raise ValueError(
  393. f"Repository {repo_path} is not registered for maintenance"
  394. )
  395. return
  396. # Check if repository is registered
  397. repo_path_bytes = repo_path.encode()
  398. try:
  399. existing_repos = list(global_config.get_multivar((b"maintenance",), b"repo"))
  400. except KeyError:
  401. if not force:
  402. raise ValueError(
  403. f"Repository {repo_path} is not registered for maintenance"
  404. )
  405. return
  406. if repo_path_bytes not in existing_repos:
  407. if not force:
  408. raise ValueError(
  409. f"Repository {repo_path} is not registered for maintenance"
  410. )
  411. return
  412. # Remove from list
  413. existing_repos.remove(repo_path_bytes)
  414. # Delete the maintenance section and recreate it with remaining repos
  415. try:
  416. del global_config[(b"maintenance",)]
  417. except KeyError:
  418. pass
  419. # Re-add remaining repos
  420. for remaining_repo in existing_repos:
  421. global_config.set((b"maintenance",), b"repo", remaining_repo)
  422. global_config.write_to_path()