archive_purge.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
"""Archive auto-purge service (#1008 follow-up).

Age-based purge of print archives. Unlike the library trash flow there is no
"undo" window — archives are historical print records, and a purge removes
their files from disk. By default the archive row itself is only soft-deleted
(hidden via ``deleted_at``) so its print-log statistics are preserved; with the
``purge_stats`` option the linked log rows and the archive row are hard-deleted
instead (#1390). A user who wants to keep an archive should download or
favourite it before the purge window elapses.

The sweeper runs on the same 15-minute cadence as the library trash sweeper but
throttles actual purge runs to once per 24h. Admins can also trigger a manual
purge from the Settings UI.
"""
  10. from __future__ import annotations
  11. import asyncio
  12. import logging
  13. from datetime import datetime, timedelta, timezone
  14. from sqlalchemy import func, select
  15. from sqlalchemy.ext.asyncio import AsyncSession
  16. from backend.app.core import database as _database
  17. from backend.app.models.archive import PrintArchive
  18. from backend.app.models.settings import Settings
  19. from backend.app.services.archive import ArchiveService
  20. logger = logging.getLogger(__name__)
  21. AUTO_PURGE_ENABLED_KEY = "archive_auto_purge_enabled"
  22. AUTO_PURGE_DAYS_KEY = "archive_auto_purge_days"
  23. AUTO_PURGE_LAST_RUN_KEY = "archive_auto_purge_last_run"
  24. # #1390 follow-up: bulk and scheduled purge inherit the same "soft vs hard"
  25. # choice the single-archive delete already exposes (#1343). When False
  26. # (default), each purged archive goes through soft_delete_archive — files
  27. # removed from disk, row hidden via `deleted_at`, PrintLogEntry rows
  28. # untouched so Quick Stats keeps every contribution. When True, the linked
  29. # log rows are deleted up front and the archive row is hard-removed,
  30. # matching the route's `?purge_stats=true` semantics.
  31. AUTO_PURGE_STATS_KEY = "archive_auto_purge_stats"
  32. DEFAULT_AUTO_PURGE_DAYS = 365
  33. # 7-day floor mirrors the library auto-purge; anything shorter treats archives
  34. # as ephemeral which is rarely what anyone wants.
  35. MIN_AUTO_PURGE_DAYS = 7
  36. MAX_AUTO_PURGE_DAYS = 3650
  37. def _age_cutoff(now: datetime, older_than_days: int) -> datetime:
  38. return now - timedelta(days=older_than_days)
  39. def _last_activity_expr():
  40. """Most-recent timestamp on an archive row.
  41. Reprints reuse the archive row and update ``completed_at``/``started_at`` but
  42. leave ``created_at`` pinned to the first print, so purging on ``created_at``
  43. would evict recently-reprinted archives. Use the latest of the three instead.
  44. """
  45. return func.coalesce(
  46. PrintArchive.completed_at,
  47. PrintArchive.started_at,
  48. PrintArchive.created_at,
  49. )
  50. class ArchivePurgeService:
  51. """Manages archive auto-purge sweeper + admin-triggered manual purges."""
  52. def __init__(self):
  53. self._scheduler_task: asyncio.Task | None = None
  54. # Match library trash cadence — the 24h throttle keeps actual work rare.
  55. self._check_interval = 900
  56. async def start_scheduler(self):
  57. if self._scheduler_task is not None:
  58. return
  59. logger.info("Starting archive auto-purge sweeper")
  60. self._scheduler_task = asyncio.create_task(self._scheduler_loop())
  61. def stop_scheduler(self):
  62. if self._scheduler_task:
  63. self._scheduler_task.cancel()
  64. self._scheduler_task = None
  65. logger.info("Stopped archive auto-purge sweeper")
  66. async def _scheduler_loop(self):
  67. while True:
  68. try:
  69. await asyncio.sleep(self._check_interval)
  70. async with _database.async_session() as db:
  71. await self._maybe_run_auto_purge(db)
  72. except asyncio.CancelledError:
  73. break
  74. except Exception as e: # pragma: no cover - defensive
  75. logger.error("Error in archive auto-purge sweeper: %s", e)
  76. await asyncio.sleep(60)
  77. # ---- Settings -----------------------------------------------------
  78. @staticmethod
  79. async def _read_setting(db: AsyncSession, key: str) -> str | None:
  80. result = await db.execute(select(Settings.value).where(Settings.key == key))
  81. return result.scalar_one_or_none()
  82. @staticmethod
  83. async def _write_setting(db: AsyncSession, key: str, value: str) -> None:
  84. result = await db.execute(select(Settings).where(Settings.key == key))
  85. row = result.scalar_one_or_none()
  86. if row is None:
  87. db.add(Settings(key=key, value=value))
  88. else:
  89. row.value = value
  90. async def get_settings(self, db: AsyncSession) -> dict:
  91. """Return ``{enabled, days, purge_stats}``. Missing keys default to
  92. disabled / 365d / soft-delete (Quick Stats preserved)."""
  93. enabled_raw = await self._read_setting(db, AUTO_PURGE_ENABLED_KEY)
  94. days_raw = await self._read_setting(db, AUTO_PURGE_DAYS_KEY)
  95. stats_raw = await self._read_setting(db, AUTO_PURGE_STATS_KEY)
  96. enabled = (enabled_raw or "false").lower() == "true"
  97. try:
  98. days = int(days_raw) if days_raw is not None else DEFAULT_AUTO_PURGE_DAYS
  99. except (TypeError, ValueError):
  100. days = DEFAULT_AUTO_PURGE_DAYS
  101. days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, days))
  102. purge_stats = (stats_raw or "false").lower() == "true"
  103. return {"enabled": enabled, "days": days, "purge_stats": purge_stats}
  104. async def set_settings(self, db: AsyncSession, *, enabled: bool, days: int, purge_stats: bool = False) -> dict:
  105. clamped_days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, int(days)))
  106. await self._write_setting(db, AUTO_PURGE_ENABLED_KEY, "true" if enabled else "false")
  107. await self._write_setting(db, AUTO_PURGE_DAYS_KEY, str(clamped_days))
  108. await self._write_setting(db, AUTO_PURGE_STATS_KEY, "true" if purge_stats else "false")
  109. await db.commit()
  110. return {"enabled": enabled, "days": clamped_days, "purge_stats": purge_stats}
  111. async def _get_last_run(self, db: AsyncSession) -> datetime | None:
  112. raw = await self._read_setting(db, AUTO_PURGE_LAST_RUN_KEY)
  113. if not raw:
  114. return None
  115. try:
  116. return datetime.fromisoformat(raw.replace("Z", "+00:00"))
  117. except ValueError:
  118. return None
  119. async def _stamp_last_run(self, db: AsyncSession, when: datetime) -> None:
  120. await self._write_setting(db, AUTO_PURGE_LAST_RUN_KEY, when.isoformat())
  121. await db.commit()
  122. async def _maybe_run_auto_purge(self, db: AsyncSession) -> int:
  123. """Run the auto-purge if enabled and >=24h has elapsed since last run."""
  124. cfg = await self.get_settings(db)
  125. if not cfg["enabled"]:
  126. return 0
  127. now = datetime.now(timezone.utc)
  128. last = await self._get_last_run(db)
  129. if last is not None and (now - last) < timedelta(hours=24):
  130. return 0
  131. deleted = await self.purge_older_than(
  132. db,
  133. older_than_days=cfg["days"],
  134. purge_stats=cfg["purge_stats"],
  135. )
  136. await self._stamp_last_run(db, now)
  137. if deleted:
  138. logger.info(
  139. "Archive auto-purge: %s %d archive(s) (threshold=%d days, purge_stats=%s)",
  140. "hard-deleted" if cfg["purge_stats"] else "soft-deleted",
  141. deleted,
  142. cfg["days"],
  143. cfg["purge_stats"],
  144. )
  145. return deleted
  146. # ---- Preview / purge ---------------------------------------------
  147. async def preview_purge(
  148. self,
  149. db: AsyncSession,
  150. older_than_days: int,
  151. sample_limit: int = 5,
  152. *,
  153. purge_stats: bool = False,
  154. ) -> dict:
  155. """Count + size of archives eligible for purge. Read-only.
  156. Soft-delete mode (default) excludes already-soft-deleted rows so the
  157. admin slider's "eligible" count matches what a fresh purge would
  158. actually touch. Hard-delete mode counts every row past the cutoff —
  159. already-soft-deleted rows are eligible for promotion to hard-delete.
  160. """
  161. if older_than_days < 1:
  162. return {
  163. "count": 0,
  164. "total_bytes": 0,
  165. "sample_filenames": [],
  166. "older_than_days": older_than_days,
  167. }
  168. now = datetime.now(timezone.utc)
  169. cutoff = _age_cutoff(now, older_than_days)
  170. last_activity = _last_activity_expr()
  171. clause = last_activity < cutoff
  172. count_stmt = select(func.count(PrintArchive.id)).where(clause)
  173. size_stmt = select(func.coalesce(func.sum(PrintArchive.file_size), 0)).where(clause)
  174. sample_stmt = select(PrintArchive.filename).where(clause).order_by(last_activity).limit(sample_limit)
  175. if not purge_stats:
  176. count_stmt = count_stmt.where(PrintArchive.deleted_at.is_(None))
  177. size_stmt = size_stmt.where(PrintArchive.deleted_at.is_(None))
  178. sample_stmt = sample_stmt.where(PrintArchive.deleted_at.is_(None))
  179. count_result = await db.execute(count_stmt)
  180. count = int(count_result.scalar() or 0)
  181. size_result = await db.execute(size_stmt)
  182. total_bytes = int(size_result.scalar() or 0)
  183. sample_result = await db.execute(sample_stmt)
  184. samples = [row[0] for row in sample_result.all()]
  185. return {
  186. "count": count,
  187. "total_bytes": total_bytes,
  188. "sample_filenames": samples,
  189. "older_than_days": older_than_days,
  190. }
  191. async def purge_older_than(
  192. self,
  193. db: AsyncSession,
  194. older_than_days: int,
  195. *,
  196. purge_stats: bool = False,
  197. ) -> int:
  198. """Bulk-delete archives older than ``older_than_days``. Returns count.
  199. Two modes, parameter-controlled (#1390):
  200. * ``purge_stats=False`` (default): each archive goes through
  201. :meth:`ArchiveService.soft_delete_archive` — files removed from disk
  202. and the row hidden via ``deleted_at``, but the linked
  203. ``PrintLogEntry`` rows are untouched so Quick Stats keeps every
  204. contribution (filament, cost, energy, time accuracy).
  205. * ``purge_stats=True``: linked log rows are hard-deleted up front and
  206. the archive row is hard-removed via
  207. :meth:`ArchiveService.delete_archive`. Matches the single-archive
  208. ``DELETE /archives/{id}?purge_stats=true`` semantics from #1343.
  209. Each delete runs in its own session so a commit-per-row doesn't churn
  210. the caller's session (matches how the sweeper uses
  211. :func:`_database.async_session` in production).
  212. """
  213. if older_than_days < 1:
  214. return 0
  215. now = datetime.now(timezone.utc)
  216. cutoff = _age_cutoff(now, older_than_days)
  217. # Soft-delete mode must also skip rows already soft-deleted, otherwise
  218. # a repeat sweeper run keeps re-touching the same rows. Hard-delete
  219. # mode doesn't filter — already-soft-deleted rows are eligible for
  220. # promotion to hard-delete when the user opts in.
  221. select_stmt = select(PrintArchive.id).where(_last_activity_expr() < cutoff)
  222. if not purge_stats:
  223. select_stmt = select_stmt.where(PrintArchive.deleted_at.is_(None))
  224. id_result = await db.execute(select_stmt)
  225. ids = [row[0] for row in id_result.all()]
  226. if not ids:
  227. return 0
  228. deleted = 0
  229. for archive_id in ids:
  230. async with _database.async_session() as delete_db:
  231. service = ArchiveService(delete_db)
  232. if purge_stats:
  233. # Hard-delete linked PrintLogEntry rows first so their
  234. # filament / cost contributions stop counting in /stats.
  235. # FK is ON DELETE SET NULL, so without this they'd
  236. # survive the archive row and keep showing up in totals
  237. # (#1343 / #1378 / #1390).
  238. from sqlalchemy import delete as sa_delete
  239. from backend.app.models.print_log import PrintLogEntry
  240. await delete_db.execute(sa_delete(PrintLogEntry).where(PrintLogEntry.archive_id == archive_id))
  241. await delete_db.commit()
  242. if await service.delete_archive(archive_id):
  243. deleted += 1
  244. else:
  245. if await service.soft_delete_archive(archive_id):
  246. deleted += 1
  247. if deleted:
  248. logger.info(
  249. "Archive purge: %s %d archive(s) (older_than_days=%d, purge_stats=%s)",
  250. "hard-deleted" if purge_stats else "soft-deleted",
  251. deleted,
  252. older_than_days,
  253. purge_stats,
  254. )
  255. return deleted
# Module-level service instance, created at import time. Presumably imported
# by callers as a shared singleton — verify against the call sites.
archive_purge_service = ArchivePurgeService()