archive_purge.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. """Archive auto-purge service (#1008 follow-up).
  2. Age-based hard-delete of print archives. Unlike the library trash flow there is
  3. no soft-delete intermediate — archives are historical print records, so the
  4. "undo" window the library bin provides doesn't apply here. A user who wants to
  5. keep an archive should download or favourite it before the purge window elapses.
  6. The sweeper runs on the same 15-minute cadence as the library trash sweeper but
  7. throttles actual purge runs to once per 24h. Admins can also trigger a manual
  8. purge from the Settings UI.
  9. """
  10. from __future__ import annotations
  11. import asyncio
  12. import logging
  13. from datetime import datetime, timedelta, timezone
  14. from sqlalchemy import func, select
  15. from sqlalchemy.ext.asyncio import AsyncSession
  16. from backend.app.core import database as _database
  17. from backend.app.models.archive import PrintArchive
  18. from backend.app.models.settings import Settings
  19. from backend.app.services.archive import ArchiveService
logger = logging.getLogger(__name__)

# Keys of the Settings rows this module reads and writes.
AUTO_PURGE_ENABLED_KEY = "archive_auto_purge_enabled"
AUTO_PURGE_DAYS_KEY = "archive_auto_purge_days"
AUTO_PURGE_LAST_RUN_KEY = "archive_auto_purge_last_run"

# Retention window (in days) used when no value, or an unparsable one, is stored.
DEFAULT_AUTO_PURGE_DAYS = 365
# 7-day floor mirrors the library auto-purge; anything shorter treats archives
# as ephemeral which is rarely what anyone wants.
MIN_AUTO_PURGE_DAYS = 7
# Upper clamp for the retention window: 3650 days, i.e. roughly ten years.
MAX_AUTO_PURGE_DAYS = 3650
  29. def _age_cutoff(now: datetime, older_than_days: int) -> datetime:
  30. return now - timedelta(days=older_than_days)
  31. def _last_activity_expr():
  32. """Most-recent timestamp on an archive row.
  33. Reprints reuse the archive row and update ``completed_at``/``started_at`` but
  34. leave ``created_at`` pinned to the first print, so purging on ``created_at``
  35. would evict recently-reprinted archives. Use the latest of the three instead.
  36. """
  37. return func.coalesce(
  38. PrintArchive.completed_at,
  39. PrintArchive.started_at,
  40. PrintArchive.created_at,
  41. )
  42. class ArchivePurgeService:
  43. """Manages archive auto-purge sweeper + admin-triggered manual purges."""
  44. def __init__(self):
  45. self._scheduler_task: asyncio.Task | None = None
  46. # Match library trash cadence — the 24h throttle keeps actual work rare.
  47. self._check_interval = 900
  48. async def start_scheduler(self):
  49. if self._scheduler_task is not None:
  50. return
  51. logger.info("Starting archive auto-purge sweeper")
  52. self._scheduler_task = asyncio.create_task(self._scheduler_loop())
  53. def stop_scheduler(self):
  54. if self._scheduler_task:
  55. self._scheduler_task.cancel()
  56. self._scheduler_task = None
  57. logger.info("Stopped archive auto-purge sweeper")
  58. async def _scheduler_loop(self):
  59. while True:
  60. try:
  61. await asyncio.sleep(self._check_interval)
  62. async with _database.async_session() as db:
  63. await self._maybe_run_auto_purge(db)
  64. except asyncio.CancelledError:
  65. break
  66. except Exception as e: # pragma: no cover - defensive
  67. logger.error("Error in archive auto-purge sweeper: %s", e)
  68. await asyncio.sleep(60)
  69. # ---- Settings -----------------------------------------------------
  70. @staticmethod
  71. async def _read_setting(db: AsyncSession, key: str) -> str | None:
  72. result = await db.execute(select(Settings.value).where(Settings.key == key))
  73. return result.scalar_one_or_none()
  74. @staticmethod
  75. async def _write_setting(db: AsyncSession, key: str, value: str) -> None:
  76. result = await db.execute(select(Settings).where(Settings.key == key))
  77. row = result.scalar_one_or_none()
  78. if row is None:
  79. db.add(Settings(key=key, value=value))
  80. else:
  81. row.value = value
  82. async def get_settings(self, db: AsyncSession) -> dict:
  83. """Return ``{enabled, days}``. Missing keys default to disabled / 365d."""
  84. enabled_raw = await self._read_setting(db, AUTO_PURGE_ENABLED_KEY)
  85. days_raw = await self._read_setting(db, AUTO_PURGE_DAYS_KEY)
  86. enabled = (enabled_raw or "false").lower() == "true"
  87. try:
  88. days = int(days_raw) if days_raw is not None else DEFAULT_AUTO_PURGE_DAYS
  89. except (TypeError, ValueError):
  90. days = DEFAULT_AUTO_PURGE_DAYS
  91. days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, days))
  92. return {"enabled": enabled, "days": days}
  93. async def set_settings(self, db: AsyncSession, *, enabled: bool, days: int) -> dict:
  94. clamped_days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, int(days)))
  95. await self._write_setting(db, AUTO_PURGE_ENABLED_KEY, "true" if enabled else "false")
  96. await self._write_setting(db, AUTO_PURGE_DAYS_KEY, str(clamped_days))
  97. await db.commit()
  98. return {"enabled": enabled, "days": clamped_days}
  99. async def _get_last_run(self, db: AsyncSession) -> datetime | None:
  100. raw = await self._read_setting(db, AUTO_PURGE_LAST_RUN_KEY)
  101. if not raw:
  102. return None
  103. try:
  104. return datetime.fromisoformat(raw.replace("Z", "+00:00"))
  105. except ValueError:
  106. return None
  107. async def _stamp_last_run(self, db: AsyncSession, when: datetime) -> None:
  108. await self._write_setting(db, AUTO_PURGE_LAST_RUN_KEY, when.isoformat())
  109. await db.commit()
  110. async def _maybe_run_auto_purge(self, db: AsyncSession) -> int:
  111. """Run the auto-purge if enabled and >=24h has elapsed since last run."""
  112. cfg = await self.get_settings(db)
  113. if not cfg["enabled"]:
  114. return 0
  115. now = datetime.now(timezone.utc)
  116. last = await self._get_last_run(db)
  117. if last is not None and (now - last) < timedelta(hours=24):
  118. return 0
  119. deleted = await self.purge_older_than(db, older_than_days=cfg["days"])
  120. await self._stamp_last_run(db, now)
  121. if deleted:
  122. logger.info(
  123. "Archive auto-purge: hard-deleted %d archive(s) (threshold=%d days)",
  124. deleted,
  125. cfg["days"],
  126. )
  127. return deleted
  128. # ---- Preview / purge ---------------------------------------------
  129. async def preview_purge(
  130. self,
  131. db: AsyncSession,
  132. older_than_days: int,
  133. sample_limit: int = 5,
  134. ) -> dict:
  135. """Count + size of archives eligible for purge. Read-only."""
  136. if older_than_days < 1:
  137. return {
  138. "count": 0,
  139. "total_bytes": 0,
  140. "sample_filenames": [],
  141. "older_than_days": older_than_days,
  142. }
  143. now = datetime.now(timezone.utc)
  144. cutoff = _age_cutoff(now, older_than_days)
  145. last_activity = _last_activity_expr()
  146. clause = last_activity < cutoff
  147. count_result = await db.execute(select(func.count(PrintArchive.id)).where(clause))
  148. count = int(count_result.scalar() or 0)
  149. size_result = await db.execute(select(func.coalesce(func.sum(PrintArchive.file_size), 0)).where(clause))
  150. total_bytes = int(size_result.scalar() or 0)
  151. sample_result = await db.execute(
  152. select(PrintArchive.filename).where(clause).order_by(last_activity).limit(sample_limit)
  153. )
  154. samples = [row[0] for row in sample_result.all()]
  155. return {
  156. "count": count,
  157. "total_bytes": total_bytes,
  158. "sample_filenames": samples,
  159. "older_than_days": older_than_days,
  160. }
  161. async def purge_older_than(self, db: AsyncSession, older_than_days: int) -> int:
  162. """Hard-delete archives older than ``older_than_days``. Returns count.
  163. Delegates to :meth:`ArchiveService.delete_archive` for every row so the
  164. on-disk cleanup (3MF, thumbnail, timelapse, photos) goes through the
  165. same safety-checked path as manual deletion. Each delete runs in its
  166. own session so a commit-per-row doesn't churn the caller's session
  167. (and matches how the sweeper uses :func:`_database.async_session` in production).
  168. """
  169. if older_than_days < 1:
  170. return 0
  171. now = datetime.now(timezone.utc)
  172. cutoff = _age_cutoff(now, older_than_days)
  173. id_result = await db.execute(select(PrintArchive.id).where(_last_activity_expr() < cutoff))
  174. ids = [row[0] for row in id_result.all()]
  175. if not ids:
  176. return 0
  177. deleted = 0
  178. for archive_id in ids:
  179. async with _database.async_session() as delete_db:
  180. service = ArchiveService(delete_db)
  181. if await service.delete_archive(archive_id):
  182. deleted += 1
  183. if deleted:
  184. logger.info(
  185. "Archive purge: hard-deleted %d archive(s) (older_than_days=%d)",
  186. deleted,
  187. older_than_days,
  188. )
  189. return deleted
# Shared instance created once at import time.
archive_purge_service = ArchivePurgeService()