| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- """Archive auto-purge service (#1008 follow-up).
- Age-based hard-delete of print archives. Unlike the library trash flow there is
- no soft-delete intermediate — archives are historical print records, so the
- "undo" window the library bin provides doesn't apply here. A user who wants to
- keep an archive should download or favourite it before the purge window elapses.
- The sweeper runs on the same 15-minute cadence as the library trash sweeper but
- throttles actual purge runs to once per 24h. Admins can also trigger a manual
- purge from the Settings UI.
- """
- from __future__ import annotations
- import asyncio
- import logging
- from datetime import datetime, timedelta, timezone
- from sqlalchemy import func, select
- from sqlalchemy.ext.asyncio import AsyncSession
- from backend.app.core import database as _database
- from backend.app.models.archive import PrintArchive
- from backend.app.models.settings import Settings
- from backend.app.services.archive import ArchiveService
- logger = logging.getLogger(__name__)
- AUTO_PURGE_ENABLED_KEY = "archive_auto_purge_enabled"
- AUTO_PURGE_DAYS_KEY = "archive_auto_purge_days"
- AUTO_PURGE_LAST_RUN_KEY = "archive_auto_purge_last_run"
- # #1390 follow-up: bulk and scheduled purge inherit the same "soft vs hard"
- # choice the single-archive delete already exposes (#1343). When False
- # (default), each purged archive goes through soft_delete_archive — files
- # removed from disk, row hidden via `deleted_at`, PrintLogEntry rows
- # untouched so Quick Stats keeps every contribution. When True, the linked
- # log rows are deleted up front and the archive row is hard-removed,
- # matching the route's `?purge_stats=true` semantics.
- AUTO_PURGE_STATS_KEY = "archive_auto_purge_stats"
- DEFAULT_AUTO_PURGE_DAYS = 365
- # 7-day floor mirrors the library auto-purge; anything shorter treats archives
- # as ephemeral which is rarely what anyone wants.
- MIN_AUTO_PURGE_DAYS = 7
- MAX_AUTO_PURGE_DAYS = 3650
- def _age_cutoff(now: datetime, older_than_days: int) -> datetime:
- return now - timedelta(days=older_than_days)
- def _last_activity_expr():
- """Most-recent timestamp on an archive row.
- Reprints reuse the archive row and update ``completed_at``/``started_at`` but
- leave ``created_at`` pinned to the first print, so purging on ``created_at``
- would evict recently-reprinted archives. Use the latest of the three instead.
- """
- return func.coalesce(
- PrintArchive.completed_at,
- PrintArchive.started_at,
- PrintArchive.created_at,
- )
- class ArchivePurgeService:
- """Manages archive auto-purge sweeper + admin-triggered manual purges."""
- def __init__(self):
- self._scheduler_task: asyncio.Task | None = None
- # Match library trash cadence — the 24h throttle keeps actual work rare.
- self._check_interval = 900
- async def start_scheduler(self):
- if self._scheduler_task is not None:
- return
- logger.info("Starting archive auto-purge sweeper")
- self._scheduler_task = asyncio.create_task(self._scheduler_loop())
- def stop_scheduler(self):
- if self._scheduler_task:
- self._scheduler_task.cancel()
- self._scheduler_task = None
- logger.info("Stopped archive auto-purge sweeper")
- async def _scheduler_loop(self):
- while True:
- try:
- await asyncio.sleep(self._check_interval)
- async with _database.async_session() as db:
- await self._maybe_run_auto_purge(db)
- except asyncio.CancelledError:
- break
- except Exception as e: # pragma: no cover - defensive
- logger.error("Error in archive auto-purge sweeper: %s", e)
- await asyncio.sleep(60)
- # ---- Settings -----------------------------------------------------
- @staticmethod
- async def _read_setting(db: AsyncSession, key: str) -> str | None:
- result = await db.execute(select(Settings.value).where(Settings.key == key))
- return result.scalar_one_or_none()
- @staticmethod
- async def _write_setting(db: AsyncSession, key: str, value: str) -> None:
- result = await db.execute(select(Settings).where(Settings.key == key))
- row = result.scalar_one_or_none()
- if row is None:
- db.add(Settings(key=key, value=value))
- else:
- row.value = value
- async def get_settings(self, db: AsyncSession) -> dict:
- """Return ``{enabled, days, purge_stats}``. Missing keys default to
- disabled / 365d / soft-delete (Quick Stats preserved)."""
- enabled_raw = await self._read_setting(db, AUTO_PURGE_ENABLED_KEY)
- days_raw = await self._read_setting(db, AUTO_PURGE_DAYS_KEY)
- stats_raw = await self._read_setting(db, AUTO_PURGE_STATS_KEY)
- enabled = (enabled_raw or "false").lower() == "true"
- try:
- days = int(days_raw) if days_raw is not None else DEFAULT_AUTO_PURGE_DAYS
- except (TypeError, ValueError):
- days = DEFAULT_AUTO_PURGE_DAYS
- days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, days))
- purge_stats = (stats_raw or "false").lower() == "true"
- return {"enabled": enabled, "days": days, "purge_stats": purge_stats}
- async def set_settings(self, db: AsyncSession, *, enabled: bool, days: int, purge_stats: bool = False) -> dict:
- clamped_days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, int(days)))
- await self._write_setting(db, AUTO_PURGE_ENABLED_KEY, "true" if enabled else "false")
- await self._write_setting(db, AUTO_PURGE_DAYS_KEY, str(clamped_days))
- await self._write_setting(db, AUTO_PURGE_STATS_KEY, "true" if purge_stats else "false")
- await db.commit()
- return {"enabled": enabled, "days": clamped_days, "purge_stats": purge_stats}
- async def _get_last_run(self, db: AsyncSession) -> datetime | None:
- raw = await self._read_setting(db, AUTO_PURGE_LAST_RUN_KEY)
- if not raw:
- return None
- try:
- return datetime.fromisoformat(raw.replace("Z", "+00:00"))
- except ValueError:
- return None
- async def _stamp_last_run(self, db: AsyncSession, when: datetime) -> None:
- await self._write_setting(db, AUTO_PURGE_LAST_RUN_KEY, when.isoformat())
- await db.commit()
- async def _maybe_run_auto_purge(self, db: AsyncSession) -> int:
- """Run the auto-purge if enabled and >=24h has elapsed since last run."""
- cfg = await self.get_settings(db)
- if not cfg["enabled"]:
- return 0
- now = datetime.now(timezone.utc)
- last = await self._get_last_run(db)
- if last is not None and (now - last) < timedelta(hours=24):
- return 0
- deleted = await self.purge_older_than(
- db,
- older_than_days=cfg["days"],
- purge_stats=cfg["purge_stats"],
- )
- await self._stamp_last_run(db, now)
- if deleted:
- logger.info(
- "Archive auto-purge: %s %d archive(s) (threshold=%d days, purge_stats=%s)",
- "hard-deleted" if cfg["purge_stats"] else "soft-deleted",
- deleted,
- cfg["days"],
- cfg["purge_stats"],
- )
- return deleted
- # ---- Preview / purge ---------------------------------------------
- async def preview_purge(
- self,
- db: AsyncSession,
- older_than_days: int,
- sample_limit: int = 5,
- *,
- purge_stats: bool = False,
- ) -> dict:
- """Count + size of archives eligible for purge. Read-only.
- Soft-delete mode (default) excludes already-soft-deleted rows so the
- admin slider's "eligible" count matches what a fresh purge would
- actually touch. Hard-delete mode counts every row past the cutoff —
- already-soft-deleted rows are eligible for promotion to hard-delete.
- """
- if older_than_days < 1:
- return {
- "count": 0,
- "total_bytes": 0,
- "sample_filenames": [],
- "older_than_days": older_than_days,
- }
- now = datetime.now(timezone.utc)
- cutoff = _age_cutoff(now, older_than_days)
- last_activity = _last_activity_expr()
- clause = last_activity < cutoff
- count_stmt = select(func.count(PrintArchive.id)).where(clause)
- size_stmt = select(func.coalesce(func.sum(PrintArchive.file_size), 0)).where(clause)
- sample_stmt = select(PrintArchive.filename).where(clause).order_by(last_activity).limit(sample_limit)
- if not purge_stats:
- count_stmt = count_stmt.where(PrintArchive.deleted_at.is_(None))
- size_stmt = size_stmt.where(PrintArchive.deleted_at.is_(None))
- sample_stmt = sample_stmt.where(PrintArchive.deleted_at.is_(None))
- count_result = await db.execute(count_stmt)
- count = int(count_result.scalar() or 0)
- size_result = await db.execute(size_stmt)
- total_bytes = int(size_result.scalar() or 0)
- sample_result = await db.execute(sample_stmt)
- samples = [row[0] for row in sample_result.all()]
- return {
- "count": count,
- "total_bytes": total_bytes,
- "sample_filenames": samples,
- "older_than_days": older_than_days,
- }
- async def purge_older_than(
- self,
- db: AsyncSession,
- older_than_days: int,
- *,
- purge_stats: bool = False,
- ) -> int:
- """Bulk-delete archives older than ``older_than_days``. Returns count.
- Two modes, parameter-controlled (#1390):
- * ``purge_stats=False`` (default): each archive goes through
- :meth:`ArchiveService.soft_delete_archive` — files removed from disk
- and the row hidden via ``deleted_at``, but the linked
- ``PrintLogEntry`` rows are untouched so Quick Stats keeps every
- contribution (filament, cost, energy, time accuracy).
- * ``purge_stats=True``: linked log rows are hard-deleted up front and
- the archive row is hard-removed via
- :meth:`ArchiveService.delete_archive`. Matches the single-archive
- ``DELETE /archives/{id}?purge_stats=true`` semantics from #1343.
- Each delete runs in its own session so a commit-per-row doesn't churn
- the caller's session (matches how the sweeper uses
- :func:`_database.async_session` in production).
- """
- if older_than_days < 1:
- return 0
- now = datetime.now(timezone.utc)
- cutoff = _age_cutoff(now, older_than_days)
- # Soft-delete mode must also skip rows already soft-deleted, otherwise
- # a repeat sweeper run keeps re-touching the same rows. Hard-delete
- # mode doesn't filter — already-soft-deleted rows are eligible for
- # promotion to hard-delete when the user opts in.
- select_stmt = select(PrintArchive.id).where(_last_activity_expr() < cutoff)
- if not purge_stats:
- select_stmt = select_stmt.where(PrintArchive.deleted_at.is_(None))
- id_result = await db.execute(select_stmt)
- ids = [row[0] for row in id_result.all()]
- if not ids:
- return 0
- deleted = 0
- for archive_id in ids:
- async with _database.async_session() as delete_db:
- service = ArchiveService(delete_db)
- if purge_stats:
- # Hard-delete linked PrintLogEntry rows first so their
- # filament / cost contributions stop counting in /stats.
- # FK is ON DELETE SET NULL, so without this they'd
- # survive the archive row and keep showing up in totals
- # (#1343 / #1378 / #1390).
- from sqlalchemy import delete as sa_delete
- from backend.app.models.print_log import PrintLogEntry
- await delete_db.execute(sa_delete(PrintLogEntry).where(PrintLogEntry.archive_id == archive_id))
- await delete_db.commit()
- if await service.delete_archive(archive_id):
- deleted += 1
- else:
- if await service.soft_delete_archive(archive_id):
- deleted += 1
- if deleted:
- logger.info(
- "Archive purge: %s %d archive(s) (older_than_days=%d, purge_stats=%s)",
- "hard-deleted" if purge_stats else "soft-deleted",
- deleted,
- older_than_days,
- purge_stats,
- )
- return deleted
- archive_purge_service = ArchivePurgeService()
|