| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- """Archive auto-purge service (#1008 follow-up).
- Age-based hard-delete of print archives. Unlike the library trash flow there is
- no soft-delete intermediate — archives are historical print records, so the
- "undo" window the library bin provides doesn't apply here. A user who wants to
- keep an archive should download or favourite it before the purge window elapses.
- The sweeper runs on the same 15-minute cadence as the library trash sweeper but
- throttles actual purge runs to once per 24h. Admins can also trigger a manual
- purge from the Settings UI.
- """
- from __future__ import annotations
- import asyncio
- import logging
- from datetime import datetime, timedelta, timezone
- from sqlalchemy import func, select
- from sqlalchemy.ext.asyncio import AsyncSession
- from backend.app.core import database as _database
- from backend.app.models.archive import PrintArchive
- from backend.app.models.settings import Settings
- from backend.app.services.archive import ArchiveService
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Keys of the Settings rows that persist the auto-purge state: the on/off flag,
# the retention threshold in days, and the ISO timestamp of the last sweep.
AUTO_PURGE_ENABLED_KEY = "archive_auto_purge_enabled"
AUTO_PURGE_DAYS_KEY = "archive_auto_purge_days"
AUTO_PURGE_LAST_RUN_KEY = "archive_auto_purge_last_run"

# Threshold used when the stored day count is missing or not an integer.
DEFAULT_AUTO_PURGE_DAYS = 365
# 7-day floor mirrors the library auto-purge; anything shorter treats archives
# as ephemeral which is rarely what anyone wants.
MIN_AUTO_PURGE_DAYS = 7
# Upper bound that configured thresholds are clamped to.
MAX_AUTO_PURGE_DAYS = 3650
def _age_cutoff(now: datetime, older_than_days: int) -> datetime:
    """Return the instant lying ``older_than_days`` days before ``now``.

    Anything timestamped strictly earlier than the returned value is older
    than the requested number of days.
    """
    retention_window = timedelta(days=older_than_days)
    return now - retention_window
class ArchivePurgeService:
    """Manages archive auto-purge sweeper + admin-triggered manual purges.

    The background sweeper wakes every ``_check_interval`` seconds and runs an
    age-based hard delete at most once per 24h, and only while the auto-purge
    setting is enabled. ``preview_purge`` and ``purge_older_than`` can also be
    called directly with an explicit threshold.
    """

    def __init__(self):
        # Handle of the running sweeper task; ``None`` while the sweeper is stopped.
        self._scheduler_task: asyncio.Task | None = None
        # Match library trash cadence — the 24h throttle keeps actual work rare.
        self._check_interval = 900

    async def start_scheduler(self):
        """Start the background sweeper task; does nothing if one is already set."""
        if self._scheduler_task is not None:
            return
        logger.info("Starting archive auto-purge sweeper")
        self._scheduler_task = asyncio.create_task(self._scheduler_loop())

    def stop_scheduler(self):
        """Cancel the sweeper task, if any, and drop the handle."""
        if self._scheduler_task:
            self._scheduler_task.cancel()
            self._scheduler_task = None
            logger.info("Stopped archive auto-purge sweeper")

    async def _scheduler_loop(self):
        """Sleep, then check whether a purge is due; repeat until cancelled."""
        while True:
            try:
                # Sleep first, so the first check happens one interval after start.
                await asyncio.sleep(self._check_interval)
                async with _database.async_session() as db:
                    await self._maybe_run_auto_purge(db)
            except asyncio.CancelledError:
                break
            except Exception as e:  # pragma: no cover - defensive
                # Keep the sweeper alive on unexpected errors; ``exception``
                # records the traceback alongside the message.
                logger.exception("Error in archive auto-purge sweeper: %s", e)
                # Short back-off before going back to the regular cadence.
                await asyncio.sleep(60)

    # ---- Settings -----------------------------------------------------

    @staticmethod
    async def _read_setting(db: AsyncSession, key: str) -> str | None:
        """Return the stored value for ``key``, or ``None`` when no row exists."""
        result = await db.execute(select(Settings.value).where(Settings.key == key))
        return result.scalar_one_or_none()

    @staticmethod
    async def _write_setting(db: AsyncSession, key: str, value: str) -> None:
        """Insert or update the row for ``key``. Does not commit."""
        result = await db.execute(select(Settings).where(Settings.key == key))
        row = result.scalar_one_or_none()
        if row is None:
            db.add(Settings(key=key, value=value))
        else:
            row.value = value

    async def get_settings(self, db: AsyncSession) -> dict:
        """Return ``{enabled, days}``. Missing keys default to disabled / 365d.

        ``enabled`` is true only when the stored value equals ``"true"``
        (case-insensitive). A non-integer ``days`` value falls back to the
        default, and the result is clamped to the allowed min/max range.
        """
        enabled_raw = await self._read_setting(db, AUTO_PURGE_ENABLED_KEY)
        days_raw = await self._read_setting(db, AUTO_PURGE_DAYS_KEY)
        enabled = (enabled_raw or "false").lower() == "true"
        try:
            days = int(days_raw) if days_raw is not None else DEFAULT_AUTO_PURGE_DAYS
        except (TypeError, ValueError):
            days = DEFAULT_AUTO_PURGE_DAYS
        days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, days))
        return {"enabled": enabled, "days": days}

    async def set_settings(self, db: AsyncSession, *, enabled: bool, days: int) -> dict:
        """Persist the auto-purge settings and commit.

        ``days`` is clamped to the allowed min/max range before it is stored.
        Returns the values actually written, as ``{enabled, days}``.
        """
        clamped_days = max(MIN_AUTO_PURGE_DAYS, min(MAX_AUTO_PURGE_DAYS, int(days)))
        await self._write_setting(db, AUTO_PURGE_ENABLED_KEY, "true" if enabled else "false")
        await self._write_setting(db, AUTO_PURGE_DAYS_KEY, str(clamped_days))
        await db.commit()
        return {"enabled": enabled, "days": clamped_days}

    @staticmethod
    def _parse_last_run(raw: str | None) -> datetime | None:
        """Parse a stored last-run stamp into a timezone-aware datetime.

        Returns ``None`` for an empty or unparsable value. A stamp that carries
        no UTC offset is interpreted as UTC, so the result can always be
        subtracted from an aware "now" without raising ``TypeError``.
        """
        if not raw:
            return None
        try:
            # Accept a trailing "Z" as a UTC designator.
            parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            return None
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    async def _get_last_run(self, db: AsyncSession) -> datetime | None:
        """Return when the auto-purge last ran, or ``None`` if unknown."""
        raw = await self._read_setting(db, AUTO_PURGE_LAST_RUN_KEY)
        return self._parse_last_run(raw)

    async def _stamp_last_run(self, db: AsyncSession, when: datetime) -> None:
        """Store ``when`` (ISO 8601) as the last-run timestamp and commit."""
        await self._write_setting(db, AUTO_PURGE_LAST_RUN_KEY, when.isoformat())
        await db.commit()

    async def _maybe_run_auto_purge(self, db: AsyncSession) -> int:
        """Run the auto-purge if enabled and >=24h has elapsed since last run.

        Returns the number of archives deleted (0 when skipped). The last-run
        stamp is written only after the purge returns, so a purge that raises
        is attempted again on the next sweeper tick.
        """
        cfg = await self.get_settings(db)
        if not cfg["enabled"]:
            return 0
        now = datetime.now(timezone.utc)
        last = await self._get_last_run(db)
        if last is not None and (now - last) < timedelta(hours=24):
            return 0
        deleted = await self.purge_older_than(db, older_than_days=cfg["days"])
        await self._stamp_last_run(db, now)
        if deleted:
            logger.info(
                "Archive auto-purge: hard-deleted %d archive(s) (threshold=%d days)",
                deleted,
                cfg["days"],
            )
        return deleted

    # ---- Preview / purge ---------------------------------------------

    async def preview_purge(
        self,
        db: AsyncSession,
        older_than_days: int,
        sample_limit: int = 5,
    ) -> dict:
        """Count + size of archives eligible for purge. Read-only.

        Returns a dict with ``count``, ``total_bytes``, ``sample_filenames``
        (up to ``sample_limit`` names, ordered by ``created_at``) and the
        echoed ``older_than_days``. A threshold below one day yields an empty
        preview without querying the database.
        """
        if older_than_days < 1:
            return {
                "count": 0,
                "total_bytes": 0,
                "sample_filenames": [],
                "older_than_days": older_than_days,
            }
        now = datetime.now(timezone.utc)
        cutoff = _age_cutoff(now, older_than_days)
        clause = PrintArchive.created_at < cutoff
        count_result = await db.execute(select(func.count(PrintArchive.id)).where(clause))
        count = int(count_result.scalar() or 0)
        size_result = await db.execute(select(func.coalesce(func.sum(PrintArchive.file_size), 0)).where(clause))
        total_bytes = int(size_result.scalar() or 0)
        sample_result = await db.execute(
            select(PrintArchive.filename).where(clause).order_by(PrintArchive.created_at).limit(sample_limit)
        )
        samples = [row[0] for row in sample_result.all()]
        return {
            "count": count,
            "total_bytes": total_bytes,
            "sample_filenames": samples,
            "older_than_days": older_than_days,
        }

    async def purge_older_than(self, db: AsyncSession, older_than_days: int) -> int:
        """Hard-delete archives older than ``older_than_days``. Returns count.

        Delegates to :meth:`ArchiveService.delete_archive` for every row so the
        on-disk cleanup (3MF, thumbnail, timelapse, photos) goes through the
        same safety-checked path as manual deletion. Each delete runs in its
        own session so a commit-per-row doesn't churn the caller's session
        (and matches how the sweeper uses :func:`_database.async_session` in production).

        A threshold below one day deletes nothing. Only deletes for which
        ``delete_archive`` returns a truthy value are counted.
        """
        if older_than_days < 1:
            return 0
        now = datetime.now(timezone.utc)
        cutoff = _age_cutoff(now, older_than_days)
        # Collect the ids up front on the caller's session; deletion happens elsewhere.
        id_result = await db.execute(select(PrintArchive.id).where(PrintArchive.created_at < cutoff))
        ids = [row[0] for row in id_result.all()]
        if not ids:
            return 0
        deleted = 0
        # NOTE(review): there is no per-row error handling — an exception from
        # one delete propagates to the caller and the remaining ids are skipped.
        for archive_id in ids:
            async with _database.async_session() as delete_db:
                service = ArchiveService(delete_db)
                if await service.delete_archive(archive_id):
                    deleted += 1
        if deleted:
            logger.info(
                "Archive purge: hard-deleted %d archive(s) (older_than_days=%d)",
                deleted,
                older_than_days,
            )
        return deleted
# Module-level instance of the service, created at import time.
archive_purge_service = ArchivePurgeService()
|