1 month ago · 46c246c504
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/backend/app/api/routes/printers.py
+++ b/backend/app/api/routes/printers.py
@@ -29,9 +29,11 @@ from backend.app.schemas.printer import (
 
				     PrintOptionsResponse,
			
 
				 )
			
 
				 from backend.app.services.bambu_ftp import (
			
 
				+    cache_3mf_download,
			
 
				     delete_file_async,
			
 
				     download_file_bytes_async,
			
 
				     download_file_try_paths_async,
			
 
				+    get_cached_3mf,
			
 
				     get_storage_info_async,
			
 
				     list_files_async,
			
 
				 )
			
@@ -792,43 +794,61 @@ async def get_printer_cover(
 
				     temp_path = settings.archive_dir / "temp" / f"cover_{printer_id}_{temp_filename}"
			
 
				     temp_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				 
			
 
				-    logger.info(
			
 
				-        f"Trying to download cover for '{subtask_name}' from {printer.ip_address} (trying {len(remote_paths)} paths)"
			
 
				-    )
			
 
				-
			
 
				-    # Retry logic for transient FTP failures
			
 
				-    max_retries = 2
			
 
				-    last_error = None
			
 
				+    # Cache check (#972): the archive-metadata flow in main.py may have already
			
 
				+    # downloaded this 3MF during the print-start handler. Reusing that file
			
 
				+    # avoids a second 36MB transfer competing with the printer's single FTP
			
 
				+    # socket (which produces the 425 errors that feed the retry storm).
			
 
				     downloaded = False
			
 
				-
			
 
				-    for attempt in range(max_retries + 1):
			
 
				-        try:
			
 
				-            downloaded = await download_file_try_paths_async(
			
 
				-                printer.ip_address,
			
 
				-                printer.access_code,
			
 
				-                remote_paths,
			
 
				-                temp_path,
			
 
				-                printer_model=printer.model,
			
 
				-            )
			
 
				-            if downloaded:
			
 
				-                break
			
 
				-        except Exception as e:
			
 
				-            last_error = e
			
 
				-            if attempt < max_retries:
			
 
				-                logger.warning("FTP download attempt %s failed: %s, retrying...", attempt + 1, e)
			
 
				-                await asyncio.sleep(0.5 * (attempt + 1))  # Brief backoff
			
 
				-            else:
			
 
				-                logger.error("FTP download failed after %s attempts: %s", max_retries + 1, e)
			
 
				-
			
 
				-    if last_error and not downloaded:
			
 
				-        raise HTTPException(503, f"FTP download temporarily unavailable: {last_error}")
			
 
				+    using_cached = False
			
 
				+    for candidate_name in possible_filenames:
			
 
				+        cached = get_cached_3mf(printer_id, candidate_name)
			
 
				+        if cached:
			
 
				+            logger.info("Cover using cached 3MF from %s (avoided duplicate FTP)", cached)
			
 
				+            temp_path = cached
			
 
				+            downloaded = True
			
 
				+            using_cached = True
			
 
				+            break
			
 
				 
			
 
				     if not downloaded:
			
 
				-        raise HTTPException(
			
 
				-            404,
			
 
				-            f"Could not download 3MF file for '{subtask_name}' from printer {printer.ip_address}. Tried: {possible_filenames}",
			
 
				+        logger.info(
			
 
				+            f"Trying to download cover for '{subtask_name}' from {printer.ip_address} (trying {len(remote_paths)} paths)"
			
 
				         )
			
 
				 
			
 
				+        # Retry logic for transient FTP failures
			
 
				+        max_retries = 2
			
 
				+        last_error = None
			
 
				+
			
 
				+        for attempt in range(max_retries + 1):
			
 
				+            try:
			
 
				+                downloaded = await download_file_try_paths_async(
			
 
				+                    printer.ip_address,
			
 
				+                    printer.access_code,
			
 
				+                    remote_paths,
			
 
				+                    temp_path,
			
 
				+                    printer_model=printer.model,
			
 
				+                )
			
 
				+                if downloaded:
			
 
				+                    break
			
 
				+            except Exception as e:
			
 
				+                last_error = e
			
 
				+                if attempt < max_retries:
			
 
				+                    logger.warning("FTP download attempt %s failed: %s, retrying...", attempt + 1, e)
			
 
				+                    await asyncio.sleep(0.5 * (attempt + 1))  # Brief backoff
			
 
				+                else:
			
 
				+                    logger.error("FTP download failed after %s attempts: %s", max_retries + 1, e)
			
 
				+
			
 
				+        if last_error and not downloaded:
			
 
				+            raise HTTPException(503, f"FTP download temporarily unavailable: {last_error}")
			
 
				+
			
 
				+        if not downloaded:
			
 
				+            raise HTTPException(
			
 
				+                404,
			
 
				+                f"Could not download 3MF file for '{subtask_name}' from printer {printer.ip_address}. Tried: {possible_filenames}",
			
 
				+            )
			
 
				+
			
 
				+        # Share the fresh download with the archive flow.
			
 
				+        cache_3mf_download(printer_id, temp_filename, temp_path)
			
 
				+
			
 
				     # Verify file actually exists and has content
			
 
				     if not temp_path.exists():
			
 
				         raise HTTPException(500, f"Download reported success but file not found: {temp_path}")
			
@@ -837,7 +857,8 @@ async def get_printer_cover(
 
				     logger.info("Downloaded file size: %s bytes", file_size)
			
 
				 
			
 
				     if file_size == 0:
			
 
				-        temp_path.unlink()
			
 
				+        if not using_cached:
			
 
				+            temp_path.unlink()
			
 
				         raise HTTPException(500, f"Downloaded file is empty for '{subtask_name}'")
			
 
				 
			
 
				     try:
			
@@ -900,7 +921,10 @@ async def get_printer_cover(
 
				             zf.close()
			
 
				 
			
 
				     finally:
			
 
				-        if temp_path.exists():
			
 
				+        # Only delete when this invocation owns the file. A cached path is
			
 
				+        # shared with the archive flow — removing it would force a refetch
			
 
				+        # the next time either flow needs the 3MF.
			
 
				+        if not using_cached and temp_path.exists():
			
 
				             temp_path.unlink()
			
 
				 
			
 
				 
			
--- a/backend/app/core/database.py
+++ b/backend/app/core/database.py
@@ -1427,6 +1427,12 @@ async def run_migrations(conn):
 
				     # energy tracking survives a backend restart mid-print.
			
 
				     await _safe_execute(conn, "ALTER TABLE print_archives ADD COLUMN energy_start_kwh REAL")
			
 
				 
			
 
				+    # Migration: Add subtask_id to print_archives (#972)
			
 
				+    # MQTT-provided task identifier used to resume the same archive row across a
			
 
				+    # backend restart mid-print. Without it, a long print (e.g. 13h) triggers
			
 
				+    # stale-cancel + new-archive, losing started_at continuity.
			
 
				+    await _safe_execute(conn, "ALTER TABLE print_archives ADD COLUMN subtask_id VARCHAR(64)")
			
 
				+
			
 
				     # Migration: Create smart_plug_energy_snapshots table (#941)
			
 
				     # Hourly snapshots of each plug's lifetime counter, so date-range queries in
			
 
				     # "total consumption" energy mode can compute (last - first) deltas.
			
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -63,7 +63,15 @@ from backend.app.core.websocket import ws_manager
 
				 from backend.app.models.smart_plug import SmartPlug
			
 
				 from backend.app.services.archive import ArchiveService
			
 
				 from backend.app.services.background_dispatch import background_dispatch
			
 
				-from backend.app.services.bambu_ftp import download_file_async, get_ftp_retry_settings, with_ftp_retry
			
 
				+from backend.app.services.bambu_ftp import (
			
 
				+    FileNotOnPrinterError,
			
 
				+    cache_3mf_download,
			
 
				+    clear_3mf_cache,
			
 
				+    download_file_async,
			
 
				+    get_cached_3mf,
			
 
				+    get_ftp_retry_settings,
			
 
				+    with_ftp_retry,
			
 
				+)
			
 
				 from backend.app.services.bambu_mqtt import PrinterState
			
 
				 from backend.app.services.github_backup import github_backup_service
			
 
				 from backend.app.services.homeassistant import homeassistant_service
			
@@ -1493,6 +1501,18 @@ async def on_print_start(printer_id: int, data: dict):
 
				         filename = data.get("filename", "")
			
 
				         subtask_name = data.get("subtask_name", "")
			
 
				 
			
 
				+        # MQTT subtask_id uniquely identifies a print job on the printer. When
			
 
				+        # present, it lets us match an archive across a backend restart (#972):
			
 
				+        # same id → same print → resume the existing row instead of cancelling
			
 
				+        # it and recreating from scratch (which loses started_at). Treat "0"
			
 
				+        # and "" as absent — Bambu reports "0" for non-cloud / local prints.
			
 
				+        raw_mqtt = data.get("raw_data") or {}
			
 
				+        subtask_id = raw_mqtt.get("subtask_id")
			
 
				+        if subtask_id is not None:
			
 
				+            subtask_id = str(subtask_id).strip()
			
 
				+            if subtask_id in ("", "0"):
			
 
				+                subtask_id = None
			
 
				+
			
 
				         logger.info("[CALLBACK] Print start detected - filename: %s, subtask: %s", filename, subtask_name)
			
 
				 
			
 
				         # Skip calibration prints — internal printer files should not be archived
			
@@ -1548,6 +1568,8 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 # Update archive status to printing
			
 
				                 archive.status = "printing"
			
 
				                 archive.started_at = datetime.now(timezone.utc)
			
 
				+                if subtask_id and not archive.subtask_id:
			
 
				+                    archive.subtask_id = subtask_id
			
 
				                 await db.commit()
			
 
				 
			
 
				                 # Track as active print
			
@@ -1619,29 +1641,80 @@ async def on_print_start(printer_id: int, data: dict):
 
				         # This prevents duplicates when backend restarts during an active print
			
 
				         from backend.app.models.archive import PrintArchive
			
 
				 
			
 
				-        check_name = subtask_name or filename.split("/")[-1].replace(".gcode", "").replace(".3mf", "")
			
 
				-        existing = await db.execute(
			
 
				-            select(PrintArchive)
			
 
				-            .where(PrintArchive.printer_id == printer_id)
			
 
				-            .where(PrintArchive.status == "printing")
			
 
				-            .where(
			
 
				-                or_(
			
 
				-                    PrintArchive.print_name == check_name,
			
 
				-                    PrintArchive.filename.in_(
			
 
				-                        [
			
 
				-                            f"{check_name}.3mf",
			
 
				-                            f"{check_name}.gcode.3mf",
			
 
				-                        ]
			
 
				-                    ),
			
 
				+        existing_archive: PrintArchive | None = None
			
 
				+
			
 
				+        # Preferred match: subtask_id equality. MQTT reports the same subtask_id
			
 
				+        # across a backend restart for the same print, so this is the most
			
 
				+        # reliable way to reattach. We also accept a previously stale-cancelled
			
 
				+        # archive here so users upgrading mid-print get revived when the row
			
 
				+        # their earlier Bambuddy version wrongly cancelled reappears (#972).
			
 
				+        if subtask_id:
			
 
				+            by_id = await db.execute(
			
 
				+                select(PrintArchive)
			
 
				+                .where(PrintArchive.printer_id == printer_id)
			
 
				+                .where(PrintArchive.subtask_id == subtask_id)
			
 
				+                .where(PrintArchive.status.in_(["printing", "cancelled"]))
			
 
				+                .order_by(PrintArchive.created_at.desc())
			
 
				+                .limit(1)
			
 
				+            )
			
 
				+            candidate = by_id.scalar_one_or_none()
			
 
				+            if candidate and (candidate.status == "printing" or (candidate.failure_reason or "").startswith("Stale")):
			
 
				+                existing_archive = candidate
			
 
				+
			
 
				+        # Fallback match: name-based lookup. Kept as-is for prints whose
			
 
				+        # subtask_id is missing ("0" / local / non-cloud prints).
			
 
				+        if existing_archive is None:
			
 
				+            check_name = subtask_name or filename.split("/")[-1].replace(".gcode", "").replace(".3mf", "")
			
 
				+            existing = await db.execute(
			
 
				+                select(PrintArchive)
			
 
				+                .where(PrintArchive.printer_id == printer_id)
			
 
				+                .where(PrintArchive.status == "printing")
			
 
				+                .where(
			
 
				+                    or_(
			
 
				+                        PrintArchive.print_name == check_name,
			
 
				+                        PrintArchive.filename.in_(
			
 
				+                            [
			
 
				+                                f"{check_name}.3mf",
			
 
				+                                f"{check_name}.gcode.3mf",
			
 
				+                            ]
			
 
				+                        ),
			
 
				+                    )
			
 
				                 )
			
 
				+                .order_by(PrintArchive.created_at.desc())
			
 
				+                .limit(1)
			
 
				             )
			
 
				-            .order_by(PrintArchive.created_at.desc())
			
 
				-            .limit(1)
			
 
				-        )
			
 
				-        existing_archive = existing.scalar_one_or_none()
			
 
				+            existing_archive = existing.scalar_one_or_none()
			
 
				+
			
 
				         if existing_archive:
			
 
				-            # Check if archive is stale (older than 4 hours) - likely a failed/cancelled print
			
 
				-            # that didn't get properly updated
			
 
				+            # subtask_id match → always resume, regardless of age. Same print,
			
 
				+            # just a backend restart. Revive if it was previously stale-cancelled.
			
 
				+            subtask_match = bool(subtask_id and existing_archive.subtask_id == subtask_id)
			
 
				+
			
 
				+            if subtask_match:
			
 
				+                if existing_archive.status == "cancelled":
			
 
				+                    logger.warning(
			
 
				+                        "Reviving stale-cancelled archive %s — matching subtask_id %s confirms same print (#972)",
			
 
				+                        existing_archive.id,
			
 
				+                        subtask_id,
			
 
				+                    )
			
 
				+                    existing_archive.status = "printing"
			
 
				+                    existing_archive.failure_reason = None
			
 
				+                    await db.commit()
			
 
				+                else:
			
 
				+                    logger.info("Resuming archive %s on subtask_id match (%s)", existing_archive.id, subtask_id)
			
 
				+                _active_prints[(printer_id, existing_archive.filename)] = existing_archive.id
			
 
				+                if existing_archive.energy_start_kwh is None:
			
 
				+                    await _record_energy_start(existing_archive, printer_id, db, context="subtask-resume")
			
 
				+                if not notification_sent:
			
 
				+                    archive_data = {
			
 
				+                        "print_time_seconds": existing_archive.print_time_seconds,
			
 
				+                        "created_by_id": existing_archive.created_by_id,
			
 
				+                    }
			
 
				+                    await _send_print_start_notification(printer_id, data, archive_data, logger)
			
 
				+                _load_objects_from_archive(existing_archive, printer_id, logger)
			
 
				+                return
			
 
				+
			
 
				+            # Name-match only: fall back to the legacy 4h staleness heuristic.
			
 
				             archive_age = datetime.now(timezone.utc) - existing_archive.created_at.replace(tzinfo=timezone.utc)
			
 
				             if archive_age.total_seconds() > 4 * 60 * 60:  # 4 hours
			
 
				                 logger.warning(
			
@@ -1659,6 +1732,10 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 )
			
 
				                 # Track this as the active print
			
 
				                 _active_prints[(printer_id, existing_archive.filename)] = existing_archive.id
			
 
				+                # Attach subtask_id retroactively so future restarts can resume
			
 
				+                if subtask_id and not existing_archive.subtask_id:
			
 
				+                    existing_archive.subtask_id = subtask_id
			
 
				+                    await db.commit()
			
 
				                 # Also set up energy tracking if not already tracked (#941: persisted column)
			
 
				                 if existing_archive.energy_start_kwh is None:
			
 
				                     await _record_energy_start(existing_archive, printer_id, db, context="existing-printing")
			
@@ -1714,19 +1791,38 @@ async def on_print_start(printer_id: int, data: dict):
 
				         temp_path = None
			
 
				         downloaded_filename = None
			
 
				 
			
 
				+        # Cache check: cover endpoint may have already pulled this 3MF during
			
 
				+        # the print (frontend opens the card and shows the thumbnail) — reuse
			
 
				+        # that file instead of re-downloading 36MB over the same FTP link that
			
 
				+        # just served it (#972). The cache keys on a normalized filename so
			
 
				+        # variants like "X", "X.3mf", "X.gcode.3mf" all collapse to one entry.
			
 
				+        for try_filename in possible_names:
			
 
				+            if not try_filename.endswith(".3mf"):
			
 
				+                continue
			
 
				+            cached = get_cached_3mf(printer_id, try_filename)
			
 
				+            if cached:
			
 
				+                logger.info("Reusing cached 3MF from %s (avoided duplicate FTP)", cached)
			
 
				+                temp_path = cached
			
 
				+                downloaded_filename = try_filename
			
 
				+                break
			
 
				+
			
 
				         # Get FTP retry settings
			
 
				         ftp_retry_enabled, ftp_retry_count, ftp_retry_delay, ftp_timeout = await get_ftp_retry_settings()
			
 
				 
			
 
				-        for try_filename in possible_names:
			
 
				+        for try_filename in possible_names if not downloaded_filename else []:
			
 
				             if not try_filename.endswith(".3mf"):
			
 
				                 continue
			
 
				 
			
 
				+            # Root (/) is where BambuStudio/OrcaSlicer uploads land on A1/P1-series
			
 
				+            # printers, so try it first — deferring it to last cost #972's reporter
			
 
				+            # ~48 minutes of retries on /cache//model//data//data/Metadata before
			
 
				+            # landing on the path that actually had the file.
			
 
				             remote_paths = [
			
 
				+                f"/{try_filename}",
			
 
				                 f"/cache/{try_filename}",
			
 
				                 f"/model/{try_filename}",
			
 
				                 f"/data/{try_filename}",
			
 
				                 f"/data/Metadata/{try_filename}",
			
 
				-                f"/{try_filename}",
			
 
				             ]
			
 
				 
			
 
				             temp_path = app_settings.archive_dir / "temp" / try_filename
			
@@ -1748,6 +1844,7 @@ async def on_print_start(printer_id: int, data: dict):
 
				                             max_retries=ftp_retry_count,
			
 
				                             retry_delay=ftp_retry_delay,
			
 
				                             operation_name=f"Download 3MF from {remote_path}",
			
 
				+                            non_retry_exceptions=(FileNotOnPrinterError,),
			
 
				                         )
			
 
				                     else:
			
 
				                         downloaded = await download_file_async(
			
@@ -1762,7 +1859,14 @@ async def on_print_start(printer_id: int, data: dict):
 
				                     if downloaded:
			
 
				                         downloaded_filename = try_filename
			
 
				                         logger.info("Downloaded: %s", remote_path)
			
 
				+                        # Populate shared cache so the cover endpoint (if it
			
 
				+                        # runs next) doesn't refetch the same 36MB over FTP.
			
 
				+                        cache_3mf_download(printer_id, try_filename, temp_path)
			
 
				                         break
			
 
				+                except FileNotOnPrinterError:
			
 
				+                    # 550 — file isn't at this path. Advance to next candidate
			
 
				+                    # without burning the retry budget.
			
 
				+                    logger.debug("3MF not at %s (550), trying next path", remote_path)
			
 
				                 except Exception as e:
			
 
				                     logger.debug("FTP download failed for %s: %s", remote_path, e)
			
 
				 
			
@@ -1826,6 +1930,7 @@ async def on_print_start(printer_id: int, data: dict):
 
				                             if downloaded:
			
 
				                                 downloaded_filename = fname
			
 
				                                 logger.info("Found and downloaded from %s: %s", search_dir, fname)
			
 
				+                                cache_3mf_download(printer_id, fname, temp_path)
			
 
				                                 break
			
 
				                 except Exception as e:
			
 
				                     logger.debug("Failed to list %s: %s", search_dir, e)
			
@@ -1866,6 +1971,7 @@ async def on_print_start(printer_id: int, data: dict):
 
				                     print_time_seconds=fallback_print_time,
			
 
				                     status="printing",
			
 
				                     started_at=datetime.now(timezone.utc),
			
 
				+                    subtask_id=subtask_id,
			
 
				                     extra_data={"no_3mf_available": True, "original_subtask": subtask_name, "_print_data": data},
			
 
				                 )
			
 
				 
			
@@ -1951,6 +2057,7 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 printer_id=printer_id,
			
 
				                 source_file=temp_path,
			
 
				                 print_data={**data, "status": "printing"},
			
 
				+                subtask_id=subtask_id,
			
 
				             )
			
 
				 
			
 
				             if archive:
			
@@ -2055,7 +2162,12 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 except Exception as e:
			
 
				                     logger.warning("[TIMELAPSE] Failed to capture baseline at print start: %s", e)
			
 
				         finally:
			
 
				-            if temp_path and temp_path.exists():
			
 
				+            # Keep temp_path around until print completes so the cover endpoint
			
 
				+            # can reuse it (#972). Cache eviction in on_print_complete deletes
			
 
				+            # the file. If the cache entry was evicted early (file vanished),
			
 
				+            # clean up any stragglers here to avoid leaking disk on retries.
			
 
				+            cached_now = get_cached_3mf(printer_id, downloaded_filename) if downloaded_filename else None
			
 
				+            if temp_path and temp_path.exists() and cached_now != temp_path:
			
 
				                 temp_path.unlink()
			
 
				 
			
 
				 
			
@@ -2297,6 +2409,11 @@ async def on_print_complete(printer_id: int, data: dict):
 
				 
			
 
				     logger.info("[CALLBACK] on_print_complete started for printer %s", printer_id)
			
 
				 
			
 
				+    # Drop the 3MF download cache for this printer (#972). The print is over,
			
 
				+    # nothing else legitimately needs the bytes; keeping them would only risk
			
 
				+    # handing a stale file to the next print if it reuses the same name.
			
 
				+    clear_3mf_cache(printer_id)
			
 
				+
			
 
				     try:
			
 
				         ws_data = {
			
 
				             "status": data.get("status"),
			
--- a/backend/app/models/archive.py
+++ b/backend/app/models/archive.py
@@ -43,6 +43,12 @@ class PrintArchive(Base):
 
				     started_at: Mapped[datetime | None] = mapped_column(DateTime)
			
 
				     completed_at: Mapped[datetime | None] = mapped_column(DateTime)
			
 
				 
			
 
				+    # Printer-assigned subtask identifier from MQTT. Used to resume the same
			
 
				+    # archive row across a backend restart during a long-running print (#972):
			
 
				+    # if the same subtask_id reappears after restart, we know it's the same
			
 
				+    # print and keep the original row instead of cancel-then-create.
			
 
				+    subtask_id: Mapped[str | None] = mapped_column(String(64), nullable=True)
			
 
				+
			
 
				     # Extended metadata (JSON blob for flexibility)
			
 
				     extra_data: Mapped[dict | None] = mapped_column(JSON)
			
 
				 
			
--- a/backend/app/services/archive.py
+++ b/backend/app/services/archive.py
@@ -855,6 +855,7 @@ class ArchiveService:
 
				         created_by_id: int | None = None,
			
 
				         original_filename: str | None = None,
			
 
				         project_id: int | None = None,
			
 
				+        subtask_id: str | None = None,
			
 
				     ) -> PrintArchive | None:
			
 
				         """Archive a 3MF file with metadata.
			
 
				 
			
@@ -867,6 +868,9 @@ class ArchiveService:
 
				                 stored with UUID names)
			
 
				             project_id: Project to associate this archive with (optional, set when triggered
			
 
				                 from the project view)
			
 
				+            subtask_id: MQTT-provided task identifier (optional). Used to match an
			
 
				+                existing archive across a backend restart mid-print so the
			
 
				+                original row can be resumed instead of cancelled (#972).
			
 
				         """
			
 
				         # Verify printer exists if specified
			
 
				         if printer_id is not None:
			
@@ -978,6 +982,7 @@ class ArchiveService:
 
				             extra_data=metadata,
			
 
				             created_by_id=created_by_id,
			
 
				             project_id=project_id,
			
 
				+            subtask_id=subtask_id,
			
 
				         )
			
 
				 
			
 
				         self.db.add(archive)
			
--- a/backend/app/services/bambu_ftp.py
+++ b/backend/app/services/bambu_ftp.py
@@ -16,6 +16,17 @@ logger = logging.getLogger(__name__)
 
				 T = TypeVar("T")
			
 
				 
			
 
				 
			
 
				+class FileNotOnPrinterError(Exception):
			
 
				+    """Raised when a remote FTP path returns 550 (file not found).
			
 
				+
			
 
				+    550 means the file does not exist at that path — retrying the same path
			
 
				+    will never succeed. Callers use this sentinel with with_ftp_retry's
			
 
				+    non_retry_exceptions to immediately move on to the next candidate path
			
 
				+    instead of burning the full retry budget (up to 11 × 30s per path) on
			
 
				+    a lookup that cannot recover.
			
 
				+    """
			
 
				+
			
 
				+
			
 
				 class ImplicitFTP_TLS(FTP_TLS):
			
 
				     """FTP_TLS subclass for implicit FTPS (port 990) with model-specific SSL handling.
			
 
				 
			
@@ -280,14 +291,21 @@ class BambuFTPClient:
 
				             logger.info("Successfully downloaded %s to %s (%s bytes)", remote_path, local_path, file_size)
			
 
				             return True
			
 
				         except (OSError, ftplib.Error) as e:
			
 
				-            # Log at INFO level so we can see failures in normal logs
			
 
				-            logger.info("FTP download failed for %s: %s", remote_path, e)
			
 
				             # Clean up partial file if it exists
			
 
				             if local_path.exists():
			
 
				                 try:
			
 
				                     local_path.unlink()
			
 
				                 except OSError:
			
 
				                     pass  # Best-effort partial file cleanup; not critical if removal fails
			
 
				+            # 550 means the file is not at this path. Surface as a sentinel so
			
 
				+            # with_ftp_retry can abandon this path immediately and the caller
			
 
				+            # can advance to the next candidate instead of retrying 11× at
			
 
				+            # 30s intervals (the pattern that cost #972's reporter ~48min).
			
 
				+            if isinstance(e, ftplib.error_perm) and str(e).startswith("550"):
			
 
				+                logger.info("FTP download failed for %s: %s (not on printer)", remote_path, e)
			
 
				+                raise FileNotOnPrinterError(f"{remote_path}: {e}") from e
			
 
				+            # Log at INFO level so we can see failures in normal logs
			
 
				+            logger.info("FTP download failed for %s: %s", remote_path, e)
			
 
				             return False
			
 
				 
			
 
				     def diagnose_storage(self) -> dict:
			
@@ -597,6 +615,79 @@ class BambuFTPClient:
 
				         return result if result else None
			
 
				 
			
 
				 
			
 
				+# Shared 3MF download cache (#972).
			
 
				+#
			
 
				+# Both the cover thumbnail endpoint (api/routes/printers.py) and the archive
			
 
				+# metadata flow (main.py) fetch the same 3MF file over FTP during a print.
			
 
				+# On slow / contended links (A1 Wi-Fi, large files) the duplicate transfers
			
 
				+# compete for the printer's single FTP socket and trigger 425 "can't open
			
 
				+# data channel" errors, feeding back into cause-2's retry storm.
			
 
				+#
			
 
				+# This cache stores the local path of a successfully-downloaded 3MF keyed
			
 
				+# by (printer_id, normalized_name). Whichever flow downloads first populates
			
 
				+# the cache; the other flow reuses the file read-only. Evicted on print
			
 
				+# completion so a later print with the same name re-downloads fresh bytes.
			
 
				+_threemf_path_cache: dict[tuple[int, str], Path] = {}
			
 
				+
			
 
				+
			
 
				+def normalize_3mf_name(name: str) -> str:
			
 
				+    """Collapse various 3MF filename variants to a cache key.
			
 
				+
			
 
				+    Bambu tooling produces names as bare subtask ("Part"), with .3mf, with
			
 
				+    .gcode.3mf, or (Studio-normalized) with spaces → underscores. All of
			
 
				+    these refer to the same print job on the same printer, so they must
			
 
				+    hash to the same cache key.
			
 
				+    """
			
 
				+    # Lowercase first so .3MF / .GCODE.3MF variants strip cleanly — a
			
 
				+    # real-world case since Windows-side tooling sometimes uppercases
			
 
				+    # extensions.
			
 
				+    cleaned = name.strip().lower().replace(".gcode.3mf", "").replace(".gcode", "").replace(".3mf", "")
			
 
				+    return cleaned.replace(" ", "_")
			
 
				+
			
 
				+
			
 
				+def cache_3mf_download(printer_id: int, name: str, local_path: Path) -> None:
			
 
				+    """Record a successfully-downloaded 3MF so a sibling flow can reuse it."""
			
 
				+    _threemf_path_cache[(printer_id, normalize_3mf_name(name))] = local_path
			
 
				+
			
 
				+
			
 
				+def get_cached_3mf(printer_id: int, name: str) -> Path | None:
			
 
				+    """Return a cached 3MF path for this printer/name if the file still exists."""
			
 
				+    key = (printer_id, normalize_3mf_name(name))
			
 
				+    cached = _threemf_path_cache.get(key)
			
 
				+    if cached and cached.exists() and cached.stat().st_size > 0:
			
 
				+        return cached
			
 
				+    # Evict dead entry — the file was cleaned up (temp dir clean, manual
			
 
				+    # deletion, restart) so the cache value is no longer usable.
			
 
				+    if cached:
			
 
				+        _threemf_path_cache.pop(key, None)
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def clear_3mf_cache(printer_id: int | None = None, delete_files: bool = True) -> None:
			
 
				+    """Drop cache entries for one printer (or all with None).
			
 
				+
			
 
				+    When ``delete_files`` is True (default) the on-disk 3MF is removed as well
			
 
				+    — called from on_print_complete so temp files don't accumulate across
			
 
				+    prints. Tests that want to inspect the cache contents disable this.
			
 
				+    """
			
 
				+
			
 
				+    def _maybe_unlink(path: Path) -> None:
			
 
				+        if delete_files and path.exists():
			
 
				+            try:
			
 
				+                path.unlink()
			
 
				+            except OSError as exc:
			
 
				+                logger.debug("3MF cache cleanup skipped %s: %s", path, exc)
			
 
				+
			
 
				+    if printer_id is None:
			
 
				+        for path in list(_threemf_path_cache.values()):
			
 
				+            _maybe_unlink(path)
			
 
				+        _threemf_path_cache.clear()
			
 
				+        return
			
 
				+    for key in [k for k in _threemf_path_cache if k[0] == printer_id]:
			
 
				+        _maybe_unlink(_threemf_path_cache[key])
			
 
				+        _threemf_path_cache.pop(key, None)
			
 
				+
			
 
				+
			
 
				 async def download_file_async(
			
 
				     ip_address: str,
			
 
				     access_code: str,
			
@@ -711,7 +802,16 @@ async def download_file_try_paths_async(
 
				             return False
			
 
				 
			
 
				         try:
			
 
				-            return any(client.download_to_file(remote_path, local_path) for remote_path in remote_paths)
			
 
				+            # FileNotOnPrinterError signals "try the next path", not "give up" —
			
 
				+            # this function's whole purpose is to walk a list of candidates
			
 
				+            # over one connection. Only a real transport error should bubble.
			
 
				+            for remote_path in remote_paths:
			
 
				+                try:
			
 
				+                    if client.download_to_file(remote_path, local_path):
			
 
				+                        return True
			
 
				+                except FileNotOnPrinterError:
			
 
				+                    continue
			
 
				+            return False
			
 
				         finally:
			
 
				             client.disconnect()
			
 
				 
			
--- a/backend/tests/unit/services/test_bambu_ftp.py
+++ b/backend/tests/unit/services/test_bambu_ftp.py
@@ -20,11 +20,17 @@ import pytest
 
				 
			
 
				 from backend.app.services.bambu_ftp import (
			
 
				     BambuFTPClient,
			
 
				+    FileNotOnPrinterError,
			
 
				+    cache_3mf_download,
			
 
				+    clear_3mf_cache,
			
 
				     delete_file_async,
			
 
				     download_file_async,
			
 
				     download_file_try_paths_async,
			
 
				+    get_cached_3mf,
			
 
				     list_files_async,
			
 
				+    normalize_3mf_name,
			
 
				     upload_file_async,
			
 
				+    with_ftp_retry,
			
 
				 )
			
 
				 
			
 
				 # Brief delay to allow pyftpdlib to flush uploaded files to disk.
			
@@ -265,14 +271,19 @@ class TestDownload:
 
				         assert not local.exists()
			
 
				         client.disconnect()
			
 
				 
			
 
				-    def test_download_to_file_missing_returns_false(self, ftp_client_factory, tmp_path):
			
 
				-        """Missing file returns False."""
			
 
				+    def test_download_to_file_missing_raises_not_on_printer(self, ftp_client_factory, tmp_path):
			
 
				+        """Missing file raises FileNotOnPrinterError so callers can short-circuit
			
 
				+        the retry loop — 550 means the file isn't there and retrying won't help."""
			
 
				+        from backend.app.services.bambu_ftp import FileNotOnPrinterError
			
 
				+
			
 
				         local = tmp_path / "missing.bin"
			
 
				         client = ftp_client_factory()
			
 
				         client.connect()
			
 
				-        result = client.download_to_file("/cache/no_such_file.bin", local)
			
 
				-        assert result is False
			
 
				-        client.disconnect()
			
 
				+        try:
			
 
				+            with pytest.raises(FileNotOnPrinterError):
			
 
				+                client.download_to_file("/cache/no_such_file.bin", local)
			
 
				+        finally:
			
 
				+            client.disconnect()
			
 
				 
			
 
				     def test_download_large_file(self, ftp_client_factory, ftp_server):
			
 
				         """Large file download (>1MB) works correctly."""
			
@@ -1001,3 +1012,156 @@ class TestFailureScenarios:
 
				             downloaded = client2.download_file("/cache/voidresp_test.3mf")
			
 
				             assert downloaded == content, f"Content mismatch for model={model}"
			
 
				             client2.disconnect()
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# Short-circuit retries on 550 (#972)
			
 
				+# ---------------------------------------------------------------------------
			
 
				+class TestFileNotOnPrinterShortCircuit:
			
 
				+    """FileNotOnPrinterError must bypass the retry budget.
			
 
				+
			
 
				+    Before this fix, a 3MF path that wasn't on the printer (550) cost
			
 
				+    `ftp_retry_count + 1` attempts × `ftp_retry_delay` seconds per candidate
			
 
				+    path. With ftp_retry_count=10 and four candidate paths, that's ~22 min
			
 
				+    of dead retries before the real path is tried. #972 in the wild showed
			
 
				+    48 min of retrying paths that didn't exist.
			
 
				+    """
			
 
				+
			
 
				+    async def test_with_ftp_retry_propagates_file_not_on_printer_without_retrying(self):
			
 
				+        """with_ftp_retry raises FileNotOnPrinterError on first attempt.
			
 
				+
			
 
				+        Verifies non_retry_exceptions short-circuits before the retry loop
			
 
				+        has a chance to sleep and try again.
			
 
				+        """
			
 
				+        attempts = {"n": 0}
			
 
				+
			
 
				+        async def always_missing(*_args, **_kwargs):
			
 
				+            attempts["n"] += 1
			
 
				+            raise FileNotOnPrinterError("/cache/absent.3mf: 550")
			
 
				+
			
 
				+        with pytest.raises(FileNotOnPrinterError):
			
 
				+            await with_ftp_retry(
			
 
				+                always_missing,
			
 
				+                max_retries=10,
			
 
				+                retry_delay=0.01,
			
 
				+                operation_name="test 550 short-circuit",
			
 
				+                non_retry_exceptions=(FileNotOnPrinterError,),
			
 
				+            )
			
 
				+
			
 
				+        assert attempts["n"] == 1, "550 must not trigger any retry"
			
 
				+
			
 
				+    async def test_with_ftp_retry_still_retries_transient_errors(self):
			
 
				+        """Non-550 exceptions continue to retry up to max_retries + 1."""
			
 
				+        attempts = {"n": 0}
			
 
				+
			
 
				+        async def flaky(*_args, **_kwargs):
			
 
				+            attempts["n"] += 1
			
 
				+            raise TimeoutError("transient")
			
 
				+
			
 
				+        result = await with_ftp_retry(
			
 
				+            flaky,
			
 
				+            max_retries=2,
			
 
				+            retry_delay=0.01,
			
 
				+            operation_name="test transient retries",
			
 
				+            non_retry_exceptions=(FileNotOnPrinterError,),
			
 
				+        )
			
 
				+
			
 
				+        assert result is None
			
 
				+        assert attempts["n"] == 3, "Transient errors should retry to exhaustion"
			
 
				+
			
 
				+    def test_download_to_file_raises_on_missing_path(self, ftp_client_factory, tmp_path):
			
 
				+        """download_to_file surfaces 550 as FileNotOnPrinterError end-to-end
			
 
				+        against the real mock FTPS server, not just a hand-rolled mock."""
			
 
				+        local = tmp_path / "never_downloaded.3mf"
			
 
				+        client = ftp_client_factory()
			
 
				+        client.connect()
			
 
				+        try:
			
 
				+            with pytest.raises(FileNotOnPrinterError):
			
 
				+                client.download_to_file("/cache/does_not_exist.3mf", local)
			
 
				+        finally:
			
 
				+            client.disconnect()
			
 
				+        assert not local.exists(), "Partial file must be cleaned up on 550"
			
 
				+
			
 
				+
			
 
				+# ---------------------------------------------------------------------------
			
 
				+# 3MF download cache (#972)
			
 
				+# ---------------------------------------------------------------------------
			
 
				+class TestThreeMFCache:
			
 
				+    """Cover endpoint and archive flow share downloaded 3MF bytes via this
			
 
				+    cache. Tests isolate themselves with clear_3mf_cache(delete_files=False)
			
 
				+    so they don't clobber each other."""
			
 
				+
			
 
				+    def setup_method(self):
			
 
				+        clear_3mf_cache(delete_files=False)
			
 
				+
			
 
				+    def teardown_method(self):
			
 
				+        clear_3mf_cache(delete_files=False)
			
 
				+
			
 
				+    def test_normalize_collapses_filename_variants(self):
			
 
				+        """Bambu names vary (.3mf, .gcode.3mf, with spaces) — they all map
			
 
				+        to the same cache slot so both flows agree on the key."""
			
 
				+        canonical = normalize_3mf_name("Broly_Legendary.gcode.3mf")
			
 
				+        assert normalize_3mf_name("Broly_Legendary.3mf") == canonical
			
 
				+        assert normalize_3mf_name("Broly_Legendary") == canonical
			
 
				+        # Bambu Studio rewrites spaces to underscores on upload — treat as equal
			
 
				+        assert normalize_3mf_name("Broly Legendary") == canonical
			
 
				+        # Case is also collapsed so keys match across capitalizations
			
 
				+        assert normalize_3mf_name("BROLY_LEGENDARY.3MF") == canonical
			
 
				+
			
 
				+    def test_cache_hit_returns_stored_path(self, tmp_path):
			
 
				+        """get_cached_3mf returns the same Path that was put in."""
			
 
				+        f = tmp_path / "Broly.gcode.3mf"
			
 
				+        f.write_bytes(b"fake 3mf content")
			
 
				+        cache_3mf_download(1, "Broly.gcode.3mf", f)
			
 
				+        assert get_cached_3mf(1, "Broly.gcode.3mf") == f
			
 
				+
			
 
				+    def test_cache_lookup_uses_normalized_name(self, tmp_path):
			
 
				+        """Caching under .gcode.3mf and querying with bare name still hits."""
			
 
				+        f = tmp_path / "Broly.gcode.3mf"
			
 
				+        f.write_bytes(b"x")
			
 
				+        cache_3mf_download(1, "Broly.gcode.3mf", f)
			
 
				+        assert get_cached_3mf(1, "Broly.3mf") == f
			
 
				+        assert get_cached_3mf(1, "Broly") == f
			
 
				+
			
 
				+    def test_cache_miss_on_different_printer(self, tmp_path):
			
 
				+        """Printer id is part of the key — two printers never collide."""
			
 
				+        f = tmp_path / "A.3mf"
			
 
				+        f.write_bytes(b"x")
			
 
				+        cache_3mf_download(1, "A.3mf", f)
			
 
				+        assert get_cached_3mf(2, "A.3mf") is None
			
 
				+
			
 
				+    def test_cache_evicts_when_file_deleted(self, tmp_path):
			
 
				+        """Stale entry (file gone) returns None and is dropped from the dict."""
			
 
				+        f = tmp_path / "A.3mf"
			
 
				+        f.write_bytes(b"x")
			
 
				+        cache_3mf_download(1, "A.3mf", f)
			
 
				+        f.unlink()
			
 
				+        assert get_cached_3mf(1, "A.3mf") is None
			
 
				+        # Re-populating after eviction works — no ghost entries remain.
			
 
				+        f.write_bytes(b"y")
			
 
				+        cache_3mf_download(1, "A.3mf", f)
			
 
				+        assert get_cached_3mf(1, "A.3mf") == f
			
 
				+
			
 
				+    def test_clear_by_printer_scoped(self, tmp_path):
			
 
				+        """Clearing one printer leaves the other untouched."""
			
 
				+        f1 = tmp_path / "one.3mf"
			
 
				+        f1.write_bytes(b"1")
			
 
				+        f2 = tmp_path / "two.3mf"
			
 
				+        f2.write_bytes(b"2")
			
 
				+        cache_3mf_download(1, "one.3mf", f1)
			
 
				+        cache_3mf_download(2, "two.3mf", f2)
			
 
				+        clear_3mf_cache(1)
			
 
				+        assert get_cached_3mf(1, "one.3mf") is None
			
 
				+        assert get_cached_3mf(2, "two.3mf") == f2
			
 
				+        # clear_3mf_cache defaulted to delete_files=True, so the file is gone
			
 
				+        assert not f1.exists()
			
 
				+        assert f2.exists()
			
 
				+
			
 
				+    def test_clear_without_deleting_files(self, tmp_path):
			
 
				+        """delete_files=False leaves files on disk — used by tests."""
			
 
				+        f = tmp_path / "keep.3mf"
			
 
				+        f.write_bytes(b"x")
			
 
				+        cache_3mf_download(1, "keep.3mf", f)
			
 
				+        clear_3mf_cache(1, delete_files=False)
			
 
				+        assert get_cached_3mf(1, "keep.3mf") is None
			
 
				+        assert f.exists()
			
--- a/backend/tests/unit/test_subtask_archive_resume.py
+++ b/backend/tests/unit/test_subtask_archive_resume.py
@@ -0,0 +1,185 @@
 
				+"""Regression tests for subtask_id-based archive resume (#972).
			
 
				+
			
 
				+Before this fix, a Bambuddy restart during a long print (e.g. 13h) triggered
			
 
				+the name-based "stale archive" path at 4h, cancelled the original row, and
			
 
				+created a new archive with `started_at = now()` — losing ~9h of print time
			
 
				+continuity. mstko reported this on a 37.5MB Broly print on an A1: after a
			
 
				+container restart mid-print, the archive ended up showing ~1h37m duration
			
 
				+for a print that actually ran 13h08m.
			
 
				+
			
 
				+The fix stores `subtask_id` (MQTT-provided job identifier) on the archive row.
			
 
				+On print-start detection, the handler first tries to match an existing
			
 
				+archive by subtask_id regardless of age — same id ⇒ same print ⇒ resume.
			
 
				+Only unmatched prints fall through to the legacy 4h staleness heuristic.
			
 
				+"""
			
 
				+
			
 
				+from datetime import datetime, timedelta, timezone
			
 
				+
			
 
				+import pytest
			
 
				+from sqlalchemy import select
			
 
				+
			
 
				+from backend.app.models.archive import PrintArchive
			
 
				+
			
 
				+
			
 
				+def _extract_subtask_id(data: dict) -> str | None:
			
 
				+    """Mirrors the extraction logic in main.on_print_start.
			
 
				+
			
 
				+    Hoisted here so the test can pin the contract: Bambu reports "0" and
			
 
				+    empty string for local / non-cloud prints, both of which must collapse
			
 
				+    to None so we don't match every non-cloud print to every other one.
			
 
				+    """
			
 
				+    raw = data.get("raw_data") or {}
			
 
				+    val = raw.get("subtask_id")
			
 
				+    if val is None:
			
 
				+        return None
			
 
				+    val = str(val).strip()
			
 
				+    if val in ("", "0"):
			
 
				+        return None
			
 
				+    return val
			
 
				+
			
 
				+
			
 
				+class TestSubtaskIdExtraction:
			
 
				+    """subtask_id extraction mirrors the in-handler logic."""
			
 
				+
			
 
				+    def test_valid_id_returns_string(self):
			
 
				+        assert _extract_subtask_id({"raw_data": {"subtask_id": "12345"}}) == "12345"
			
 
				+
			
 
				+    def test_zero_collapses_to_none(self):
			
 
				+        """Bambu reports '0' for local (non-cloud) prints; must not match anything."""
			
 
				+        assert _extract_subtask_id({"raw_data": {"subtask_id": "0"}}) is None
			
 
				+
			
 
				+    def test_empty_collapses_to_none(self):
			
 
				+        assert _extract_subtask_id({"raw_data": {"subtask_id": ""}}) is None
			
 
				+
			
 
				+    def test_missing_raw_data(self):
			
 
				+        assert _extract_subtask_id({}) is None
			
 
				+
			
 
				+    def test_missing_subtask_id(self):
			
 
				+        assert _extract_subtask_id({"raw_data": {"foo": "bar"}}) is None
			
 
				+
			
 
				+    def test_integer_value_stringified(self):
			
 
				+        """MQTT may send the id as an int — coerce consistently."""
			
 
				+        assert _extract_subtask_id({"raw_data": {"subtask_id": 12345}}) == "12345"
			
 
				+
			
 
				+    def test_whitespace_trimmed(self):
			
 
				+        assert _extract_subtask_id({"raw_data": {"subtask_id": "  42  "}}) == "42"
			
 
				+
			
 
				+
			
 
				+class TestSubtaskIdResume:
			
 
				+    """End-to-end DB behavior of the resume path: a second on_print_start
			
 
				+    for the same subtask_id must find and reuse the first archive row."""
			
 
				+
			
 
				+    @pytest.fixture
			
 
				+    async def archive_factory(self, db_session, printer_factory):
			
 
				+        printer = await printer_factory()
			
 
				+
			
 
				+        async def _create(
			
 
				+            subtask_id: str | None = None,
			
 
				+            status: str = "printing",
			
 
				+            age_hours: float = 0,
			
 
				+            failure_reason: str | None = None,
			
 
				+        ):
			
 
				+            started = datetime.now(timezone.utc) - timedelta(hours=age_hours)
			
 
				+            archive = PrintArchive(
			
 
				+                printer_id=printer.id,
			
 
				+                filename="Broly_Legendary.gcode.3mf",
			
 
				+                file_path="archive/1/x/Broly.gcode.3mf",
			
 
				+                file_size=100,
			
 
				+                print_name="Broly_Legendary",
			
 
				+                status=status,
			
 
				+                started_at=started,
			
 
				+                subtask_id=subtask_id,
			
 
				+                failure_reason=failure_reason,
			
 
				+            )
			
 
				+            # Override server_default on created_at so age-based tests work
			
 
				+            archive.created_at = started
			
 
				+            db_session.add(archive)
			
 
				+            await db_session.commit()
			
 
				+            await db_session.refresh(archive)
			
 
				+            return printer, archive
			
 
				+
			
 
				+        return _create
			
 
				+
			
 
				+    async def test_subtask_id_query_finds_matching_printing_row(self, archive_factory, db_session):
			
 
				+        """The lookup used by main.on_print_start finds a matching row even
			
 
				+        when the archive is older than the 4h name-based staleness cutoff."""
			
 
				+        printer, archive = await archive_factory(subtask_id="t-123", age_hours=10)
			
 
				+
			
 
				+        result = await db_session.execute(
			
 
				+            select(PrintArchive)
			
 
				+            .where(PrintArchive.printer_id == printer.id)
			
 
				+            .where(PrintArchive.subtask_id == "t-123")
			
 
				+            .where(PrintArchive.status.in_(["printing", "cancelled"]))
			
 
				+            .order_by(PrintArchive.created_at.desc())
			
 
				+            .limit(1)
			
 
				+        )
			
 
				+        found = result.scalar_one_or_none()
			
 
				+        assert found is not None
			
 
				+        assert found.id == archive.id
			
 
				+
			
 
				+    async def test_subtask_id_revives_stale_cancelled_row(self, archive_factory, db_session):
			
 
				+        """If an older Bambuddy wrongly cancelled the archive (legacy 4h path),
			
 
				+        the next print-start with the same subtask_id must revive it rather
			
 
				+        than start a third row."""
			
 
				+        printer, archive = await archive_factory(
			
 
				+            subtask_id="t-456",
			
 
				+            status="cancelled",
			
 
				+            failure_reason="Stale - print likely cancelled or failed without status update",
			
 
				+            age_hours=10,
			
 
				+        )
			
 
				+
			
 
				+        result = await db_session.execute(
			
 
				+            select(PrintArchive)
			
 
				+            .where(PrintArchive.printer_id == printer.id)
			
 
				+            .where(PrintArchive.subtask_id == "t-456")
			
 
				+            .where(PrintArchive.status.in_(["printing", "cancelled"]))
			
 
				+            .order_by(PrintArchive.created_at.desc())
			
 
				+            .limit(1)
			
 
				+        )
			
 
				+        candidate = result.scalar_one_or_none()
			
 
				+        assert candidate is not None
			
 
				+
			
 
				+        # Revival mirrors the main.py logic: only revive stale-cancelled rows,
			
 
				+        # not user-cancelled ones. The failure_reason prefix is the signal.
			
 
				+        is_stale_cancelled = (candidate.failure_reason or "").startswith("Stale")
			
 
				+        assert is_stale_cancelled
			
 
				+
			
 
				+        candidate.status = "printing"
			
 
				+        candidate.failure_reason = None
			
 
				+        await db_session.commit()
			
 
				+        await db_session.refresh(candidate)
			
 
				+
			
 
				+        assert candidate.status == "printing"
			
 
				+        # Crucially, started_at is preserved — this is the whole point of the
			
 
				+        # fix. A fresh archive would have started_at = now, losing continuity.
			
 
				+        age_after = datetime.now(timezone.utc) - candidate.started_at.replace(tzinfo=timezone.utc)
			
 
				+        assert age_after > timedelta(hours=9), "started_at must survive revival"
			
 
				+
			
 
				+    async def test_subtask_id_null_does_not_match_other_nulls(self, archive_factory, db_session):
			
 
				+        """Two different non-cloud prints both have subtask_id=NULL. They
			
 
				+        must NOT match each other via the subtask_id lookup (which is why
			
 
				+        the handler filters by `subtask_id IS NOT NULL` in the Python layer
			
 
				+        before even running this query)."""
			
 
				+        printer, _archive = await archive_factory(subtask_id=None, age_hours=1)
			
 
				+
			
 
				+        # This shape of query (subtask_id == None) would return rows via
			
 
				+        # SQLAlchemy's NULL handling, but the handler only runs it when
			
 
				+        # subtask_id is truthy — so the query is never issued for NULL.
			
 
				+        # Assert the guard by testing the subtask_id != "" branch.
			
 
				+        result = await db_session.execute(select(PrintArchive).where(PrintArchive.subtask_id == ""))
			
 
				+        found = result.scalar_one_or_none()
			
 
				+        assert found is None, "Empty string must not match NULL rows"
			
 
				+
			
 
				+    async def test_completed_archive_not_resumed(self, archive_factory, db_session):
			
 
				+        """A completed archive with the same subtask_id must not be reopened
			
 
				+        as printing — that subtask's job is done; a new run is a new row."""
			
 
				+        printer, _ = await archive_factory(subtask_id="t-789", status="completed")
			
 
				+
			
 
				+        result = await db_session.execute(
			
 
				+            select(PrintArchive)
			
 
				+            .where(PrintArchive.printer_id == printer.id)
			
 
				+            .where(PrintArchive.subtask_id == "t-789")
			
 
				+            .where(PrintArchive.status.in_(["printing", "cancelled"]))
			
 
				+        )
			
 
				+        found = result.scalar_one_or_none()
			
 
				+        assert found is None