6 dní pred · 745ed847e6
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -2039,8 +2039,23 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 # Update archive status to printing
			
 
				                 archive.status = "printing"
			
 
				                 archive.started_at = datetime.now(timezone.utc)
			
 
				-                if subtask_id and not archive.subtask_id:
			
 
				-                    archive.subtask_id = subtask_id
			
 
				+                # Persist a restart-stable id so a later restart resumes this
			
 
				+                # archive by subtask_id instead of name-matching + duplicating
			
 
				+                # it (#1485). The printer often hasn't echoed subtask_id back
			
 
				+                # this soon after dispatch, so fall back to the id Bambuddy
			
 
				+                # minted when it sent the print command. Scoped to this
			
 
				+                # expected-print branch on purpose: an expected match means
			
 
				+                # Bambuddy dispatched this exact print in this process, so the
			
 
				+                # client's last-dispatch id genuinely belongs to it — using it
			
 
				+                # for an externally-started print could mis-tag the archive.
			
 
				+                effective_subtask_id = subtask_id
			
 
				+                if not effective_subtask_id:
			
 
				+                    _client = printer_manager.get_client(printer_id)
			
 
				+                    _dispatched = getattr(_client, "last_dispatch_subtask_id", None) if _client else None
			
 
				+                    if _dispatched:
			
 
				+                        effective_subtask_id = str(_dispatched).strip() or None
			
 
				+                if effective_subtask_id and not archive.subtask_id:
			
 
				+                    archive.subtask_id = effective_subtask_id
			
 
				                 # #1403 follow-up: VP-queue archives are created with
			
 
				                 # printer_id=None at queue-add time (we don't know which
			
 
				                 # printer will run the job yet). When the print actually
			
@@ -2208,18 +2223,31 @@ async def on_print_start(printer_id: int, data: dict):
 
				                 _load_objects_from_archive(existing_archive, printer_id, logger)
			
 
				                 return
			
 
				 
			
 
				-            # Name-match only: fall back to the legacy 4h staleness heuristic.
			
 
				+            # Name-match only (no subtask_id to anchor on): decide resume vs.
			
 
				+            # stale from the printer's *current* progress, not wall-clock age.
			
 
				+            # A genuinely long print used to trip a blind 4h cutoff and have its
			
 
				+            # live archive cancelled + duplicated on every backend restart
			
 
				+            # (#1485). If the printer reports real progress, this name-matched
			
 
				+            # 'printing' archive IS that ongoing print — resume it whatever its
			
 
				+            # age. Only treat it as a stale leftover when the printer clearly
			
 
				+            # shows a different, freshly-started print: near-0% progress on an
			
 
				+            # archive far too old to still be at 0%. Unknown progress (printer
			
 
				+            # not connected) never cancels — resuming is the safe default.
			
 
				             archive_age = datetime.now(timezone.utc) - existing_archive.created_at.replace(tzinfo=timezone.utc)
			
 
				-            if archive_age.total_seconds() > 4 * 60 * 60:  # 4 hours
			
 
				+            live_status = printer_manager.get_status(printer_id)
			
 
				+            live_progress = getattr(live_status, "progress", None) if live_status else None
			
 
				+            looks_stale = (
			
 
				+                live_progress is not None and live_progress < 1.0 and archive_age.total_seconds() > 2 * 60 * 60
			
 
				+            )
			
 
				+            if looks_stale:
			
 
				                 logger.warning(
			
 
				-                    f"Found stale 'printing' archive {existing_archive.id} (age: {archive_age}), "
			
 
				-                    f"marking as cancelled and creating new archive"
			
 
				+                    f"Found stale 'printing' archive {existing_archive.id} (age: {archive_age}, "
			
 
				+                    f"printer progress {live_progress:.0f}%) — marking cancelled and creating new archive"
			
 
				                 )
			
 
				                 existing_archive.status = "cancelled"
			
 
				                 existing_archive.failure_reason = "Stale - print likely cancelled or failed without status update"
			
 
				                 await db.commit()
			
 
				                 # Fall through to create new archive (don't return)
			
 
				-                _existing_archive = None  # Clear so we don't use stale archive
			
 
				             else:
			
 
				                 logger.info(
			
 
				                     f"Skipping duplicate - already have printing archive {existing_archive.id} for {check_name}"
			
--- a/backend/app/services/bambu_mqtt.py
+++ b/backend/app/services/bambu_mqtt.py
@@ -362,6 +362,12 @@ class BambuMQTTClient:
 
				         self._timelapse_during_print: bool = False  # Track if timelapse was active during this print
			
 
				         self._last_valid_progress: float = 0.0  # Last non-zero progress (firmware resets on cancel)
			
 
				         self._last_valid_layer_num: int = 0  # Last non-zero layer (firmware resets on cancel)
			
 
				+        # The subtask_id minted for the most recent start_print() command. The
			
 
				+        # printer echoes it back in status, but often not within the first few
			
 
				+        # seconds — so on_print_start uses this as the id source when the
			
 
				+        # printer hasn't reported it yet, letting queue/scheduled archives
			
 
				+        # persist a restart-stable id from the moment they dispatch (#1485).
			
 
				+        self.last_dispatch_subtask_id: str | None = None
			
 
				         self._is_dual_nozzle: bool = False  # Set when device.extruder.info has >= 2 entries
			
 
				         self._message_log: deque[MQTTLogEntry] = deque(maxlen=100)
			
 
				         self._logging_enabled: bool = False
			
@@ -3358,6 +3364,9 @@ class BambuMQTTClient:
 
				             # Modulo keeps uniqueness within a ~24-day wrap window; `or 1` guards
			
 
				             # the (astronomically unlikely) zero case since task_id=0 is rejected.
			
 
				             submission_id = str(int(time.time() * 1000) % 2_147_483_647 or 1)
			
 
				+            # Remember it so on_print_start can persist a restart-stable id on
			
 
				+            # the archive even before the printer echoes subtask_id back (#1485).
			
 
				+            self.last_dispatch_subtask_id = submission_id
			
 
				 
			
 
				             command = {
			
 
				                 "print": {
			
--- a/backend/tests/unit/services/test_bambu_mqtt.py
+++ b/backend/tests/unit/services/test_bambu_mqtt.py
@@ -3906,6 +3906,24 @@ class TestStartPrintUniqueIdentityFields:
 
				         assert int(cmd["task_id"]) > 0
			
 
				         assert len(cmd["task_id"]) <= 64
			
 
				 
			
 
				+    def test_last_dispatch_subtask_id_records_the_minted_id(self, mqtt_client):
			
 
				+        """#1485: start_print records the minted id on the client so
			
 
				+        on_print_start can persist it on the archive before the printer
			
 
				+        echoes subtask_id back — letting a later restart resume by id."""
			
 
				+        assert mqtt_client.last_dispatch_subtask_id is None
			
 
				+        mqtt_client.start_print("test.3mf")
			
 
				+        cmd = self._get_published_command(mqtt_client)
			
 
				+        assert mqtt_client.last_dispatch_subtask_id == cmd["subtask_id"]
			
 
				+
			
 
				+    def test_last_dispatch_subtask_id_updates_per_submission(self, mqtt_client):
			
 
				+        """Each dispatch overwrites the recorded id with the new submission's."""
			
 
				+        mqtt_client.start_print("test.3mf")
			
 
				+        first = mqtt_client.last_dispatch_subtask_id
			
 
				+        time.sleep(0.002)
			
 
				+        mqtt_client.start_print("test.3mf")
			
 
				+        assert mqtt_client.last_dispatch_subtask_id != first
			
 
				+        assert mqtt_client.last_dispatch_subtask_id == self._get_published_command(mqtt_client)["subtask_id"]
			
 
				+
			
 
				     def test_submission_id_fits_signed_int32(self, mqtt_client):
			
 
				         """Regression for #1042: P1S firmware clamps oversized task identity
			
 
				         fields to signed int32 max (2**31-1 = 2147483647). If we send raw
			
--- a/backend/tests/unit/test_subtask_archive_resume.py
+++ b/backend/tests/unit/test_subtask_archive_resume.py
@@ -10,7 +10,12 @@ for a print that actually ran 13h08m.
 
				 The fix stores `subtask_id` (MQTT-provided job identifier) on the archive row.
			
 
				 On print-start detection, the handler first tries to match an existing
			
 
				 archive by subtask_id regardless of age — same id ⇒ same print ⇒ resume.
			
 
				-Only unmatched prints fall through to the legacy 4h staleness heuristic.
			
 
				+Only unmatched prints fall through to the name-based fallback.
			
 
				+
			
 
				+#1485 follow-up: the name-based fallback no longer cancels on a blind 4h
			
 
				+age cutoff (which duplicated the archive of any genuinely long print on
			
 
				+every restart). It now decides resume-vs-stale from the printer's current
			
 
				+progress — see TestStaleVsResume.
			
 
				 """
			
 
				 
			
 
				 from datetime import datetime, timedelta, timezone
			
@@ -183,3 +188,48 @@ class TestSubtaskIdResume:
 
				         )
			
 
				         found = result.scalar_one_or_none()
			
 
				         assert found is None
			
 
				+
			
 
				+
			
 
				+def _looks_stale(live_progress: float | None, archive_age_seconds: float) -> bool:
			
 
				+    """Mirrors the name-fallback stale decision in main.on_print_start (#1485).
			
 
				+
			
 
				+    A name-matched 'printing' archive is treated as a stale leftover ONLY when
			
 
				+    the printer clearly shows a different, freshly-started print: near-0%
			
 
				+    progress on an archive far too old to still be at 0%. Real progress, or
			
 
				+    unknown progress (printer not connected), always resumes — the old blind
			
 
				+    4h age cutoff cancelled the live archive of every long print on restart.
			
 
				+    """
			
 
				+    return live_progress is not None and live_progress < 1.0 and archive_age_seconds > 2 * 60 * 60
			
 
				+
			
 
				+
			
 
				+class TestStaleVsResume:
			
 
				+    """The progress-aware replacement for the 4h staleness heuristic (#1485)."""
			
 
				+
			
 
				+    def test_long_print_in_progress_resumes_not_stale(self):
			
 
				+        """The reporter's case: a ~10h print, backend restarts, printer is
			
 
				+        mid-print at 60%. The old 4h cutoff cancelled + duplicated it; it
			
 
				+        must now resume regardless of age."""
			
 
				+        assert _looks_stale(60.0, archive_age_seconds=10 * 3600) is False
			
 
				+
			
 
				+    def test_barely_started_long_print_resumes(self):
			
 
				+        """A genuine print a few percent in is still the same print."""
			
 
				+        assert _looks_stale(3.0, archive_age_seconds=5 * 3600) is False
			
 
				+
			
 
				+    def test_fresh_print_with_old_archive_is_stale(self):
			
 
				+        """Printer reports a just-started print (~0%) but the matched archive
			
 
				+        is hours old — that archive is a dead leftover from a previous run."""
			
 
				+        assert _looks_stale(0.0, archive_age_seconds=9 * 3600) is True
			
 
				+
			
 
				+    def test_fresh_print_with_young_archive_resumes(self):
			
 
				+        """~0% progress on a young archive is just the same print still
			
 
				+        heating / leveling — not stale."""
			
 
				+        assert _looks_stale(0.0, archive_age_seconds=20 * 60) is False
			
 
				+
			
 
				+    def test_unknown_progress_never_cancels(self):
			
 
				+        """Printer not connected / progress unknown: resuming is the safe
			
 
				+        default — never cancel + duplicate when we can't tell."""
			
 
				+        assert _looks_stale(None, archive_age_seconds=10 * 3600) is False
			
 
				+
			
 
				+    def test_sub_one_percent_old_archive_is_stale(self):
			
 
				+        """The boundary: just under 1% past the 2h mark counts as stale."""
			
 
				+        assert _looks_stale(0.5, archive_age_seconds=3 * 3600) is True