test_subtask_archive_resume.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. """Regression tests for subtask_id-based archive resume (#972).
  2. Before this fix, a Bambuddy restart during a long print (e.g. 13h) triggered
  3. the name-based "stale archive" path at 4h, cancelled the original row, and
  4. created a new archive with `started_at = now()` — losing ~9h of print time
  5. continuity. mstko reported this on a 37.5MB Broly print on an A1: after a
  6. container restart mid-print, the archive ended up showing ~1h37m duration
  7. for a print that actually ran 13h08m.
  8. The fix stores `subtask_id` (MQTT-provided job identifier) on the archive row.
  9. On print-start detection, the handler first tries to match an existing
  10. archive by subtask_id regardless of age — same id ⇒ same print ⇒ resume.
  11. Only unmatched prints fall through to the name-based fallback.
  12. #1485 follow-up: the name-based fallback no longer cancels on a blind 4h
  13. age cutoff (which duplicated the archive of any genuinely long print on
  14. every restart). It now decides resume-vs-stale from the printer's current
  15. progress — see TestStaleVsResume.
  16. """
  17. from datetime import datetime, timedelta, timezone
  18. import pytest
  19. from sqlalchemy import select
  20. from backend.app.models.archive import PrintArchive
  21. def _extract_subtask_id(data: dict) -> str | None:
  22. """Mirrors the extraction logic in main.on_print_start.
  23. Hoisted here so the test can pin the contract: Bambu reports "0" and
  24. empty string for local / non-cloud prints, both of which must collapse
  25. to None so we don't match every non-cloud print to every other one.
  26. """
  27. raw = data.get("raw_data") or {}
  28. val = raw.get("subtask_id")
  29. if val is None:
  30. return None
  31. val = str(val).strip()
  32. if val in ("", "0"):
  33. return None
  34. return val
  35. class TestSubtaskIdExtraction:
  36. """subtask_id extraction mirrors the in-handler logic."""
  37. def test_valid_id_returns_string(self):
  38. assert _extract_subtask_id({"raw_data": {"subtask_id": "12345"}}) == "12345"
  39. def test_zero_collapses_to_none(self):
  40. """Bambu reports '0' for local (non-cloud) prints; must not match anything."""
  41. assert _extract_subtask_id({"raw_data": {"subtask_id": "0"}}) is None
  42. def test_empty_collapses_to_none(self):
  43. assert _extract_subtask_id({"raw_data": {"subtask_id": ""}}) is None
  44. def test_missing_raw_data(self):
  45. assert _extract_subtask_id({}) is None
  46. def test_missing_subtask_id(self):
  47. assert _extract_subtask_id({"raw_data": {"foo": "bar"}}) is None
  48. def test_integer_value_stringified(self):
  49. """MQTT may send the id as an int — coerce consistently."""
  50. assert _extract_subtask_id({"raw_data": {"subtask_id": 12345}}) == "12345"
  51. def test_whitespace_trimmed(self):
  52. assert _extract_subtask_id({"raw_data": {"subtask_id": " 42 "}}) == "42"
  53. class TestSubtaskIdResume:
  54. """End-to-end DB behavior of the resume path: a second on_print_start
  55. for the same subtask_id must find and reuse the first archive row."""
  56. @pytest.fixture
  57. async def archive_factory(self, db_session, printer_factory):
  58. printer = await printer_factory()
  59. async def _create(
  60. subtask_id: str | None = None,
  61. status: str = "printing",
  62. age_hours: float = 0,
  63. failure_reason: str | None = None,
  64. ):
  65. started = datetime.now(timezone.utc) - timedelta(hours=age_hours)
  66. archive = PrintArchive(
  67. printer_id=printer.id,
  68. filename="Broly_Legendary.gcode.3mf",
  69. file_path="archive/1/x/Broly.gcode.3mf",
  70. file_size=100,
  71. print_name="Broly_Legendary",
  72. status=status,
  73. started_at=started,
  74. subtask_id=subtask_id,
  75. failure_reason=failure_reason,
  76. )
  77. # Override server_default on created_at so age-based tests work
  78. archive.created_at = started
  79. db_session.add(archive)
  80. await db_session.commit()
  81. await db_session.refresh(archive)
  82. return printer, archive
  83. return _create
  84. async def test_subtask_id_query_finds_matching_printing_row(self, archive_factory, db_session):
  85. """The lookup used by main.on_print_start finds a matching row even
  86. when the archive is older than the 4h name-based staleness cutoff."""
  87. printer, archive = await archive_factory(subtask_id="t-123", age_hours=10)
  88. result = await db_session.execute(
  89. select(PrintArchive)
  90. .where(PrintArchive.printer_id == printer.id)
  91. .where(PrintArchive.subtask_id == "t-123")
  92. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  93. .order_by(PrintArchive.created_at.desc())
  94. .limit(1)
  95. )
  96. found = result.scalar_one_or_none()
  97. assert found is not None
  98. assert found.id == archive.id
  99. async def test_subtask_id_revives_stale_cancelled_row(self, archive_factory, db_session):
  100. """If an older Bambuddy wrongly cancelled the archive (legacy 4h path),
  101. the next print-start with the same subtask_id must revive it rather
  102. than start a third row."""
  103. printer, archive = await archive_factory(
  104. subtask_id="t-456",
  105. status="cancelled",
  106. failure_reason="Stale - print likely cancelled or failed without status update",
  107. age_hours=10,
  108. )
  109. result = await db_session.execute(
  110. select(PrintArchive)
  111. .where(PrintArchive.printer_id == printer.id)
  112. .where(PrintArchive.subtask_id == "t-456")
  113. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  114. .order_by(PrintArchive.created_at.desc())
  115. .limit(1)
  116. )
  117. candidate = result.scalar_one_or_none()
  118. assert candidate is not None
  119. # Revival mirrors the main.py logic: only revive stale-cancelled rows,
  120. # not user-cancelled ones. The failure_reason prefix is the signal.
  121. is_stale_cancelled = (candidate.failure_reason or "").startswith("Stale")
  122. assert is_stale_cancelled
  123. candidate.status = "printing"
  124. candidate.failure_reason = None
  125. await db_session.commit()
  126. await db_session.refresh(candidate)
  127. assert candidate.status == "printing"
  128. # Crucially, started_at is preserved — this is the whole point of the
  129. # fix. A fresh archive would have started_at = now, losing continuity.
  130. age_after = datetime.now(timezone.utc) - candidate.started_at.replace(tzinfo=timezone.utc)
  131. assert age_after > timedelta(hours=9), "started_at must survive revival"
  132. async def test_subtask_id_null_does_not_match_other_nulls(self, archive_factory, db_session):
  133. """Two different non-cloud prints both have subtask_id=NULL. They
  134. must NOT match each other via the subtask_id lookup (which is why
  135. the handler filters by `subtask_id IS NOT NULL` in the Python layer
  136. before even running this query)."""
  137. printer, _archive = await archive_factory(subtask_id=None, age_hours=1)
  138. # This shape of query (subtask_id == None) would return rows via
  139. # SQLAlchemy's NULL handling, but the handler only runs it when
  140. # subtask_id is truthy — so the query is never issued for NULL.
  141. # Assert the guard by testing the subtask_id != "" branch.
  142. result = await db_session.execute(select(PrintArchive).where(PrintArchive.subtask_id == ""))
  143. found = result.scalar_one_or_none()
  144. assert found is None, "Empty string must not match NULL rows"
  145. async def test_completed_archive_not_resumed(self, archive_factory, db_session):
  146. """A completed archive with the same subtask_id must not be reopened
  147. as printing — that subtask's job is done; a new run is a new row."""
  148. printer, _ = await archive_factory(subtask_id="t-789", status="completed")
  149. result = await db_session.execute(
  150. select(PrintArchive)
  151. .where(PrintArchive.printer_id == printer.id)
  152. .where(PrintArchive.subtask_id == "t-789")
  153. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  154. )
  155. found = result.scalar_one_or_none()
  156. assert found is None
  157. def _looks_stale(live_progress: float | None, archive_age_seconds: float) -> bool:
  158. """Mirrors the name-fallback stale decision in main.on_print_start (#1485).
  159. A name-matched 'printing' archive is treated as a stale leftover ONLY when
  160. the printer clearly shows a different, freshly-started print: near-0%
  161. progress on an archive far too old to still be at 0%. Real progress, or
  162. unknown progress (printer not connected), always resumes — the old blind
  163. 4h age cutoff cancelled the live archive of every long print on restart.
  164. """
  165. return live_progress is not None and live_progress < 1.0 and archive_age_seconds > 2 * 60 * 60
  166. class TestStaleVsResume:
  167. """The progress-aware replacement for the 4h staleness heuristic (#1485)."""
  168. def test_long_print_in_progress_resumes_not_stale(self):
  169. """The reporter's case: a ~10h print, backend restarts, printer is
  170. mid-print at 60%. The old 4h cutoff cancelled + duplicated it; it
  171. must now resume regardless of age."""
  172. assert _looks_stale(60.0, archive_age_seconds=10 * 3600) is False
  173. def test_barely_started_long_print_resumes(self):
  174. """A genuine print a few percent in is still the same print."""
  175. assert _looks_stale(3.0, archive_age_seconds=5 * 3600) is False
  176. def test_fresh_print_with_old_archive_is_stale(self):
  177. """Printer reports a just-started print (~0%) but the matched archive
  178. is hours old — that archive is a dead leftover from a previous run."""
  179. assert _looks_stale(0.0, archive_age_seconds=9 * 3600) is True
  180. def test_fresh_print_with_young_archive_resumes(self):
  181. """~0% progress on a young archive is just the same print still
  182. heating / leveling — not stale."""
  183. assert _looks_stale(0.0, archive_age_seconds=20 * 60) is False
  184. def test_unknown_progress_never_cancels(self):
  185. """Printer not connected / progress unknown: resuming is the safe
  186. default — never cancel + duplicate when we can't tell."""
  187. assert _looks_stale(None, archive_age_seconds=10 * 3600) is False
  188. def test_sub_one_percent_old_archive_is_stale(self):
  189. """The boundary: just under 1% past the 2h mark counts as stale."""
  190. assert _looks_stale(0.5, archive_age_seconds=3 * 3600) is True