test_subtask_archive_resume.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. """Regression tests for subtask_id-based archive resume (#972).
  2. Before this fix, a Bambuddy restart during a long print (e.g. 13h) triggered
  3. the name-based "stale archive" path at 4h, cancelled the original row, and
  4. created a new archive with `started_at = now()` — losing ~9h of print time
  5. continuity. mstko reported this on a 37.5MB Broly print on an A1: after a
  6. container restart mid-print, the archive ended up showing ~1h37m duration
  7. for a print that actually ran 13h08m.
  8. The fix stores `subtask_id` (MQTT-provided job identifier) on the archive row.
  9. On print-start detection, the handler first tries to match an existing
  10. archive by subtask_id regardless of age — same id ⇒ same print ⇒ resume.
  11. Only unmatched prints fall through to the legacy 4h staleness heuristic.
  12. """
  13. from datetime import datetime, timedelta, timezone
  14. import pytest
  15. from sqlalchemy import select
  16. from backend.app.models.archive import PrintArchive
  17. def _extract_subtask_id(data: dict) -> str | None:
  18. """Mirrors the extraction logic in main.on_print_start.
  19. Hoisted here so the test can pin the contract: Bambu reports "0" and
  20. empty string for local / non-cloud prints, both of which must collapse
  21. to None so we don't match every non-cloud print to every other one.
  22. """
  23. raw = data.get("raw_data") or {}
  24. val = raw.get("subtask_id")
  25. if val is None:
  26. return None
  27. val = str(val).strip()
  28. if val in ("", "0"):
  29. return None
  30. return val
  31. class TestSubtaskIdExtraction:
  32. """subtask_id extraction mirrors the in-handler logic."""
  33. def test_valid_id_returns_string(self):
  34. assert _extract_subtask_id({"raw_data": {"subtask_id": "12345"}}) == "12345"
  35. def test_zero_collapses_to_none(self):
  36. """Bambu reports '0' for local (non-cloud) prints; must not match anything."""
  37. assert _extract_subtask_id({"raw_data": {"subtask_id": "0"}}) is None
  38. def test_empty_collapses_to_none(self):
  39. assert _extract_subtask_id({"raw_data": {"subtask_id": ""}}) is None
  40. def test_missing_raw_data(self):
  41. assert _extract_subtask_id({}) is None
  42. def test_missing_subtask_id(self):
  43. assert _extract_subtask_id({"raw_data": {"foo": "bar"}}) is None
  44. def test_integer_value_stringified(self):
  45. """MQTT may send the id as an int — coerce consistently."""
  46. assert _extract_subtask_id({"raw_data": {"subtask_id": 12345}}) == "12345"
  47. def test_whitespace_trimmed(self):
  48. assert _extract_subtask_id({"raw_data": {"subtask_id": " 42 "}}) == "42"
  49. class TestSubtaskIdResume:
  50. """End-to-end DB behavior of the resume path: a second on_print_start
  51. for the same subtask_id must find and reuse the first archive row."""
  52. @pytest.fixture
  53. async def archive_factory(self, db_session, printer_factory):
  54. printer = await printer_factory()
  55. async def _create(
  56. subtask_id: str | None = None,
  57. status: str = "printing",
  58. age_hours: float = 0,
  59. failure_reason: str | None = None,
  60. ):
  61. started = datetime.now(timezone.utc) - timedelta(hours=age_hours)
  62. archive = PrintArchive(
  63. printer_id=printer.id,
  64. filename="Broly_Legendary.gcode.3mf",
  65. file_path="archive/1/x/Broly.gcode.3mf",
  66. file_size=100,
  67. print_name="Broly_Legendary",
  68. status=status,
  69. started_at=started,
  70. subtask_id=subtask_id,
  71. failure_reason=failure_reason,
  72. )
  73. # Override server_default on created_at so age-based tests work
  74. archive.created_at = started
  75. db_session.add(archive)
  76. await db_session.commit()
  77. await db_session.refresh(archive)
  78. return printer, archive
  79. return _create
  80. async def test_subtask_id_query_finds_matching_printing_row(self, archive_factory, db_session):
  81. """The lookup used by main.on_print_start finds a matching row even
  82. when the archive is older than the 4h name-based staleness cutoff."""
  83. printer, archive = await archive_factory(subtask_id="t-123", age_hours=10)
  84. result = await db_session.execute(
  85. select(PrintArchive)
  86. .where(PrintArchive.printer_id == printer.id)
  87. .where(PrintArchive.subtask_id == "t-123")
  88. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  89. .order_by(PrintArchive.created_at.desc())
  90. .limit(1)
  91. )
  92. found = result.scalar_one_or_none()
  93. assert found is not None
  94. assert found.id == archive.id
  95. async def test_subtask_id_revives_stale_cancelled_row(self, archive_factory, db_session):
  96. """If an older Bambuddy wrongly cancelled the archive (legacy 4h path),
  97. the next print-start with the same subtask_id must revive it rather
  98. than start a third row."""
  99. printer, archive = await archive_factory(
  100. subtask_id="t-456",
  101. status="cancelled",
  102. failure_reason="Stale - print likely cancelled or failed without status update",
  103. age_hours=10,
  104. )
  105. result = await db_session.execute(
  106. select(PrintArchive)
  107. .where(PrintArchive.printer_id == printer.id)
  108. .where(PrintArchive.subtask_id == "t-456")
  109. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  110. .order_by(PrintArchive.created_at.desc())
  111. .limit(1)
  112. )
  113. candidate = result.scalar_one_or_none()
  114. assert candidate is not None
  115. # Revival mirrors the main.py logic: only revive stale-cancelled rows,
  116. # not user-cancelled ones. The failure_reason prefix is the signal.
  117. is_stale_cancelled = (candidate.failure_reason or "").startswith("Stale")
  118. assert is_stale_cancelled
  119. candidate.status = "printing"
  120. candidate.failure_reason = None
  121. await db_session.commit()
  122. await db_session.refresh(candidate)
  123. assert candidate.status == "printing"
  124. # Crucially, started_at is preserved — this is the whole point of the
  125. # fix. A fresh archive would have started_at = now, losing continuity.
  126. age_after = datetime.now(timezone.utc) - candidate.started_at.replace(tzinfo=timezone.utc)
  127. assert age_after > timedelta(hours=9), "started_at must survive revival"
  128. async def test_subtask_id_null_does_not_match_other_nulls(self, archive_factory, db_session):
  129. """Two different non-cloud prints both have subtask_id=NULL. They
  130. must NOT match each other via the subtask_id lookup (which is why
  131. the handler filters by `subtask_id IS NOT NULL` in the Python layer
  132. before even running this query)."""
  133. printer, _archive = await archive_factory(subtask_id=None, age_hours=1)
  134. # This shape of query (subtask_id == None) would return rows via
  135. # SQLAlchemy's NULL handling, but the handler only runs it when
  136. # subtask_id is truthy — so the query is never issued for NULL.
  137. # Assert the guard by testing the subtask_id != "" branch.
  138. result = await db_session.execute(select(PrintArchive).where(PrintArchive.subtask_id == ""))
  139. found = result.scalar_one_or_none()
  140. assert found is None, "Empty string must not match NULL rows"
  141. async def test_completed_archive_not_resumed(self, archive_factory, db_session):
  142. """A completed archive with the same subtask_id must not be reopened
  143. as printing — that subtask's job is done; a new run is a new row."""
  144. printer, _ = await archive_factory(subtask_id="t-789", status="completed")
  145. result = await db_session.execute(
  146. select(PrintArchive)
  147. .where(PrintArchive.printer_id == printer.id)
  148. .where(PrintArchive.subtask_id == "t-789")
  149. .where(PrintArchive.status.in_(["printing", "cancelled"]))
  150. )
  151. found = result.scalar_one_or_none()
  152. assert found is None