test_scheduler_watchdog.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. """Regression tests for ``_watchdog_print_start``.
  2. The watchdog reverts queue items to ``pending`` when a dispatched print never
  3. lands on the printer (half-broken MQTT session — #887/#936/#967). H2D firmware
  4. can sit at ``FINISH`` for 50+ seconds after accepting a ``project_file``
  5. command before flipping ``gcode_state`` to ``PREPARE``, which used to trip the
  6. state-only watchdog and cause the scheduler to revert the item; the subsequent
  7. successful dispatch then looked like a reprint of the just-finished job (#1078).
  8. The fix: treat ``subtask_id`` advancing past the pre-dispatch value as an
  9. equivalent "command landed" signal, and raise the timeout from 45 s to 90 s as
  10. belt-and-braces for slow transitions that also don't emit an early subtask_id
  11. tick.
  12. """
  13. from types import SimpleNamespace
  14. from unittest.mock import MagicMock, patch
  15. import pytest
  16. from backend.app.models.print_queue import PrintQueueItem
  17. from backend.app.services.print_scheduler import PrintScheduler
  18. @pytest.fixture
  19. async def db_session():
  20. """In-memory SQLite with one ``printing`` queue item at id=1."""
  21. from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
  22. import backend.app.models # noqa: F401 — populate Base.metadata
  23. from backend.app.core.database import Base
  24. engine = create_async_engine("sqlite+aiosqlite:///:memory:", echo=False)
  25. async with engine.begin() as conn:
  26. await conn.run_sync(Base.metadata.create_all)
  27. session_maker = async_sessionmaker(engine, expire_on_commit=False)
  28. async with session_maker() as db:
  29. db.add(PrintQueueItem(id=1, printer_id=42, archive_id=99, status="printing"))
  30. await db.commit()
  31. try:
  32. yield session_maker
  33. finally:
  34. await engine.dispose()
  35. def _status(state: str, subtask_id: str | None = None):
  36. """Minimal stand-in for PrinterState — only the two fields the watchdog reads."""
  37. return SimpleNamespace(state=state, subtask_id=subtask_id)
  38. class TestWatchdogExitsEarlyOnPickup:
  39. """The watchdog must NOT revert when the printer has clearly picked up the job."""
  40. @pytest.mark.asyncio
  41. async def test_exits_on_state_change(self, db_session):
  42. """State transitioning away from pre_state is the primary "accepted" signal."""
  43. get_status = MagicMock(return_value=_status("RUNNING", "OLD_SUBTASK"))
  44. with (
  45. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  46. patch("backend.app.services.print_scheduler.async_session", db_session),
  47. ):
  48. await PrintScheduler._watchdog_print_start(
  49. queue_item_id=1,
  50. printer_id=42,
  51. pre_state="FINISH",
  52. pre_subtask_id="OLD_SUBTASK",
  53. timeout=0.3,
  54. poll_interval=0.05,
  55. )
  56. # Item should remain "printing" — watchdog recognised the pickup.
  57. async with db_session() as db:
  58. item = await db.get(PrintQueueItem, 1)
  59. assert item.status == "printing"
  60. @pytest.mark.asyncio
  61. async def test_exits_on_subtask_id_change_even_if_state_still_finish(self, db_session):
  62. """Regression for #1078: H2D keeps state=FINISH for ~50 s after accepting
  63. project_file, but subtask_id flips to our new submission_id almost
  64. immediately. That must short-circuit the revert."""
  65. get_status = MagicMock(return_value=_status("FINISH", "NEW_SUBTASK_12345"))
  66. with (
  67. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  68. patch("backend.app.services.print_scheduler.async_session", db_session),
  69. ):
  70. await PrintScheduler._watchdog_print_start(
  71. queue_item_id=1,
  72. printer_id=42,
  73. pre_state="FINISH",
  74. pre_subtask_id="OLD_SUBTASK_99999",
  75. timeout=0.3,
  76. poll_interval=0.05,
  77. )
  78. async with db_session() as db:
  79. item = await db.get(PrintQueueItem, 1)
  80. assert item.status == "printing", (
  81. "subtask_id advanced past pre_subtask_id — the printer accepted our "
  82. "project_file and the watchdog must not revert the queue item even "
  83. "though state is still FINISH (#1078)"
  84. )
  85. class TestWatchdogRevertsWhenStuck:
  86. """Genuine half-broken sessions still need the revert + reconnect recovery."""
  87. @pytest.mark.asyncio
  88. async def test_reverts_when_neither_state_nor_subtask_id_changes(self, db_session):
  89. """Both signals unchanged across the full timeout → revert to pending
  90. and force MQTT reconnect (the #967 recovery path)."""
  91. get_status = MagicMock(return_value=_status("FINISH", "OLD_SUBTASK"))
  92. client = MagicMock()
  93. get_client = MagicMock(return_value=client)
  94. with (
  95. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  96. patch("backend.app.services.print_scheduler.printer_manager.get_client", get_client),
  97. patch("backend.app.services.print_scheduler.async_session", db_session),
  98. ):
  99. await PrintScheduler._watchdog_print_start(
  100. queue_item_id=1,
  101. printer_id=42,
  102. pre_state="FINISH",
  103. pre_subtask_id="OLD_SUBTASK",
  104. timeout=0.2,
  105. poll_interval=0.05,
  106. )
  107. async with db_session() as db:
  108. item = await db.get(PrintQueueItem, 1)
  109. assert item.status == "pending"
  110. assert item.started_at is None
  111. client.force_reconnect_stale_session.assert_called_once()
  112. @pytest.mark.asyncio
  113. async def test_default_timeout_is_90_seconds(self):
  114. """The default timeout must cover slow H2D FINISH→PREPARE transitions
  115. (~50 s observed). A 45 s default would trip on the exact scenario the
  116. subtask_id check is guarding against, leaving no fallback for printers
  117. that don't echo subtask_id."""
  118. import inspect
  119. sig = inspect.signature(PrintScheduler._watchdog_print_start)
  120. assert sig.parameters["timeout"].default == 90.0
  121. class TestWatchdogFallbackBehaviour:
  122. """Backwards-compat and defensive behaviour around missing data."""
  123. @pytest.mark.asyncio
  124. async def test_pre_subtask_id_none_falls_back_to_state_only(self, db_session):
  125. """When we never captured a pre-dispatch subtask_id (e.g. printer just
  126. connected), the watchdog must still work on the state signal alone —
  127. and still revert when state stays unchanged, so half-broken sessions
  128. are still recovered."""
  129. get_status = MagicMock(return_value=_status("FINISH", "SOMETHING"))
  130. get_client = MagicMock(return_value=None)
  131. with (
  132. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  133. patch("backend.app.services.print_scheduler.printer_manager.get_client", get_client),
  134. patch("backend.app.services.print_scheduler.async_session", db_session),
  135. ):
  136. await PrintScheduler._watchdog_print_start(
  137. queue_item_id=1,
  138. printer_id=42,
  139. pre_state="FINISH",
  140. pre_subtask_id=None,
  141. timeout=0.2,
  142. poll_interval=0.05,
  143. )
  144. async with db_session() as db:
  145. item = await db.get(PrintQueueItem, 1)
  146. assert item.status == "pending"
  147. @pytest.mark.asyncio
  148. async def test_current_subtask_id_none_does_not_trigger_early_exit(self, db_session):
  149. """If the printer transiently reports subtask_id=None (e.g. during
  150. reconnect), that must not be treated as "changed" — otherwise the
  151. watchdog would exit early without a real pickup signal and leave the
  152. item stuck in "printing" after a genuinely broken session."""
  153. get_status = MagicMock(return_value=_status("FINISH", None))
  154. get_client = MagicMock(return_value=None)
  155. with (
  156. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  157. patch("backend.app.services.print_scheduler.printer_manager.get_client", get_client),
  158. patch("backend.app.services.print_scheduler.async_session", db_session),
  159. ):
  160. await PrintScheduler._watchdog_print_start(
  161. queue_item_id=1,
  162. printer_id=42,
  163. pre_state="FINISH",
  164. pre_subtask_id="OLD_SUBTASK",
  165. timeout=0.2,
  166. poll_interval=0.05,
  167. )
  168. async with db_session() as db:
  169. item = await db.get(PrintQueueItem, 1)
  170. assert item.status == "pending"
  171. @pytest.mark.asyncio
  172. async def test_printer_disconnected_returns_without_reverting(self, db_session):
  173. """If the printer drops during the watchdog window, don't touch the DB —
  174. the reconnect path will sort the queue state out."""
  175. get_status = MagicMock(return_value=None)
  176. with (
  177. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  178. patch("backend.app.services.print_scheduler.async_session", db_session),
  179. ):
  180. await PrintScheduler._watchdog_print_start(
  181. queue_item_id=1,
  182. printer_id=42,
  183. pre_state="FINISH",
  184. pre_subtask_id="OLD_SUBTASK",
  185. timeout=0.2,
  186. poll_interval=0.05,
  187. )
  188. async with db_session() as db:
  189. item = await db.get(PrintQueueItem, 1)
  190. assert item.status == "printing"
  191. @pytest.mark.asyncio
  192. async def test_no_revert_if_item_already_completed(self, db_session):
  193. """If the print completed between watchdog arm-time and timeout (item is
  194. no longer "printing"), the watchdog must not clobber whatever status it
  195. ended up in — #967 race guard."""
  196. # Move item on to "completed" before the watchdog fires.
  197. async with db_session() as db:
  198. item = await db.get(PrintQueueItem, 1)
  199. item.status = "completed"
  200. await db.commit()
  201. get_status = MagicMock(return_value=_status("FINISH", "OLD_SUBTASK"))
  202. get_client = MagicMock(return_value=None)
  203. with (
  204. patch("backend.app.services.print_scheduler.printer_manager.get_status", get_status),
  205. patch("backend.app.services.print_scheduler.printer_manager.get_client", get_client),
  206. patch("backend.app.services.print_scheduler.async_session", db_session),
  207. ):
  208. await PrintScheduler._watchdog_print_start(
  209. queue_item_id=1,
  210. printer_id=42,
  211. pre_state="FINISH",
  212. pre_subtask_id="OLD_SUBTASK",
  213. timeout=0.2,
  214. poll_interval=0.05,
  215. )
  216. async with db_session() as db:
  217. item = await db.get(PrintQueueItem, 1)
  218. assert item.status == "completed" # untouched