test_background_dispatch_watchdog.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. """Regression tests for ``BackgroundDispatchService._verify_print_response``.
  2. The background-dispatch watchdog used to be fire-and-forget — it logged a
  3. warning and force-reconnected MQTT, but the dispatch job had already been
  4. marked successful. The user therefore saw "Print started successfully" while
  5. the printer never actually transitioned (#1042 follow-up). The watchdog now
  6. returns a bool so the caller can fail the dispatch job when the printer
  7. doesn't acknowledge the command, mirroring what `_watchdog_print_start` does
  8. on the queue side.
  9. Both transition signals are accepted: ``state`` advancing past ``pre_state``
  10. *or* ``subtask_id`` advancing past ``pre_subtask_id`` — H2D firmware can sit
  11. at FINISH for ~50 s after accepting ``project_file`` while echoing the new
  12. subtask_id back almost immediately (#1078).
  13. """
  14. from types import SimpleNamespace
  15. from unittest.mock import MagicMock, patch
  16. import pytest
  17. from backend.app.services.background_dispatch import BackgroundDispatchService
  18. def _status(state: str, subtask_id: str | None = None):
  19. """Minimal stand-in for PrinterState — only the two fields the watchdog reads."""
  20. return SimpleNamespace(state=state, subtask_id=subtask_id)
  21. class TestReturnsTrueOnPickup:
  22. @pytest.mark.asyncio
  23. async def test_returns_true_on_state_change(self):
  24. get_status = MagicMock(return_value=_status("RUNNING", "OLD_SUBTASK"))
  25. with patch(
  26. "backend.app.services.background_dispatch.printer_manager.get_status",
  27. get_status,
  28. ):
  29. result = await BackgroundDispatchService._verify_print_response(
  30. printer_id=42,
  31. printer_name="P1S",
  32. pre_state="FINISH",
  33. pre_subtask_id="OLD_SUBTASK",
  34. timeout=0.3,
  35. poll_interval=0.05,
  36. )
  37. assert result is True
  38. @pytest.mark.asyncio
  39. async def test_returns_true_on_subtask_id_change_even_if_state_still_finish(self):
  40. """#1078: H2D keeps state=FINISH for ~50 s after accepting project_file
  41. but flips subtask_id immediately. Must be accepted as a pickup signal."""
  42. get_status = MagicMock(return_value=_status("FINISH", "NEW_SUBTASK_12345"))
  43. with patch(
  44. "backend.app.services.background_dispatch.printer_manager.get_status",
  45. get_status,
  46. ):
  47. result = await BackgroundDispatchService._verify_print_response(
  48. printer_id=42,
  49. printer_name="H2D",
  50. pre_state="FINISH",
  51. pre_subtask_id="OLD_SUBTASK_99999",
  52. timeout=0.3,
  53. poll_interval=0.05,
  54. )
  55. assert result is True
  56. class TestReturnsFalseOnTimeout:
  57. @pytest.mark.asyncio
  58. async def test_returns_false_when_neither_state_nor_subtask_id_changes(self):
  59. """The exact #1042 scenario: P1S sits in FAILED with HMS pending,
  60. accepts the MQTT publish, never transitions. Watchdog must report
  61. failure so the caller fails the dispatch job."""
  62. get_status = MagicMock(return_value=_status("FINISH", "OLD_SUBTASK"))
  63. client = MagicMock()
  64. get_client = MagicMock(return_value=client)
  65. with (
  66. patch(
  67. "backend.app.services.background_dispatch.printer_manager.get_status",
  68. get_status,
  69. ),
  70. patch(
  71. "backend.app.services.background_dispatch.printer_manager.get_client",
  72. get_client,
  73. ),
  74. ):
  75. result = await BackgroundDispatchService._verify_print_response(
  76. printer_id=42,
  77. printer_name="P1S",
  78. pre_state="FINISH",
  79. pre_subtask_id="OLD_SUBTASK",
  80. timeout=0.2,
  81. poll_interval=0.05,
  82. )
  83. assert result is False
  84. client.force_reconnect_stale_session.assert_called_once()
  85. @pytest.mark.asyncio
  86. async def test_returns_false_when_pre_subtask_id_none_and_state_unchanged(self):
  87. """Backward-compat: callers without a captured pre_subtask_id (e.g. the
  88. printer never reported one) must still get the timeout failure path
  89. based on state alone."""
  90. get_status = MagicMock(return_value=_status("FINISH", "ANYTHING"))
  91. get_client = MagicMock(return_value=None)
  92. with (
  93. patch(
  94. "backend.app.services.background_dispatch.printer_manager.get_status",
  95. get_status,
  96. ),
  97. patch(
  98. "backend.app.services.background_dispatch.printer_manager.get_client",
  99. get_client,
  100. ),
  101. ):
  102. result = await BackgroundDispatchService._verify_print_response(
  103. printer_id=42,
  104. printer_name="P1S",
  105. pre_state="FINISH",
  106. pre_subtask_id=None,
  107. timeout=0.2,
  108. poll_interval=0.05,
  109. )
  110. assert result is False
  111. @pytest.mark.asyncio
  112. async def test_subtask_id_none_post_dispatch_does_not_count_as_change(self):
  113. """If the printer transiently reports subtask_id=None during the
  114. watchdog window (e.g. mid-reconnect), that must not be treated as
  115. "advanced past pre_subtask_id" — otherwise we'd false-pass and mark
  116. a never-started print as successful."""
  117. get_status = MagicMock(return_value=_status("FINISH", None))
  118. get_client = MagicMock(return_value=None)
  119. with (
  120. patch(
  121. "backend.app.services.background_dispatch.printer_manager.get_status",
  122. get_status,
  123. ),
  124. patch(
  125. "backend.app.services.background_dispatch.printer_manager.get_client",
  126. get_client,
  127. ),
  128. ):
  129. result = await BackgroundDispatchService._verify_print_response(
  130. printer_id=42,
  131. printer_name="P1S",
  132. pre_state="FINISH",
  133. pre_subtask_id="OLD_SUBTASK",
  134. timeout=0.2,
  135. poll_interval=0.05,
  136. )
  137. assert result is False
  138. class TestDisconnectHandling:
  139. @pytest.mark.asyncio
  140. async def test_disconnect_does_not_short_circuit_window(self):
  141. """A momentary ``get_status() is None`` (brief MQTT disconnect mid-window)
  142. must not immediately fail the dispatch — the printer may reconnect and
  143. still produce a valid transition before timeout. Falsely failing on the
  144. first missed tick is the previous bug class we're moving away from."""
  145. # First call: disconnected. Second call onward: reconnected and transitioned.
  146. get_status = MagicMock(side_effect=[None, _status("RUNNING")])
  147. with patch(
  148. "backend.app.services.background_dispatch.printer_manager.get_status",
  149. get_status,
  150. ):
  151. result = await BackgroundDispatchService._verify_print_response(
  152. printer_id=42,
  153. printer_name="P1S",
  154. pre_state="FINISH",
  155. pre_subtask_id="OLD_SUBTASK",
  156. timeout=0.3,
  157. poll_interval=0.05,
  158. )
  159. assert result is True
  160. assert get_status.call_count >= 2
  161. @pytest.mark.asyncio
  162. async def test_disconnect_for_full_window_returns_false(self):
  163. """Persistent disconnect for the full window is treated as failure.
  164. Better to false-fail and let the user retry than to false-succeed and
  165. leave them watching an idle printer (#1042)."""
  166. get_status = MagicMock(return_value=None)
  167. get_client = MagicMock(return_value=None)
  168. with (
  169. patch(
  170. "backend.app.services.background_dispatch.printer_manager.get_status",
  171. get_status,
  172. ),
  173. patch(
  174. "backend.app.services.background_dispatch.printer_manager.get_client",
  175. get_client,
  176. ),
  177. ):
  178. result = await BackgroundDispatchService._verify_print_response(
  179. printer_id=42,
  180. printer_name="P1S",
  181. pre_state="FINISH",
  182. pre_subtask_id="OLD_SUBTASK",
  183. timeout=0.2,
  184. poll_interval=0.05,
  185. )
  186. assert result is False
  187. class TestDefaults:
  188. def test_default_timeout_matches_queue_watchdog(self):
  189. """Queue and background watchdogs need the same 90 s default to give
  190. slow H2D FINISH→PREPARE transitions the same headroom on both paths."""
  191. import inspect
  192. sig = inspect.signature(BackgroundDispatchService._verify_print_response)
  193. assert sig.parameters["timeout"].default == 90.0
  194. # ---------------------------------------------------------------------------
  195. # Integration tests: the call sites in _run_reprint_archive and
  196. # _run_print_library_file must (a) await the watchdog instead of fire-and-
  197. # forget, (b) raise RuntimeError on watchdog False so _run_active_job marks
  198. # the job failed, (c) rollback the library-file flow's freshly-created
  199. # archive on timeout. Heavy mocking — the goal is to verify the new wiring,
  200. # not to re-test the dependencies.
  201. # ---------------------------------------------------------------------------
  202. from contextlib import asynccontextmanager # noqa: E402
  203. from unittest.mock import AsyncMock # noqa: E402
  204. from backend.app.services.background_dispatch import ( # noqa: E402
  205. ActiveDispatchState,
  206. PrintDispatchJob,
  207. )
  208. def _make_session_factory(db_mock):
  209. """Build an async-session factory whose context manager yields ``db_mock``.
  210. Mirrors the ``async with async_session() as db`` shape used by both
  211. ``_run_*`` methods so the test can intercept ``db.rollback`` / ``db.scalar``.
  212. """
  213. @asynccontextmanager
  214. async def _factory():
  215. yield db_mock
  216. return _factory
  217. def _printer_namespace():
  218. return SimpleNamespace(
  219. id=10,
  220. name="P1S",
  221. ip_address="1.2.3.4",
  222. access_code="abc",
  223. model="P1S",
  224. )
  225. def _make_dispatch_job(kind: str = "reprint_archive") -> PrintDispatchJob:
  226. return PrintDispatchJob(
  227. id=1,
  228. kind=kind,
  229. source_id=99,
  230. source_name="Test.gcode.3mf",
  231. printer_id=10,
  232. printer_name="P1S",
  233. options={},
  234. requested_by_user_id=None,
  235. requested_by_username=None,
  236. )
  237. @pytest.fixture
  238. def reprint_archive_mocks(tmp_path):
  239. """Mock harness for ``_run_reprint_archive`` covering every external
  240. dependency up to (and including) ``start_print``. The watchdog is left
  241. real so the caller can patch ``_verify_print_response`` per-test."""
  242. archive_file = tmp_path / "test.3mf"
  243. archive_file.write_bytes(b"fake-3mf-content")
  244. archive = SimpleNamespace(
  245. id=99,
  246. filename="Test.gcode.3mf",
  247. file_path=str(archive_file),
  248. )
  249. db = MagicMock()
  250. db.scalar = AsyncMock(return_value=_printer_namespace())
  251. db.rollback = AsyncMock()
  252. archive_service = MagicMock()
  253. archive_service.get_archive = AsyncMock(return_value=archive)
  254. return {
  255. "archive": archive,
  256. "archive_file": archive_file,
  257. "db": db,
  258. "archive_service": archive_service,
  259. "session_factory": _make_session_factory(db),
  260. }
  261. @pytest.fixture
  262. def library_file_mocks(tmp_path):
  263. """Mock harness for ``_run_print_library_file`` — separate from the
  264. reprint fixture because the library flow creates its archive via
  265. ``archive_service.archive_print(...)`` rather than fetching one."""
  266. src_file = tmp_path / "lib_src.3mf"
  267. src_file.write_bytes(b"fake-3mf-content")
  268. lib_file = SimpleNamespace(
  269. id=22,
  270. filename="cube.gcode.3mf",
  271. file_path=str(src_file.relative_to(tmp_path)),
  272. )
  273. lib_file.active = staticmethod(lambda: lib_file) # mimic LibraryFile.active() chainable
  274. new_archive = SimpleNamespace(id=500, filename="cube.gcode.3mf", file_path=str(src_file))
  275. db = MagicMock()
  276. db.scalar = AsyncMock() # configured per-test
  277. db.flush = AsyncMock()
  278. db.commit = AsyncMock()
  279. db.rollback = AsyncMock()
  280. archive_service = MagicMock()
  281. archive_service.archive_print = AsyncMock(return_value=new_archive)
  282. return {
  283. "lib_file": lib_file,
  284. "src_file": src_file,
  285. "new_archive": new_archive,
  286. "db": db,
  287. "archive_service": archive_service,
  288. "session_factory": _make_session_factory(db),
  289. }
  290. class TestReprintArchiveDispatchWiring:
  291. """Verify ``_run_reprint_archive`` (a) awaits the watchdog inline and
  292. (b) raises RuntimeError on False so the dispatch job is marked failed."""
  293. @pytest.mark.asyncio
  294. async def test_raises_runtime_error_when_watchdog_returns_false(self, reprint_archive_mocks):
  295. """The exact #1042 propagation gap: watchdog detects non-transition,
  296. _run_reprint_archive must surface it as a RuntimeError so the surrounding
  297. _run_active_job marks the job failed (instead of silently completing)."""
  298. from backend.app.services.background_dispatch import BackgroundDispatchService
  299. m = reprint_archive_mocks
  300. service = BackgroundDispatchService()
  301. job = _make_dispatch_job(kind="reprint_archive")
  302. watchdog = AsyncMock(return_value=False)
  303. with (
  304. patch("backend.app.services.background_dispatch.async_session", m["session_factory"]),
  305. patch(
  306. "backend.app.services.background_dispatch.ArchiveService",
  307. return_value=m["archive_service"],
  308. ),
  309. patch.object(BackgroundDispatchService, "_verify_print_response", watchdog),
  310. patch(
  311. "backend.app.services.background_dispatch.printer_manager.is_connected",
  312. return_value=True,
  313. ),
  314. patch(
  315. "backend.app.services.background_dispatch.printer_manager.get_status",
  316. return_value=SimpleNamespace(state="FINISH", subtask_id="OLD_SUBTASK"),
  317. ),
  318. patch(
  319. "backend.app.services.background_dispatch.printer_manager.start_print",
  320. return_value=True,
  321. ),
  322. patch(
  323. "backend.app.services.background_dispatch.delete_file_async",
  324. new_callable=AsyncMock,
  325. ),
  326. patch(
  327. "backend.app.services.background_dispatch.with_ftp_retry",
  328. new_callable=AsyncMock,
  329. return_value=True,
  330. ),
  331. patch(
  332. "backend.app.services.background_dispatch.get_ftp_retry_settings",
  333. new_callable=AsyncMock,
  334. return_value=(False, 0, 0, 30.0),
  335. ),
  336. patch(
  337. "backend.app.services.background_dispatch.upload_file_async",
  338. new_callable=AsyncMock,
  339. return_value=True,
  340. ),
  341. patch(
  342. "backend.app.services.background_dispatch.ws_manager.broadcast",
  343. new_callable=AsyncMock,
  344. ),
  345. patch("backend.app.main.register_expected_print"),
  346. pytest.raises(RuntimeError, match="did not acknowledge print command"),
  347. ):
  348. await service._run_reprint_archive(job)
  349. # Watchdog received the captured pre-state and pre_subtask_id.
  350. watchdog.assert_awaited_once()
  351. kwargs = watchdog.await_args.kwargs
  352. args = watchdog.await_args.args
  353. assert "FINISH" in args # pre_state
  354. assert kwargs["pre_subtask_id"] == "OLD_SUBTASK"
  355. @pytest.mark.asyncio
  356. async def test_succeeds_when_watchdog_returns_true(self, reprint_archive_mocks):
  357. """Happy path: watchdog confirms pickup; _run_reprint_archive returns
  358. without raising. Guards against the wiring accidentally raising on True."""
  359. from backend.app.services.background_dispatch import BackgroundDispatchService
  360. m = reprint_archive_mocks
  361. service = BackgroundDispatchService()
  362. job = _make_dispatch_job(kind="reprint_archive")
  363. with (
  364. patch("backend.app.services.background_dispatch.async_session", m["session_factory"]),
  365. patch(
  366. "backend.app.services.background_dispatch.ArchiveService",
  367. return_value=m["archive_service"],
  368. ),
  369. patch.object(
  370. BackgroundDispatchService,
  371. "_verify_print_response",
  372. AsyncMock(return_value=True),
  373. ),
  374. patch(
  375. "backend.app.services.background_dispatch.printer_manager.is_connected",
  376. return_value=True,
  377. ),
  378. patch(
  379. "backend.app.services.background_dispatch.printer_manager.get_status",
  380. return_value=SimpleNamespace(state="FINISH", subtask_id="OLD_SUBTASK"),
  381. ),
  382. patch(
  383. "backend.app.services.background_dispatch.printer_manager.start_print",
  384. return_value=True,
  385. ),
  386. patch(
  387. "backend.app.services.background_dispatch.delete_file_async",
  388. new_callable=AsyncMock,
  389. ),
  390. patch(
  391. "backend.app.services.background_dispatch.with_ftp_retry",
  392. new_callable=AsyncMock,
  393. return_value=True,
  394. ),
  395. patch(
  396. "backend.app.services.background_dispatch.get_ftp_retry_settings",
  397. new_callable=AsyncMock,
  398. return_value=(False, 0, 0, 30.0),
  399. ),
  400. patch(
  401. "backend.app.services.background_dispatch.upload_file_async",
  402. new_callable=AsyncMock,
  403. return_value=True,
  404. ),
  405. patch(
  406. "backend.app.services.background_dispatch.ws_manager.broadcast",
  407. new_callable=AsyncMock,
  408. ),
  409. patch("backend.app.main.register_expected_print"),
  410. ):
  411. await service._run_reprint_archive(job) # must not raise
  412. # Reprint flow does not touch the existing archive — no rollback expected.
  413. m["db"].rollback.assert_not_called()
  414. class TestRunActiveJobMarksFailedOnRuntimeError:
  415. """End-to-end: a watchdog-driven RuntimeError must reach
  416. `_mark_job_finished(failed=True)` via the existing ``_run_active_job``
  417. catch-all, so the dispatch UI shows a real failure (not "Done")."""
  418. @pytest.mark.asyncio
  419. async def test_runtime_error_from_process_job_marks_failed_with_message(self):
  420. from backend.app.services.background_dispatch import BackgroundDispatchService
  421. service = BackgroundDispatchService()
  422. job = _make_dispatch_job()
  423. # Place the job into _active_jobs so _set_active_message has a target.
  424. service._active_jobs[job.id] = ActiveDispatchState(job=job, message="")
  425. failure_message = (
  426. "Printer did not acknowledge print command — state still FINISH. "
  427. "Check the printer for a pending error (HMS code, plate-clear prompt, "
  428. "SD card) and try again."
  429. )
  430. with (
  431. patch.object(
  432. BackgroundDispatchService,
  433. "_process_job",
  434. AsyncMock(side_effect=RuntimeError(failure_message)),
  435. ),
  436. patch.object(
  437. BackgroundDispatchService,
  438. "_mark_job_finished",
  439. new_callable=AsyncMock,
  440. ) as mark_finished,
  441. ):
  442. await service._run_active_job(job)
  443. mark_finished.assert_awaited_once()
  444. kwargs = mark_finished.await_args.kwargs
  445. assert kwargs["failed"] is True
  446. assert "did not acknowledge print command" in kwargs["message"]