Browse Source

fix(#1113): silence Windows asyncio Proactor cleanup-RST noise

  bambuddy.log on Windows fills with

    Exception in callback _ProactorBasePipeTransport._call_connection_lost()
    ConnectionResetError: [WinError 10054] An existing connection was
    forcibly closed by the remote host

  every time a printer / MQTT broker / camera RSTs a TCP socket instead
  of FINing it. The application-layer reconnect (paho-mqtt, httpx)
  handles the actual disconnect fine; the traceback is asyncio
  bookkeeping. Reported by @cadtoolbox who runs 9 printers including 5
  offline X1Es, so the log filled multiple times per minute.

  New backend/app/core/asyncio_handlers.py installs a custom
  loop.set_exception_handler on Windows that pattern-matches three
  signals together (platform == win32, exception is
  ConnectionResetError, asyncio message contains
  _call_connection_lost) and demotes the entry to DEBUG. Genuine
  ConnectionResetErrors raised inside application coroutines have a
  different message string and still surface; BrokenPipeError /
  ConnectionAbortedError on the same cleanup path also still surface.

  Wired from lifespan startup before any task can spawn that might
  trip it. Linux / macOS use the Selector loop, so install is an
  explicit no-op there with a False return.

  9 unit tests in test_asyncio_handlers.py covering signature match,
  rejection of unrelated resets, platform gate, suppress vs.
  pass-through to default handler.
maziggy 1 month ago
parent
commit
56800589ff

File diff suppressed because it is too large
+ 0 - 0
CHANGELOG.md


+ 73 - 0
backend/app/core/asyncio_handlers.py

@@ -0,0 +1,73 @@
+"""Asyncio event-loop exception handlers used at app startup.
+
+Currently houses a single Windows-specific filter for the noisy
+``_ProactorBasePipeTransport._call_connection_lost`` ``WinError 10054``
+that fires every time a printer / MQTT broker / camera RSTs a TCP socket
+instead of closing it cleanly. See ``install_proactor_reset_filter`` for
+the why and the failure mode it suppresses.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import sys
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def _is_proactor_connection_reset(context: dict[str, Any]) -> bool:
+    """True if `context` describes the Windows Proactor cleanup-RST noise.
+
+    asyncio's default exception handler is invoked in two distinct cases
+    we care about — generic uncaught task exceptions, and the specific
+    `_call_connection_lost` cleanup path — and we only want to suppress
+    the latter. Match on three signals together so a real
+    `ConnectionResetError` raised inside an application task still
+    surfaces normally:
+
+      1. The exception is `ConnectionResetError` (or a subclass).
+      2. asyncio's own message string mentions `_call_connection_lost`
+         (the Proactor-cleanup callback is the only place Python emits
+         this exact phrase).
+      3. We're actually on Windows, where the Proactor is in use.
+    """
+    if sys.platform != "win32":
+        return False
+    exc = context.get("exception")
+    if not isinstance(exc, ConnectionResetError):
+        return False
+    message = context.get("message", "")
+    return "_call_connection_lost" in message
+
+
+def _proactor_reset_filter(loop: asyncio.AbstractEventLoop, context: dict[str, Any]) -> None:
+    """Custom event-loop exception handler.
+
+    Handles the Proactor-cleanup `ConnectionResetError` by logging it at
+    DEBUG instead of ERROR, and delegates everything else to asyncio's
+    default handler so unrelated bugs are still visible.
+    """
+    if _is_proactor_connection_reset(context):
+        logger.debug(
+            "asyncio Proactor: peer reset socket during cleanup (WinError 10054); "
+            "ignored — application-layer reconnect handles the disconnect"
+        )
+        return
+    loop.default_exception_handler(context)
+
+
+def install_proactor_reset_filter(loop: asyncio.AbstractEventLoop | None = None) -> bool:
+    """Install the filter on `loop` (or the running loop if omitted).
+
+    Returns True when the filter was installed (Windows only), False on
+    every other platform — so callers can branch on the return value if
+    they want to log the install / skip.
+    """
+    if sys.platform != "win32":
+        return False
+    if loop is None:
+        loop = asyncio.get_running_loop()
+    loop.set_exception_handler(_proactor_reset_filter)
+    return True

+ 6 - 0
backend/app/main.py

@@ -4160,6 +4160,12 @@ def stop_auth_cleanup() -> None:
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     # Startup
+    # Install Windows-only asyncio Proactor cleanup-RST filter (#1113) before
+    # anything else can spawn tasks that might trip it.
+    from backend.app.core.asyncio_handlers import install_proactor_reset_filter
+
+    install_proactor_reset_filter()
+
     await init_db()
 
     # Register an app-scoped httpx client for Bambu Cloud services so

+ 127 - 0
backend/tests/unit/test_asyncio_handlers.py

@@ -0,0 +1,127 @@
+"""Tests for the Windows asyncio Proactor cleanup-RST filter (#1113)."""
+
+from __future__ import annotations
+
+import asyncio
+from unittest.mock import patch
+
+import pytest
+
+from backend.app.core.asyncio_handlers import (
+    _is_proactor_connection_reset,
+    _proactor_reset_filter,
+    install_proactor_reset_filter,
+)
+
+
+# `_is_proactor_connection_reset` short-circuits on non-Windows; pretend we're
+# on Windows for the discrimination tests so they exercise the actual logic.
+@pytest.fixture
+def fake_windows():
+    with patch("backend.app.core.asyncio_handlers.sys.platform", "win32"):
+        yield
+
+
+class TestIsProactorConnectionReset:
+    """The discriminator that decides whether a context is the noise we silence."""
+
+    def test_matches_proactor_cleanup_reset(self, fake_windows):
+        ctx = {
+            "exception": ConnectionResetError(10054, "An existing connection was forcibly closed"),
+            "message": "Exception in callback _ProactorBasePipeTransport._call_connection_lost()",
+        }
+        assert _is_proactor_connection_reset(ctx) is True
+
+    def test_rejects_when_not_on_windows(self):
+        # No `fake_windows` fixture — sys.platform reflects the real OS.
+        ctx = {
+            "exception": ConnectionResetError(10054, "irrelevant"),
+            "message": "Exception in callback _ProactorBasePipeTransport._call_connection_lost()",
+        }
+        # The whole point of the filter is to be a Windows-only no-op.
+        with patch("backend.app.core.asyncio_handlers.sys.platform", "linux"):
+            assert _is_proactor_connection_reset(ctx) is False
+
+    def test_rejects_unrelated_connection_reset(self, fake_windows):
+        """A real `ConnectionResetError` raised inside an app coroutine —
+        not from the Proactor cleanup path — must NOT be suppressed.
+        Otherwise we'd hide genuine connectivity bugs."""
+        ctx = {
+            "exception": ConnectionResetError(),
+            "message": "Task exception was never retrieved",
+        }
+        assert _is_proactor_connection_reset(ctx) is False
+
+    def test_rejects_other_exception_types(self, fake_windows):
+        """Other OSErrors (BrokenPipeError, ConnectionAbortedError) might
+        share the cleanup path but they're a different signal worth
+        keeping visible — we only silence the specific 10054 family."""
+        ctx = {
+            "exception": BrokenPipeError(),
+            "message": "Exception in callback _ProactorBasePipeTransport._call_connection_lost()",
+        }
+        assert _is_proactor_connection_reset(ctx) is False
+
+    def test_rejects_when_no_exception(self, fake_windows):
+        """asyncio sometimes invokes the handler with no exception object
+        (e.g. resource warnings) — those shouldn't blanket-match."""
+        ctx = {"message": "_call_connection_lost was slow"}
+        assert _is_proactor_connection_reset(ctx) is False
+
+
+class TestProactorResetFilter:
+    """The handler glue itself — does it suppress the right ones and
+    pass everything else through to the default handler?"""
+
+    @pytest.mark.asyncio
+    async def test_suppresses_proactor_reset(self, fake_windows):
+        loop = asyncio.get_running_loop()
+        with patch.object(loop, "default_exception_handler") as default:
+            _proactor_reset_filter(
+                loop,
+                {
+                    "exception": ConnectionResetError(10054, "forcibly closed"),
+                    "message": "Exception in callback _ProactorBasePipeTransport._call_connection_lost()",
+                },
+            )
+        # Suppression = default handler is never reached.
+        default.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_passes_unrelated_through_to_default(self, fake_windows):
+        """A different uncaught exception must go through asyncio's normal
+        path so it surfaces in logs and tests as an actual problem."""
+        loop = asyncio.get_running_loop()
+        ctx = {
+            "exception": ValueError("real bug"),
+            "message": "Task exception was never retrieved",
+        }
+        with patch.object(loop, "default_exception_handler") as default:
+            _proactor_reset_filter(loop, ctx)
+        default.assert_called_once_with(ctx)
+
+
+class TestInstallation:
+    """Wiring: install_proactor_reset_filter only runs on Windows."""
+
+    @pytest.mark.asyncio
+    async def test_install_is_no_op_on_non_windows(self):
+        """Linux/macOS use the Selector loop, which doesn't hit this code
+        path — the install must be inert so the Linux production path
+        keeps the default exception handler untouched."""
+        loop = asyncio.get_running_loop()
+        with (
+            patch("backend.app.core.asyncio_handlers.sys.platform", "linux"),
+            patch.object(loop, "set_exception_handler") as setter,
+        ):
+            installed = install_proactor_reset_filter(loop)
+        assert installed is False
+        setter.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_install_attaches_handler_on_windows(self, fake_windows):
+        loop = asyncio.get_running_loop()
+        with patch.object(loop, "set_exception_handler") as setter:
+            installed = install_proactor_reset_filter(loop)
+        assert installed is True
+        setter.assert_called_once_with(_proactor_reset_filter)

Some files were not shown because too many files changed in this diff