Browse Source

fix(camera): probe ffmpeg for the right RTSP socket-timeout flag (#1504)

  A previous attempt swapped `-timeout` → `-stimeout` unconditionally to
  fix EADDRINUSE on the reporter's transitional ffmpeg. That broke every
  install on a modern ffmpeg (5+/6+/7+) — current Debian/Ubuntu/Homebrew
  — where `-stimeout` was removed and `-timeout` is back to meaning
  socket I/O. Verified locally: `ffmpeg -stimeout ...` errors
  "Unrecognized option 'stimeout'" on ffmpeg 7.1.
  install on a modern ffmpeg (5+/6+/7+) — current Debian/Ubuntu/Homebrew
  — where `-stimeout` was removed and `-timeout` is back to meaning
  socket I/O. Verified locally: `ffmpeg -stimeout ...` errors
  "Unrecognized option 'stimeout'" on ffmpeg 7.1.

  ffmpeg has shipped THREE arrangements of this option over time and
  Bambuddy supports the full range:

  - Pre-deprecation (early 4.x and earlier): `-timeout` is socket I/O.
  - Transitional (~late-4.x, Jammy-era): `-timeout` is deprecated and
    repurposed to RTSP listen-mode timeout; any non-zero value implies
    `-listen`, which makes ffmpeg bind the TLS-proxy port and fail with
    EADDRINUSE. `-stimeout` is the replacement socket I/O option.
  - Modern (5.x / 6.x / 7.x): `-stimeout` REMOVED. `-timeout` is back to
    socket I/O — the original meaning.

  So no single literal is correct on all installs.

  Fix: `rtsp_socket_timeout_flag()` in services/camera.py probes
  `ffmpeg -h demuxer=rtsp` once and picks `-stimeout` when ffmpeg
  advertises it (covers transitional + older builds that kept it as an
  alias), else `-timeout` (modern + pre-deprecation). Cached at module
  level for the process lifetime — ffmpeg doesn't swap mid-run.

  The function returns the option name without a leading dash; callers
  prepend it themselves so a formatting bug can't pass an empty flag.

  Wired into both RTSP ffmpeg call sites in lockstep: routes/camera.py
  (printer camera) and services/external_camera.py (external RTSP),
  which use the same TLS-proxy + ffmpeg pattern and would hit the same
  regression on either ffmpeg cohort.

  Tests: 8 in test_ffmpeg_rtsp_timeout_flag.py — 6 probe unit tests
  (prefers stimeout when advertised, falls back to timeout on modern,
  defaults to timeout when ffmpeg missing or probe raises, caches across
  calls, trailing-space substring guard against `-listen_timeout`
  false-positives), 2 parametrised guards against either RTSP ffmpeg
  argv re-hard-coding a literal instead of consuming the probe. 37
  probe + existing external-camera tests green.
maziggy 3 days ago
parent
commit
eae96da56e

File diff suppressed because it is too large
+ 0 - 0
CHANGELOG.md


+ 6 - 2
backend/app/api/routes/camera.py

@@ -29,6 +29,7 @@ from backend.app.services.camera import (
     get_ffmpeg_path,
     is_chamber_image_model,
     read_next_chamber_frame,
+    rtsp_socket_timeout_flag,
     test_camera_connection,
 )
 from backend.app.services.camera_fanout import (
@@ -348,8 +349,11 @@ async def generate_rtsp_mjpeg_stream(
         "tcp",
         "-rtsp_flags",
         "prefer_tcp",
-        "-timeout",
-        "30000000",  # 30 seconds in microseconds
+        # Socket I/O timeout name varies by ffmpeg version (#1504); see
+        # rtsp_socket_timeout_flag(). The 30s value is microseconds for
+        # both names.
+        f"-{rtsp_socket_timeout_flag()}",
+        "30000000",
         "-buffer_size",
         "1024000",  # 1MB buffer
         "-max_delay",

+ 67 - 0
backend/app/services/camera.py

@@ -11,6 +11,7 @@ import os
 import shutil
 import ssl
 import struct
+import subprocess
 import uuid
 from datetime import datetime
 from pathlib import Path
@@ -24,6 +25,9 @@ JPEG_END = b"\xff\xd9"
 # Cache the ffmpeg path after first lookup
 _ffmpeg_path: str | None = None
 
+# Cached result of rtsp_socket_timeout_flag(); see that function for context.
+_rtsp_socket_timeout_flag: str | None = None
+
 # Track PIDs of ffmpeg processes spawned for one-shot frame capture (snapshot).
 # The cleanup task in routes/camera.py checks this set to avoid killing active captures.
 _active_capture_pids: set[int] = set()
@@ -66,6 +70,69 @@ def get_ffmpeg_path() -> str | None:
     return ffmpeg_path
 
 
+def rtsp_socket_timeout_flag() -> str:
+    """Return the ffmpeg argv flag (without the leading dash) that sets the
+    RTSP demuxer's client-side TCP socket I/O timeout, in microseconds.
+
+    ffmpeg has shipped three different option arrangements for this over
+    time, and Bambuddy supports the full range:
+
+    - **Modern ffmpeg (5.x / 6.x / 7.x)** — Debian 13, Ubuntu 24.04, current
+      Homebrew, etc. ``-timeout`` is the socket I/O timeout (microseconds);
+      ``-stimeout`` was REMOVED.
+    - **Transitional ffmpeg (~late-4.x, some 5.x builds)** — Ubuntu 22.04's
+      shipped version is one of these. ``-timeout`` was deprecated and
+      *repurposed* to mean the RTSP listen-mode incoming-connection
+      timeout — and any non-zero value implies ``-listen``, which makes
+      ffmpeg bind the localhost proxy port and fail with EADDRINUSE
+      (#1504). ``-stimeout`` was the replacement socket I/O timeout in
+      that window.
+    - **Old ffmpeg (early 4.x and earlier)** — ``-timeout`` is socket I/O
+      timeout (the original meaning, before the deprecation churn).
+
+    We probe ``-h demuxer=rtsp`` once and cache: if ``-stimeout`` is
+    advertised, prefer it (covers the transitional window and stays
+    correct on the older builds that still accept it as an alias); else
+    fall back to ``-timeout`` (correct on modern and pre-deprecation
+    ffmpeg). The result is cached for the process lifetime — ffmpeg
+    isn't going to swap mid-run.
+
+    Returns the option name without the leading dash, e.g. ``"timeout"``
+    or ``"stimeout"``. Callers must prepend ``-`` themselves so a string
+    formatting bug can't pass an empty flag.
+    """
+    global _rtsp_socket_timeout_flag
+
+    if _rtsp_socket_timeout_flag is not None:
+        return _rtsp_socket_timeout_flag
+
+    ffmpeg = get_ffmpeg_path()
+    chosen = "timeout"  # safe default for modern ffmpeg
+    if ffmpeg:
+        try:
+            result = subprocess.run(
+                [ffmpeg, "-hide_banner", "-h", "demuxer=rtsp"],
+                capture_output=True,
+                text=True,
+                timeout=5,
+                check=False,
+            )
+            help_text = (result.stdout or "") + (result.stderr or "")
+            # Help lines list each option as `-<name> ` (trailing space) — match
+            # that exact form so we don't accidentally hit a substring elsewhere.
+            if "-stimeout " in help_text:
+                chosen = "stimeout"
+        except (OSError, subprocess.SubprocessError) as exc:
+            # If probing fails, keep the modern-ffmpeg default. Worst case
+            # is the EADDRINUSE regression returns for transitional-ffmpeg
+            # users — same as before this function existed.
+            logger.warning("Could not probe ffmpeg RTSP timeout flag, defaulting to -timeout: %s", exc)
+
+    _rtsp_socket_timeout_flag = chosen
+    logger.info("RTSP socket I/O timeout flag: -%s", chosen)
+    return chosen
+
+
 def supports_rtsp(model: str | None) -> bool:
     """Check if printer model supports RTSP camera streaming.
 

+ 5 - 1
backend/app/services/external_camera.py

@@ -683,6 +683,8 @@ async def _stream_rtsp(url: str, fps: int) -> AsyncGenerator[bytes, None]:
         logger.error("ffmpeg not found - required for RTSP streaming")
         return
 
+    from backend.app.services.camera import rtsp_socket_timeout_flag
+
     # If the URL uses rtsps://, set up a TLS proxy so ffmpeg uses plain rtsp://
     proxy_server = None
     effective_url = url
@@ -715,7 +717,9 @@ async def _stream_rtsp(url: str, fps: int) -> AsyncGenerator[bytes, None]:
         "tcp",
         "-rtsp_flags",
         "prefer_tcp",
-        "-timeout",
+        # Socket I/O timeout name varies by ffmpeg version (#1504); see
+        # `rtsp_socket_timeout_flag()` in services.camera.
+        f"-{rtsp_socket_timeout_flag()}",
         "30000000",
         "-buffer_size",
         "1024000",

+ 139 - 0
backend/tests/unit/test_ffmpeg_rtsp_timeout_flag.py

@@ -0,0 +1,139 @@
+"""Regression for #1504: ffmpeg RTSP socket-I/O timeout flag.
+
+The RTSP demuxer's client-side socket I/O timeout option name varies by
+ffmpeg version (full chronology in
+`backend/app/services/camera.rtsp_socket_timeout_flag`). Hard-coding
+either ``-timeout`` or ``-stimeout`` regresses one half of the install
+base. The flag is therefore probed at runtime; this module tests that
+probe and guards against either RTSP ffmpeg argv re-hard-coding the
+wrong literal.
+"""
+
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+import backend.app.services.camera as camera_svc
+from backend.app.services.camera import rtsp_socket_timeout_flag
+
+
+@pytest.fixture(autouse=True)
+def _reset_cache():
+    """The probe caches its result in a module-level global. Reset it
+    before every test so each one sees a fresh probe."""
+    camera_svc._rtsp_socket_timeout_flag = None
+    yield
+    camera_svc._rtsp_socket_timeout_flag = None
+
+
+class TestRtspSocketTimeoutFlagProbe:
+    def test_prefers_stimeout_when_ffmpeg_advertises_it(self):
+        """Transitional ffmpeg (~late-4.x): both options are listed and
+        ``-timeout`` is the broken listen-mode option — pick ``-stimeout``."""
+        transitional_help = (
+            "  -listen_timeout    <int>  ... incoming connections ...\n"
+            "  -stimeout          <int64> ... socket TCP I/O ...\n"
+            "  -timeout           <int>  ... DEPRECATED ...\n"
+        )
+        with (
+            patch.object(camera_svc, "get_ffmpeg_path", return_value="/usr/bin/ffmpeg"),
+            patch("backend.app.services.camera.subprocess.run") as mock_run,
+        ):
+            mock_run.return_value.stdout = transitional_help
+            mock_run.return_value.stderr = ""
+            assert rtsp_socket_timeout_flag() == "stimeout"
+
+    def test_falls_back_to_timeout_on_modern_ffmpeg(self):
+        """Modern ffmpeg (5+/6+/7+): ``-stimeout`` no longer exists and
+        ``-timeout`` is back to meaning socket I/O — pick ``-timeout``."""
+        modern_help = (
+            "  -listen_timeout    <int>  ... incoming connections ...\n"
+            "  -timeout           <int64> ... socket I/O ...\n"
+            "  -reorder_queue_size <int> ... reordered packets ...\n"
+        )
+        with (
+            patch.object(camera_svc, "get_ffmpeg_path", return_value="/usr/bin/ffmpeg"),
+            patch("backend.app.services.camera.subprocess.run") as mock_run,
+        ):
+            mock_run.return_value.stdout = modern_help
+            mock_run.return_value.stderr = ""
+            assert rtsp_socket_timeout_flag() == "timeout"
+
+    def test_defaults_to_timeout_when_ffmpeg_missing(self):
+        """No ffmpeg available — return the modern default so we don't
+        wedge ffmpeg-less unit tests trying to import camera.py."""
+        with patch.object(camera_svc, "get_ffmpeg_path", return_value=None):
+            assert rtsp_socket_timeout_flag() == "timeout"
+
+    def test_defaults_to_timeout_when_probe_raises(self):
+        """If subprocess probe blows up, prefer the modern default —
+        breaking the transitional-ffmpeg case is preferable to crashing
+        every live-view start."""
+        with (
+            patch.object(camera_svc, "get_ffmpeg_path", return_value="/usr/bin/ffmpeg"),
+            patch("backend.app.services.camera.subprocess.run", side_effect=OSError("boom")),
+        ):
+            assert rtsp_socket_timeout_flag() == "timeout"
+
+    def test_result_is_cached_across_calls(self):
+        """Probing ffmpeg is a subprocess spawn; cache it for the
+        process lifetime (ffmpeg won't swap mid-run)."""
+        with (
+            patch.object(camera_svc, "get_ffmpeg_path", return_value="/usr/bin/ffmpeg"),
+            patch("backend.app.services.camera.subprocess.run") as mock_run,
+        ):
+            mock_run.return_value.stdout = "  -timeout  <int64>\n"
+            mock_run.return_value.stderr = ""
+            rtsp_socket_timeout_flag()
+            rtsp_socket_timeout_flag()
+            rtsp_socket_timeout_flag()
+            assert mock_run.call_count == 1
+
+    def test_substring_match_does_not_false_positive(self):
+        """Match the option as ``-stimeout `` (trailing space) so an
+        unrelated mention like ``-listen_timeout`` or a fragment in
+        another section doesn't trick us into picking the missing flag."""
+        only_listen_help = (
+            "  -listen_timeout    <int>  ... incoming connections ...\n"
+            "  -timeout           <int64> ... socket I/O ...\n"
+        )
+        with (
+            patch.object(camera_svc, "get_ffmpeg_path", return_value="/usr/bin/ffmpeg"),
+            patch("backend.app.services.camera.subprocess.run") as mock_run,
+        ):
+            mock_run.return_value.stdout = only_listen_help
+            mock_run.return_value.stderr = ""
+            assert rtsp_socket_timeout_flag() == "timeout"
+
+
+class TestRtspArgvUsesProbe:
+    """The two RTSP ffmpeg callers must not hard-code either flag literal —
+    they must consume the probe so version-dependent correctness is
+    preserved. Guards #1504 from being half-fixed again."""
+
+    # Anchor on this file so the assertion is CWD-independent (pytest can
+    # be invoked from the project root OR from backend/, depending on who
+    # runs it). __file__ lives at backend/tests/unit/, so the repo root
+    # is three parents up.
+    _REPO_ROOT = Path(__file__).resolve().parents[3]
+    _RTSP_FFMPEG_CALLERS = (
+        "backend/app/api/routes/camera.py",
+        "backend/app/services/external_camera.py",
+    )
+
+    @pytest.mark.parametrize("rel", _RTSP_FFMPEG_CALLERS)
+    def test_no_hard_coded_timeout_literal(self, rel):
+        """Neither RTSP ffmpeg argv may pass a hard-coded ``-timeout``
+        or ``-stimeout`` literal — both must come from the probe."""
+        src = (self._REPO_ROOT / rel).read_text()
+        assert '"-timeout"' not in src, (
+            f"{rel} hard-codes `-timeout` — this is the listen-mode option on "
+            f"transitional ffmpeg (EADDRINUSE, #1504). Use rtsp_socket_timeout_flag()."
+        )
+        assert '"-stimeout"' not in src, (
+            f"{rel} hard-codes `-stimeout` — this option was removed in ffmpeg 7. Use rtsp_socket_timeout_flag()."
+        )
+        assert "rtsp_socket_timeout_flag()" in src, (
+            f"{rel} should derive its RTSP socket timeout flag from rtsp_socket_timeout_flag() — see #1504."
+        )

Some files were not shown because too many files changed in this diff