Parcourir la source

fix(camera): capture ffmpeg stderr when an RTSP stream stalls (#1395)

  A P2S support bundle on 0.2.5b1 — per-model probesize fix already
  applied — showed the camera still failing: ffmpeg connects, stays alive
  30+ seconds, emits zero JPEG bytes, the 30s stdout.read times out,
  reconnect loop repeats. No ffmpeg stderr appeared anywhere in the log to
  explain why.

  The cause was a diagnostic bug, not the camera path. _read_ffmpeg_stderr
  called process.stderr.read() — read-to-EOF. A stalled-but-still-alive
  ffmpeg (the P2S RTSP failure mode) never closes stderr, so the read
  blocked until the 2s wait_for timeout and returned None, discarding the
  banner + stream-analysis lines ffmpeg had already printed. ffmpeg stderr
  was captured only when it fully exited; once the probesize bump turned
  the earlier crash into a hang, the diagnostic went dark.

  Drain stderr incrementally in bounded 8KB chunks (64KB cap), returning
  whatever ffmpeg printed so far whether or not it has exited. Also log
  the resolved per-model probesize/analyzeduration on the info-level
  "Starting RTSP camera stream" line, and log the full ffmpeg argv at
  debug level with only the credential-bearing camera URL redacted
  instead of hiding the entire command.

  No behaviour change to streaming — this makes the unresolved P2S RTSP
  stall diagnosable in the next support bundle.
maziggy il y a 5 jours
Parent
commit
056f06a396
3 fichiers modifiés avec 91 ajouts et 12 suppressions
  1. 0 0
      CHANGELOG.md
  2. 36 11
      backend/app/api/routes/camera.py
  3. 55 1
      backend/tests/unit/test_camera_stderr_summary.py

Fichier diff supprimé car celui-ci est trop grand
+ 0 - 0
CHANGELOG.md


+ 36 - 11
backend/app/api/routes/camera.py

@@ -276,20 +276,35 @@ def _summarize_ffmpeg_stderr(text: str | None) -> str:
 
 
 
 
 async def _read_ffmpeg_stderr(process: asyncio.subprocess.Process) -> str | None:
 async def _read_ffmpeg_stderr(process: asyncio.subprocess.Process) -> str | None:
-    """Read ffmpeg stderr for diagnostics (best-effort, non-blocking).
-
-    Returns the stderr content with ffmpeg's boilerplate banner stripped,
-    so log output stays focused on the actual error.
+    """Read whatever ffmpeg has written to stderr so far (best-effort).
+
+    ffmpeg's stderr must be drained *incrementally*. A stalled-but-still-alive
+    ffmpeg — the typical P2S RTSP failure, where it connects but never produces
+    a frame — never closes stderr, so a plain ``stderr.read()`` (read-to-EOF)
+    blocks until the wait_for timeout and returns nothing, discarding the
+    banner + stream-analysis lines ffmpeg already printed. Reading in bounded
+    chunks returns the buffered output promptly whether or not ffmpeg has
+    exited. Returns the content with ffmpeg's boilerplate banner stripped.
     """
     """
     if not process or not process.stderr:
     if not process or not process.stderr:
         return None
         return None
+    chunks: list[bytes] = []
+    total = 0
+    cap = 65536
     try:
     try:
-        data = await asyncio.wait_for(process.stderr.read(), timeout=2.0)
-        if not data:
-            return None
-        return _summarize_ffmpeg_stderr(data.decode(errors="replace")) or None
-    except (TimeoutError, Exception):
+        while total < cap:
+            chunk = await asyncio.wait_for(process.stderr.read(8192), timeout=2.0)
+            if not chunk:
+                break  # EOF — ffmpeg has exited
+            chunks.append(chunk)
+            total += len(chunk)
+    except Exception:
+        # Timed out waiting for more data — ffmpeg is alive but quiet now.
+        # Fall through and return whatever it already printed.
+        pass
+    if not chunks:
         return None
         return None
+    return _summarize_ffmpeg_stderr(b"".join(chunks).decode(errors="replace")) or None
 
 
 
 
 async def generate_rtsp_mjpeg_stream(
 async def generate_rtsp_mjpeg_stream(
@@ -365,9 +380,19 @@ async def generate_rtsp_mjpeg_stream(
         _disconnect_events[stream_id] = disconnect_event
         _disconnect_events[stream_id] = disconnect_event
 
 
     logger.info(
     logger.info(
-        "Starting RTSP camera stream for %s (stream_id=%s, model=%s, fps=%s)", ip_address, stream_id, model, fps
+        "Starting RTSP camera stream for %s (stream_id=%s, model=%s, fps=%s, probesize=%s, analyzeduration=%s)",
+        ip_address,
+        stream_id,
+        model,
+        fps,
+        profile.probesize,
+        profile.analyzeduration,
     )
     )
-    logger.debug("ffmpeg command: %s ... (url hidden)", ffmpeg)
+    # Log the full argv so a support bundle shows the actual ffmpeg flags
+    # (probesize, analyzeduration, transport, ...). Only camera_url carries a
+    # secret (the access code), so redact just that one element.
+    _redacted_cmd = ["rtsp://<redacted>/streaming/live/1" if a == camera_url else a for a in cmd]
+    logger.debug("ffmpeg command: %s", " ".join(_redacted_cmd))
 
 
     # On Windows, spawn ffmpeg in its own process group so that
     # On Windows, spawn ffmpeg in its own process group so that
     # terminate() doesn't broadcast CTRL_C_EVENT to uvicorn (#605).
     # terminate() doesn't broadcast CTRL_C_EVENT to uvicorn (#605).

+ 55 - 1
backend/tests/unit/test_camera_stderr_summary.py

@@ -7,7 +7,9 @@ single click produced 555 lines across 30 retries. The helper strips the
 banner so logs stay focused on the real error.
 banner so logs stay focused on the real error.
 """
 """
 
 
-from backend.app.api.routes.camera import _summarize_ffmpeg_stderr
+import asyncio
+
+from backend.app.api.routes.camera import _read_ffmpeg_stderr, _summarize_ffmpeg_stderr
 
 
 _FAKE_BANNER = """ffmpeg version 7.1.3-0+deb13u1 Copyright (c) 2000-2025 the FFmpeg developers
 _FAKE_BANNER = """ffmpeg version 7.1.3-0+deb13u1 Copyright (c) 2000-2025 the FFmpeg developers
   built with gcc 14 (Debian 14.2.0-19)
   built with gcc 14 (Debian 14.2.0-19)
@@ -66,3 +68,55 @@ def test_drops_blank_lines():
 def test_banner_only_returns_empty():
 def test_banner_only_returns_empty():
     """If ffmpeg prints only the banner (no errors), the summary should be empty."""
     """If ffmpeg prints only the banner (no errors), the summary should be empty."""
     assert _summarize_ffmpeg_stderr(_FAKE_BANNER) == ""
     assert _summarize_ffmpeg_stderr(_FAKE_BANNER) == ""
+
+
+# --- _read_ffmpeg_stderr (#1395) -------------------------------------------
+# A stalled-but-alive ffmpeg (the P2S RTSP failure) never closes stderr, so a
+# read-to-EOF discarded everything it had already printed. _read_ffmpeg_stderr
+# now drains incrementally and must return that buffered output.
+
+
+class _FakeProcess:
+    """Minimal stand-in for asyncio.subprocess.Process — only .stderr is read."""
+
+    def __init__(self, stderr):
+        self.stderr = stderr
+
+
+def _reader_with(data: bytes, *, eof: bool) -> asyncio.StreamReader:
+    reader = asyncio.StreamReader()
+    if data:
+        reader.feed_data(data)
+    if eof:
+        reader.feed_eof()
+    return reader
+
+
+async def test_read_stderr_captures_output_from_a_running_ffmpeg():
+    """The #1395 regression: ffmpeg is alive and has NOT closed stderr (no EOF).
+    The output it already printed must still be returned, not discarded while
+    waiting for an EOF that never arrives."""
+    stderr = _FAKE_BANNER + "[rtsp @ 0x5] Could not find codec parameters\n"
+    proc = _FakeProcess(_reader_with(stderr.encode(), eof=False))
+    result = await _read_ffmpeg_stderr(proc)
+    assert result is not None
+    assert "Could not find codec parameters" in result
+    assert "ffmpeg version" not in result  # banner still stripped
+
+
+async def test_read_stderr_captures_output_from_an_exited_ffmpeg():
+    stderr = _FAKE_BANNER + "Error opening input: Connection refused\n"
+    proc = _FakeProcess(_reader_with(stderr.encode(), eof=True))
+    result = await _read_ffmpeg_stderr(proc)
+    assert result is not None
+    assert "Connection refused" in result
+
+
+async def test_read_stderr_returns_none_when_no_stderr_pipe():
+    assert await _read_ffmpeg_stderr(_FakeProcess(None)) is None
+
+
+async def test_read_stderr_returns_none_for_banner_only_output():
+    """Banner with no actionable lines summarizes to empty -> None."""
+    proc = _FakeProcess(_reader_with(_FAKE_BANNER.encode(), eof=True))
+    assert await _read_ffmpeg_stderr(proc) is None

Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff