Просмотр исходного кода

fix(camera): capture ffmpeg stderr when an RTSP stream stalls (#1395)

  A P2S support bundle on 0.2.5b1 — per-model probesize fix already
  applied — showed the camera still failing: ffmpeg connects, stays alive
  30+ seconds, emits zero JPEG bytes, the 30s stdout.read times out,
  reconnect loop repeats. No ffmpeg stderr appeared anywhere in the log to
  explain why.

  The cause was a diagnostic bug, not the camera path. _read_ffmpeg_stderr
  called process.stderr.read() — read-to-EOF. A stalled-but-still-alive
  ffmpeg (the P2S RTSP failure mode) never closes stderr, so the read
  blocked until the 2s wait_for timeout and returned None, discarding the
  banner + stream-analysis lines ffmpeg had already printed. ffmpeg stderr
  was captured only when it fully exited; once the probesize bump turned
  the earlier crash into a hang, the diagnostic went dark.

  Drain stderr incrementally in bounded 8KB chunks (64KB cap), returning
  whatever ffmpeg printed so far whether or not it has exited. Also log
  the resolved per-model probesize/analyzeduration on the info-level
  "Starting RTSP camera stream" line, and log the full ffmpeg argv at
  debug level with only the credential-bearing camera URL redacted
  instead of hiding the entire command.

  No behaviour change to streaming — this makes the unresolved P2S RTSP
  stall diagnosable in the next support bundle.
maziggy 5 дней назад
Родитель
Сommit
056f06a396
3 измененных файлов с 91 добавлено и 12 удалено
  1. 0 0
      CHANGELOG.md
  2. 36 11
      backend/app/api/routes/camera.py
  3. 55 1
      backend/tests/unit/test_camera_stderr_summary.py

Разница между файлами не показана из-за своего большого размера
+ 0 - 0
CHANGELOG.md


+ 36 - 11
backend/app/api/routes/camera.py

@@ -276,20 +276,35 @@ def _summarize_ffmpeg_stderr(text: str | None) -> str:
 
 
 async def _read_ffmpeg_stderr(process: asyncio.subprocess.Process) -> str | None:
-    """Read ffmpeg stderr for diagnostics (best-effort, non-blocking).
-
-    Returns the stderr content with ffmpeg's boilerplate banner stripped,
-    so log output stays focused on the actual error.
+    """Read whatever ffmpeg has written to stderr so far (best-effort).
+
+    ffmpeg's stderr must be drained *incrementally*. A stalled-but-still-alive
+    ffmpeg — the typical P2S RTSP failure, where it connects but never produces
+    a frame — never closes stderr, so a plain ``stderr.read()`` (read-to-EOF)
+    blocks until the wait_for timeout and returns nothing, discarding the
+    banner + stream-analysis lines ffmpeg already printed. Reading in bounded
+    chunks returns the buffered output promptly whether or not ffmpeg has
+    exited. Returns the content with ffmpeg's boilerplate banner stripped.
     """
     if not process or not process.stderr:
         return None
+    chunks: list[bytes] = []
+    total = 0
+    cap = 65536
     try:
-        data = await asyncio.wait_for(process.stderr.read(), timeout=2.0)
-        if not data:
-            return None
-        return _summarize_ffmpeg_stderr(data.decode(errors="replace")) or None
-    except (TimeoutError, Exception):
+        while total < cap:
+            chunk = await asyncio.wait_for(process.stderr.read(8192), timeout=2.0)
+            if not chunk:
+                break  # EOF — ffmpeg has exited
+            chunks.append(chunk)
+            total += len(chunk)
+    except Exception:
+        # Timed out waiting for more data — ffmpeg is alive but quiet now.
+        # Fall through and return whatever it already printed.
+        pass
+    if not chunks:
         return None
+    return _summarize_ffmpeg_stderr(b"".join(chunks).decode(errors="replace")) or None
 
 
 async def generate_rtsp_mjpeg_stream(
@@ -365,9 +380,19 @@ async def generate_rtsp_mjpeg_stream(
         _disconnect_events[stream_id] = disconnect_event
 
     logger.info(
-        "Starting RTSP camera stream for %s (stream_id=%s, model=%s, fps=%s)", ip_address, stream_id, model, fps
+        "Starting RTSP camera stream for %s (stream_id=%s, model=%s, fps=%s, probesize=%s, analyzeduration=%s)",
+        ip_address,
+        stream_id,
+        model,
+        fps,
+        profile.probesize,
+        profile.analyzeduration,
     )
-    logger.debug("ffmpeg command: %s ... (url hidden)", ffmpeg)
+    # Log the full argv so a support bundle shows the actual ffmpeg flags
+    # (probesize, analyzeduration, transport, ...). Only camera_url carries a
+    # secret (the access code), so redact just that one element.
+    _redacted_cmd = ["rtsp://<redacted>/streaming/live/1" if a == camera_url else a for a in cmd]
+    logger.debug("ffmpeg command: %s", " ".join(_redacted_cmd))
 
     # On Windows, spawn ffmpeg in its own process group so that
     # terminate() doesn't broadcast CTRL_C_EVENT to uvicorn (#605).

+ 55 - 1
backend/tests/unit/test_camera_stderr_summary.py

@@ -7,7 +7,9 @@ single click produced 555 lines across 30 retries. The helper strips the
 banner so logs stay focused on the real error.
 """
 
-from backend.app.api.routes.camera import _summarize_ffmpeg_stderr
+import asyncio
+
+from backend.app.api.routes.camera import _read_ffmpeg_stderr, _summarize_ffmpeg_stderr
 
 _FAKE_BANNER = """ffmpeg version 7.1.3-0+deb13u1 Copyright (c) 2000-2025 the FFmpeg developers
   built with gcc 14 (Debian 14.2.0-19)
@@ -66,3 +68,55 @@ def test_drops_blank_lines():
 def test_banner_only_returns_empty():
     """If ffmpeg prints only the banner (no errors), the summary should be empty."""
     assert _summarize_ffmpeg_stderr(_FAKE_BANNER) == ""
+
+
+# --- _read_ffmpeg_stderr (#1395) -------------------------------------------
+# A stalled-but-alive ffmpeg (the P2S RTSP failure) never closes stderr, so a
+# read-to-EOF discarded everything it had already printed. _read_ffmpeg_stderr
+# now drains incrementally and must return that buffered output.
+
+
+class _FakeProcess:
+    """Minimal stand-in for asyncio.subprocess.Process — only .stderr is read."""
+
+    def __init__(self, stderr):
+        self.stderr = stderr
+
+
+def _reader_with(data: bytes, *, eof: bool) -> asyncio.StreamReader:
+    reader = asyncio.StreamReader()
+    if data:
+        reader.feed_data(data)
+    if eof:
+        reader.feed_eof()
+    return reader
+
+
+async def test_read_stderr_captures_output_from_a_running_ffmpeg():
+    """The #1395 regression: ffmpeg is alive and has NOT closed stderr (no EOF).
+    The output it already printed must still be returned, not discarded while
+    waiting for an EOF that never arrives."""
+    stderr = _FAKE_BANNER + "[rtsp @ 0x5] Could not find codec parameters\n"
+    proc = _FakeProcess(_reader_with(stderr.encode(), eof=False))
+    result = await _read_ffmpeg_stderr(proc)
+    assert result is not None
+    assert "Could not find codec parameters" in result
+    assert "ffmpeg version" not in result  # banner still stripped
+
+
+async def test_read_stderr_captures_output_from_an_exited_ffmpeg():
+    stderr = _FAKE_BANNER + "Error opening input: Connection refused\n"
+    proc = _FakeProcess(_reader_with(stderr.encode(), eof=True))
+    result = await _read_ffmpeg_stderr(proc)
+    assert result is not None
+    assert "Connection refused" in result
+
+
+async def test_read_stderr_returns_none_when_no_stderr_pipe():
+    assert await _read_ffmpeg_stderr(_FakeProcess(None)) is None
+
+
+async def test_read_stderr_returns_none_for_banner_only_output():
+    """Banner with no actionable lines summarizes to empty -> None."""
+    proc = _FakeProcess(_reader_with(_FAKE_BANNER.encode(), eof=True))
+    assert await _read_ffmpeg_stderr(proc) is None

Некоторые файлы не были показаны из-за большого количества измененных файлов