Explorar o código

fix(archives): scan_timelapse picked stale video at false offset (#1278)

  scan_timelapse's Strategy 2 matched filename timestamps against both
  archive.started_at and archive.completed_at across seven hypothesised tz
  offsets. The filename is always print-START time, so the end-time branch
  was a semantic mistake — and the dense offset set [0, +-1, +-7, +-8]
  let an unrelated video coincidentally land within minutes of any later
  archive at some offset.

  Extract Strategy 2 into _match_timelapse_by_timestamp(): compare only
  against start time, and refuse to auto-pick when the next-best different
  video is within a 15-minute ambiguity margin. The route then returns
  available_files and the frontend's existing manual-selection dialog
  takes over — which is the fallback the reporter explicitly asked for.

  Surfaces in LAN-Only mode where the printer can't reach NTP and its
  clock drifts (e.g. P2S filenames in CST while server is in UTC, the
  8h offset that exposed this bug).
maziggy hai 2 semanas
pai
achega
4ee4bdb0d3
Modificáronse 3 ficheiros con 256 adicións e 60 borrados
  1. 0 0
      CHANGELOG.md
  2. 79 60
      backend/app/api/routes/archives.py
  3. 177 0
      backend/tests/unit/test_timelapse_match.py

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 0 - 0
CHANGELOG.md


+ 79 - 60
backend/app/api/routes/archives.py

@@ -1,9 +1,10 @@
 import io
 import json
 import logging
+import re as _re
 import zipfile
 from collections import defaultdict
-from datetime import date, datetime, time, timezone
+from datetime import date, datetime, time, timedelta, timezone
 from decimal import ROUND_HALF_UP, Decimal
 from pathlib import Path
 
@@ -48,6 +49,74 @@ def _safe_filename(filename: str) -> str:
     return Path(filename.replace("\\", "/")).name
 
 
+_TIMELAPSE_FILENAME_TS_RE = _re.compile(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})")
+_DEFAULT_TIMELAPSE_OFFSETS_HOURS: tuple[int, ...] = (0, 8, -8, 7, -7, 1, -1)
+_DEFAULT_TIMELAPSE_TOLERANCE = timedelta(hours=4)
+_DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN = timedelta(minutes=15)
+
+
+def _match_timelapse_by_timestamp(
+    video_files: list[dict],
+    archive_start: datetime | None,
+    *,
+    tolerance: timedelta = _DEFAULT_TIMELAPSE_TOLERANCE,
+    ambiguity_margin: timedelta = _DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN,
+    offsets_hours: tuple[int, ...] = _DEFAULT_TIMELAPSE_OFFSETS_HOURS,
+) -> tuple[dict | None, timedelta | None]:
+    """Pick the timelapse whose filename timestamp best matches the print start time.
+
+    Bambu timelapse filenames embed the printer-local START time (e.g.
+    "video_2026-05-08_09-41-29.mp4"). The printer's clock may be offset from the
+    server's — especially in LAN-Only mode where NTP is unreachable — so we try a
+    small set of common UTC offsets and keep the (video, offset) pair with the
+    smallest absolute distance from archive_start. We deliberately do NOT consider
+    archive_end here: the filename is start time, not end time, so comparing it to
+    completion is not a real signal (Strategy 3 handles end via file mtime).
+
+    Because the offset list densely covers a wide span, an unrelated video's
+    filename can coincidentally land near a later print's start at some offset.
+    To avoid that false positive, we require the best (video, offset) pair to
+    beat the next-best pair *from a different video* by at least `ambiguity_margin`.
+    When the top two candidates from different videos are too close to call,
+    we return None and let the caller fall back to manual selection.
+    """
+    if archive_start is None:
+        return None, None
+
+    # (diff, video) for every (video, offset) pair within tolerance.
+    candidates: list[tuple[timedelta, dict]] = []
+
+    for f in video_files:
+        fname = f.get("name", "")
+        m = _TIMELAPSE_FILENAME_TS_RE.search(fname)
+        if not m:
+            continue
+        try:
+            file_time = datetime.strptime(m.group(1), "%Y-%m-%d_%H-%M-%S")
+        except ValueError:
+            continue
+
+        for hour_offset in offsets_hours:
+            adjusted = file_time - timedelta(hours=hour_offset)
+            diff = abs(adjusted - archive_start)
+            if diff <= tolerance:
+                candidates.append((diff, f))
+
+    if not candidates:
+        return None, None
+
+    candidates.sort(key=lambda c: c[0])
+    best_diff, best_video = candidates[0]
+    best_name = best_video.get("name")
+
+    for diff, video in candidates[1:]:
+        if video.get("name") != best_name and (diff - best_diff) < ambiguity_margin:
+            # Another video matches almost as well — refuse to auto-pick.
+            return None, None
+
+    return best_video, best_diff
+
+
 def _validate_user_filter_permission(current_user: User | None, created_by_id: int | None):
     """Raise 403 if created_by_id filter is used without stats:filter_by_user permission."""
     if created_by_id is None or current_user is None:
@@ -1721,65 +1790,15 @@ async def scan_timelapse(
             matching_file = f
             break
 
-    # Strategy 2: Match by timestamp proximity
-    # Bambu timelapse filename uses the print START time (when recording began)
-    if not matching_file and (archive.started_at or archive.completed_at or archive.created_at):
-        import re
-        from datetime import datetime, timedelta
-
-        # Prefer started_at since video filename is the print start time
-        # Fall back to completed_at or created_at if started_at is not available
-        archive_start = archive.started_at
-        archive_end = archive.completed_at or archive.created_at
-        best_match = None
-        best_diff = timedelta(hours=24)  # Max 24 hour difference
-
-        for f in video_files:
-            fname = f.get("name", "")
-            # Parse timestamp from filename like "video_2025-11-24_03-17-40.mp4"
-            match = re.search(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})", fname)
-            if match:
-                try:
-                    file_time = datetime.strptime(match.group(1), "%Y-%m-%d_%H-%M-%S")
-
-                    # Try multiple timezone offsets since printer timezone can vary
-                    # Common cases: local time (0), CST/UTC+8 (+8), or UTC (-local offset)
-                    for hour_offset in [0, 8, -8, 7, -7, 1, -1]:
-                        adjusted_file_time = file_time - timedelta(hours=hour_offset)
-
-                        # Check against start time (video filename = print start)
-                        if archive_start:
-                            diff = abs(adjusted_file_time - archive_start)
-                            if diff < best_diff:
-                                best_diff = diff
-                                best_match = f
-                                logger.debug(
-                                    f"Timelapse match candidate: {fname} with offset {hour_offset}h, "
-                                    f"diff from start: {diff}"
-                                )
-
-                        # Also check against end time with a buffer
-                        # (video timestamp should be BEFORE completion time)
-                        if archive_end:
-                            # The video timestamp should be within the print duration before completion
-                            if adjusted_file_time < archive_end:
-                                diff = archive_end - adjusted_file_time
-                                # Reasonable print duration: up to 48 hours
-                                if diff < timedelta(hours=48) and diff < best_diff:
-                                    best_diff = diff
-                                    best_match = f
-                                    logger.debug(
-                                        f"Timelapse match candidate (from end): {fname} with offset {hour_offset}h, "
-                                        f"diff: {diff}"
-                                    )
-
-                except ValueError:
-                    continue
-
-        # Accept match within 4 hours (more lenient for timezone issues)
-        if best_match and best_diff < timedelta(hours=4):
-            matching_file = best_match
-            logger.info("Matched timelapse by timestamp: %s (diff: %s)", best_match.get("name"), best_diff)
+    # Strategy 2: Match by timestamp proximity against print START time.
+    # Bambu timelapse filename embeds the print start time in printer-local clock.
+    # See _match_timelapse_by_timestamp for the offset-search rationale and why we
+    # intentionally don't try to match filename against end time here.
+    if not matching_file and archive.started_at:
+        candidate, diff = _match_timelapse_by_timestamp(video_files, archive.started_at)
+        if candidate is not None:
+            matching_file = candidate
+            logger.info("Matched timelapse by timestamp: %s (diff: %s)", candidate.get("name"), diff)
 
     # Strategy 3: Use file modification time from FTP listing
     # This handles cases where printer's filename timestamp is wrong but file mtime is correct

+ 177 - 0
backend/tests/unit/test_timelapse_match.py

@@ -0,0 +1,177 @@
+"""Unit tests for the timelapse-by-timestamp matcher used by /archives/scan.
+
+Regression coverage for #1278: when the printer cannot reach NTP (LAN-Only mode),
+its clock is offset from the server's, and an older video's filename can land just
+before a later print's completion. The previous matcher:
+
+1. Treated the filename as either start- or end-time evidence — semantically wrong
+   for a filename that's always print-start.
+2. Probed a dense set of timezone offsets, so an unrelated video could
+   coincidentally land within minutes of a later print at *some* offset.
+
+The new matcher matches only against start time and refuses to auto-pick when the
+top two candidates (from different videos) are within an ambiguity margin —
+forcing the manual-selection fallback the reporter explicitly asked for.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+import pytest
+
+from backend.app.api.routes.archives import _match_timelapse_by_timestamp
+
+
+def _video(name: str, mtime: datetime | None = None) -> dict:
+    return {
+        "name": name,
+        "path": f"/timelapse/{name}",
+        "is_directory": False,
+        "size": 1024,
+        "mtime": mtime,
+    }
+
+
+class TestMatchTimelapseByTimestamp:
+    """Cover the bug from issue #1278 plus baseline cases."""
+
+    def test_issue_1278_archive2_refuses_to_auto_pick_ambiguous(self):
+        """Archive 2 (start 16:39:09) used to wrongly attach the older 09-41-29 video.
+
+        The wrong video matches at offset -7 (diff 2m20s), the correct video at
+        offset +8 (diff 3m33s). The two are within ~1 minute of each other —
+        too close to call. Matcher must return None so the route surfaces the
+        manual-selection list to the user.
+        """
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # belongs to Archive 1
+            _video("video_2026-05-09_00-42-42.mp4"),  # belongs to Archive 2 — correct
+        ]
+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is None
+        assert diff is None
+
+    def test_issue_1278_archive1_still_matches_unambiguously(self):
+        """Archive 1 (start 01:27:14) — only one candidate within tolerance,
+        so the matcher should still pick it cleanly."""
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # correct
+            _video("video_2026-05-09_00-42-42.mp4"),  # 15h+ away at any common offset
+        ]
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        assert diff is not None
+        assert diff < timedelta(minutes=20)
+
+    def test_archive2_resolves_when_stale_video_removed(self):
+        """If the user has cleaned up the stale Archive-1 video, Archive 2's correct
+        video is the only candidate and auto-match should succeed."""
+        videos = [_video("video_2026-05-09_00-42-42.mp4")]
+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-09_00-42-42.mp4"
+        assert diff is not None
+        assert diff < timedelta(minutes=5)
+
+    def test_no_match_when_outside_tolerance(self):
+        """All candidates outside the 4h tolerance → no match."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+        # A week later, far beyond any offset's reach
+        archive_start = datetime(2026, 5, 15, 12, 0, 0)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is None
+        assert diff is None
+
+    def test_returns_none_when_started_at_missing(self):
+        """No archive start time = no signal; should return None."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+
+        match, diff = _match_timelapse_by_timestamp(videos, None)
+
+        assert match is None
+        assert diff is None
+
+    def test_zero_offset_when_clocks_agree(self):
+        """When printer and server clocks agree, offset=0 should pick the video cleanly."""
+        videos = [_video("video_2026-05-08_16-40-00.mp4")]
+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
+        assert diff == timedelta(minutes=1)
+
+    def test_skips_videos_without_timestamp_in_name(self):
+        """Non-standard names (e.g., manually uploaded) should be skipped, not crash."""
+        videos = [
+            _video("my_custom_video.mp4"),
+            _video("video_2026-05-08_16-40-00.mp4"),
+        ]
+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
+
+        match, _diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
+
+    def test_empty_video_list_returns_none(self):
+        match, diff = _match_timelapse_by_timestamp([], datetime(2026, 5, 8, 0, 0, 0))
+        assert match is None
+        assert diff is None
+
+    @pytest.mark.parametrize("offset_hours", [0, 1, -1, 7, -7, 8, -8])
+    def test_supports_common_timezone_offsets_with_single_candidate(self, offset_hours: int):
+        """Each offset in the search list must be able to produce a match when
+        only one video exists (so ambiguity check is vacuous)."""
+        archive_start = datetime(2026, 5, 8, 12, 0, 0)
+        # Printer's filename reflects archive_start in printer-local time
+        printer_time = archive_start + timedelta(hours=offset_hours)
+        videos = [_video(printer_time.strftime("video_%Y-%m-%d_%H-%M-%S.mp4"))]
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert diff == timedelta(0)
+
+    def test_returns_match_when_runner_up_is_same_video_different_offset(self):
+        """A single video matching at two offsets is not ambiguous — pick it."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+        # +7h adjusted = 02:41:29; +8h adjusted = 01:41:29. Both within 4h of 01:27:14.
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        # Best is offset +8 → diff 14m15s
+        assert diff is not None
+        assert diff < timedelta(minutes=20)
+
+    def test_unambiguous_when_runner_up_is_well_separated(self):
+        """If the next-best different video is comfortably outside the ambiguity
+        margin, auto-pick the winner."""
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # +8h → 01:41:29, diff 14m15s
+            _video("video_2026-05-08_12-00-00.mp4"),  # +8h → 04:00:00, diff 2h32m
+        ]
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        assert diff is not None

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio