Browse Source

fix(archives): scan_timelapse picked stale video at false offset (#1278)

  scan_timelapse's Strategy 2 matched filename timestamps against both
  archive.started_at and archive.completed_at across seven hypothesised tz
  offsets. The filename is always print-START time, so the end-time branch
  was a semantic mistake — and the dense offset set [0, +-1, +-7, +-8]
  let an unrelated video coincidentally land within minutes of any later
  archive at some offset.

  Extract Strategy 2 into _match_timelapse_by_timestamp(): compare only
  against start time, and refuse to auto-pick when the next-best different
  video is within a 15-minute ambiguity margin. The route then returns
  available_files and the frontend's existing manual-selection dialog
  takes over — which is the fallback the reporter explicitly asked for.

  Surfaces in LAN-Only mode where the printer can't reach NTP and its
  clock drifts (e.g. P2S filenames in CST while server is in UTC, the
  8h offset that exposed this bug).
maziggy 2 weeks ago
parent
commit
4ee4bdb0d3
3 changed files with 256 additions and 60 deletions
  1. 0 0
      CHANGELOG.md
  2. 79 60
      backend/app/api/routes/archives.py
  3. 177 0
      backend/tests/unit/test_timelapse_match.py

File diff suppressed because it is too large
+ 0 - 0
CHANGELOG.md


+ 79 - 60
backend/app/api/routes/archives.py

@@ -1,9 +1,10 @@
 import io
 import json
 import logging
+import re as _re
 import zipfile
 from collections import defaultdict
-from datetime import date, datetime, time, timezone
+from datetime import date, datetime, time, timedelta, timezone
 from decimal import ROUND_HALF_UP, Decimal
 from pathlib import Path
 
@@ -48,6 +49,74 @@ def _safe_filename(filename: str) -> str:
     return Path(filename.replace("\\", "/")).name
 
 
+_TIMELAPSE_FILENAME_TS_RE = _re.compile(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})")
+_DEFAULT_TIMELAPSE_OFFSETS_HOURS: tuple[int, ...] = (0, 8, -8, 7, -7, 1, -1)
+_DEFAULT_TIMELAPSE_TOLERANCE = timedelta(hours=4)
+_DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN = timedelta(minutes=15)
+
+
+def _match_timelapse_by_timestamp(
+    video_files: list[dict],
+    archive_start: datetime | None,
+    *,
+    tolerance: timedelta = _DEFAULT_TIMELAPSE_TOLERANCE,
+    ambiguity_margin: timedelta = _DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN,
+    offsets_hours: tuple[int, ...] = _DEFAULT_TIMELAPSE_OFFSETS_HOURS,
+) -> tuple[dict | None, timedelta | None]:
+    """Pick the timelapse whose filename timestamp best matches the print start time.
+
+    Bambu timelapse filenames embed the printer-local START time (e.g.
+    "video_2026-05-08_09-41-29.mp4"). The printer's clock may be offset from the
+    server's — especially in LAN-Only mode where NTP is unreachable — so we try a
+    small set of common UTC offsets and keep the (video, offset) pair with the
+    smallest absolute distance from archive_start. We deliberately do NOT consider
+    archive_end here: the filename is start time, not end time, so comparing it to
+    completion is not a real signal (Strategy 3 handles end via file mtime).
+
+    Because the offset list densely covers a wide span, an unrelated video's
+    filename can coincidentally land near a later print's start at some offset.
+    To avoid that false positive, we require the best (video, offset) pair to
+    beat the next-best pair *from a different video* by at least `ambiguity_margin`.
+    When the top two candidates from different videos are too close to call,
+    we return None and let the caller fall back to manual selection.
+    """
+    if archive_start is None:
+        return None, None
+
+    # (diff, video) for every (video, offset) pair within tolerance.
+    candidates: list[tuple[timedelta, dict]] = []
+
+    for f in video_files:
+        fname = f.get("name", "")
+        m = _TIMELAPSE_FILENAME_TS_RE.search(fname)
+        if not m:
+            continue
+        try:
+            file_time = datetime.strptime(m.group(1), "%Y-%m-%d_%H-%M-%S")
+        except ValueError:
+            continue
+
+        for hour_offset in offsets_hours:
+            adjusted = file_time - timedelta(hours=hour_offset)
+            diff = abs(adjusted - archive_start)
+            if diff <= tolerance:
+                candidates.append((diff, f))
+
+    if not candidates:
+        return None, None
+
+    candidates.sort(key=lambda c: c[0])
+    best_diff, best_video = candidates[0]
+    best_name = best_video.get("name")
+
+    for diff, video in candidates[1:]:
+        if video.get("name") != best_name and (diff - best_diff) < ambiguity_margin:
+            # Another video matches almost as well — refuse to auto-pick.
+            return None, None
+
+    return best_video, best_diff
+
+
 def _validate_user_filter_permission(current_user: User | None, created_by_id: int | None):
     """Raise 403 if created_by_id filter is used without stats:filter_by_user permission."""
     if created_by_id is None or current_user is None:
@@ -1721,65 +1790,15 @@ async def scan_timelapse(
             matching_file = f
             break
 
-    # Strategy 2: Match by timestamp proximity
-    # Bambu timelapse filename uses the print START time (when recording began)
-    if not matching_file and (archive.started_at or archive.completed_at or archive.created_at):
-        import re
-        from datetime import datetime, timedelta
-
-        # Prefer started_at since video filename is the print start time
-        # Fall back to completed_at or created_at if started_at is not available
-        archive_start = archive.started_at
-        archive_end = archive.completed_at or archive.created_at
-        best_match = None
-        best_diff = timedelta(hours=24)  # Max 24 hour difference
-
-        for f in video_files:
-            fname = f.get("name", "")
-            # Parse timestamp from filename like "video_2025-11-24_03-17-40.mp4"
-            match = re.search(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})", fname)
-            if match:
-                try:
-                    file_time = datetime.strptime(match.group(1), "%Y-%m-%d_%H-%M-%S")
-
-                    # Try multiple timezone offsets since printer timezone can vary
-                    # Common cases: local time (0), CST/UTC+8 (+8), or UTC (-local offset)
-                    for hour_offset in [0, 8, -8, 7, -7, 1, -1]:
-                        adjusted_file_time = file_time - timedelta(hours=hour_offset)
-
-                        # Check against start time (video filename = print start)
-                        if archive_start:
-                            diff = abs(adjusted_file_time - archive_start)
-                            if diff < best_diff:
-                                best_diff = diff
-                                best_match = f
-                                logger.debug(
-                                    f"Timelapse match candidate: {fname} with offset {hour_offset}h, "
-                                    f"diff from start: {diff}"
-                                )
-
-                        # Also check against end time with a buffer
-                        # (video timestamp should be BEFORE completion time)
-                        if archive_end:
-                            # The video timestamp should be within the print duration before completion
-                            if adjusted_file_time < archive_end:
-                                diff = archive_end - adjusted_file_time
-                                # Reasonable print duration: up to 48 hours
-                                if diff < timedelta(hours=48) and diff < best_diff:
-                                    best_diff = diff
-                                    best_match = f
-                                    logger.debug(
-                                        f"Timelapse match candidate (from end): {fname} with offset {hour_offset}h, "
-                                        f"diff: {diff}"
-                                    )
-
-                except ValueError:
-                    continue
-
-        # Accept match within 4 hours (more lenient for timezone issues)
-        if best_match and best_diff < timedelta(hours=4):
-            matching_file = best_match
-            logger.info("Matched timelapse by timestamp: %s (diff: %s)", best_match.get("name"), best_diff)
+    # Strategy 2: Match by timestamp proximity against print START time.
+    # Bambu timelapse filename embeds the print start time in printer-local clock.
+    # See _match_timelapse_by_timestamp for the offset-search rationale and why we
+    # intentionally don't try to match filename against end time here.
+    if not matching_file and archive.started_at:
+        candidate, diff = _match_timelapse_by_timestamp(video_files, archive.started_at)
+        if candidate is not None:
+            matching_file = candidate
+            logger.info("Matched timelapse by timestamp: %s (diff: %s)", candidate.get("name"), diff)
 
     # Strategy 3: Use file modification time from FTP listing
     # This handles cases where printer's filename timestamp is wrong but file mtime is correct

+ 177 - 0
backend/tests/unit/test_timelapse_match.py

@@ -0,0 +1,177 @@
+"""Unit tests for the timelapse-by-timestamp matcher used by /archives/scan.
+
+Regression coverage for #1278: when the printer cannot reach NTP (LAN-Only mode),
+its clock is offset from the server's, and an older video's filename can land just
+before a later print's completion. The previous matcher:
+
+1. Treated the filename as either start- or end-time evidence — semantically wrong
+   for a filename that's always print-start.
+2. Probed a dense set of timezone offsets, so an unrelated video could
+   coincidentally land within minutes of a later print at *some* offset.
+
+The new matcher matches only against start time and refuses to auto-pick when the
+top two candidates (from different videos) are within an ambiguity margin —
+forcing the manual-selection fallback the reporter explicitly asked for.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime, timedelta
+
+import pytest
+
+from backend.app.api.routes.archives import _match_timelapse_by_timestamp
+
+
+def _video(name: str, mtime: datetime | None = None) -> dict:
+    return {
+        "name": name,
+        "path": f"/timelapse/{name}",
+        "is_directory": False,
+        "size": 1024,
+        "mtime": mtime,
+    }
+
+
+class TestMatchTimelapseByTimestamp:
+    """Cover the bug from issue #1278 plus baseline cases."""
+
+    def test_issue_1278_archive2_refuses_to_auto_pick_ambiguous(self):
+        """Archive 2 (start 16:39:09) used to wrongly attach the older 09-41-29 video.
+
+        The wrong video matches at offset -7 (diff 2m20s), the correct video at
+        offset +8 (diff 3m33s). The two are within ~1 minute of each other —
+        too close to call. Matcher must return None so the route surfaces the
+        manual-selection list to the user.
+        """
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # belongs to Archive 1
+            _video("video_2026-05-09_00-42-42.mp4"),  # belongs to Archive 2 — correct
+        ]
+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is None
+        assert diff is None
+
+    def test_issue_1278_archive1_still_matches_unambiguously(self):
+        """Archive 1 (start 01:27:14) — only one candidate within tolerance,
+        so the matcher should still pick it cleanly."""
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # correct
+            _video("video_2026-05-09_00-42-42.mp4"),  # 15h+ away at any common offset
+        ]
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        assert diff is not None
+        assert diff < timedelta(minutes=20)
+
+    def test_archive2_resolves_when_stale_video_removed(self):
+        """If the user has cleaned up the stale Archive-1 video, Archive 2's correct
+        video is the only candidate and auto-match should succeed."""
+        videos = [_video("video_2026-05-09_00-42-42.mp4")]
+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-09_00-42-42.mp4"
+        assert diff is not None
+        assert diff < timedelta(minutes=5)
+
+    def test_no_match_when_outside_tolerance(self):
+        """All candidates outside the 4h tolerance → no match."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+        # A week later, far beyond any offset's reach
+        archive_start = datetime(2026, 5, 15, 12, 0, 0)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is None
+        assert diff is None
+
+    def test_returns_none_when_started_at_missing(self):
+        """No archive start time = no signal; should return None."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+
+        match, diff = _match_timelapse_by_timestamp(videos, None)
+
+        assert match is None
+        assert diff is None
+
+    def test_zero_offset_when_clocks_agree(self):
+        """When printer and server clocks agree, offset=0 should pick the video cleanly."""
+        videos = [_video("video_2026-05-08_16-40-00.mp4")]
+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
+        assert diff == timedelta(minutes=1)
+
+    def test_skips_videos_without_timestamp_in_name(self):
+        """Non-standard names (e.g., manually uploaded) should be skipped, not crash."""
+        videos = [
+            _video("my_custom_video.mp4"),
+            _video("video_2026-05-08_16-40-00.mp4"),
+        ]
+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
+
+        match, _diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
+
+    def test_empty_video_list_returns_none(self):
+        match, diff = _match_timelapse_by_timestamp([], datetime(2026, 5, 8, 0, 0, 0))
+        assert match is None
+        assert diff is None
+
+    @pytest.mark.parametrize("offset_hours", [0, 1, -1, 7, -7, 8, -8])
+    def test_supports_common_timezone_offsets_with_single_candidate(self, offset_hours: int):
+        """Each offset in the search list must be able to produce a match when
+        only one video exists (so ambiguity check is vacuous)."""
+        archive_start = datetime(2026, 5, 8, 12, 0, 0)
+        # Printer's filename reflects archive_start in printer-local time
+        printer_time = archive_start + timedelta(hours=offset_hours)
+        videos = [_video(printer_time.strftime("video_%Y-%m-%d_%H-%M-%S.mp4"))]
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert diff == timedelta(0)
+
+    def test_returns_match_when_runner_up_is_same_video_different_offset(self):
+        """A single video matching at two offsets is not ambiguous — pick it."""
+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
+        # +7h adjusted = 02:41:29; +8h adjusted = 01:41:29. Both within 4h of 01:27:14.
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        # Best is offset +8 → diff 14m15s
+        assert diff is not None
+        assert diff < timedelta(minutes=20)
+
+    def test_unambiguous_when_runner_up_is_well_separated(self):
+        """If the next-best different video is comfortably outside the ambiguity
+        margin, auto-pick the winner."""
+        videos = [
+            _video("video_2026-05-08_09-41-29.mp4"),  # +8h → 01:41:29, diff 14m15s
+            _video("video_2026-05-08_12-00-00.mp4"),  # +8h → 04:00:00, diff 2h32m
+        ]
+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
+
+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
+
+        assert match is not None
+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
+        assert diff is not None

Some files were not shown because too many files changed in this diff