2 weeks ago · 4ee4bdb0d3
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
--- a/backend/app/api/routes/archives.py
+++ b/backend/app/api/routes/archives.py
@@ -1,9 +1,10 @@
 
				 import io
			
 
				 import json
			
 
				 import logging
			
 
				+import re as _re
			
 
				 import zipfile
			
 
				 from collections import defaultdict
			
 
				-from datetime import date, datetime, time, timezone
			
 
				+from datetime import date, datetime, time, timedelta, timezone
			
 
				 from decimal import ROUND_HALF_UP, Decimal
			
 
				 from pathlib import Path
			
 
				 
			
@@ -48,6 +49,74 @@ def _safe_filename(filename: str) -> str:
 
				     return Path(filename.replace("\\", "/")).name
			
 
				 
			
 
				 
			
 
				+_TIMELAPSE_FILENAME_TS_RE = _re.compile(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})")
			
 
				+_DEFAULT_TIMELAPSE_OFFSETS_HOURS: tuple[int, ...] = (0, 8, -8, 7, -7, 1, -1)
			
 
				+_DEFAULT_TIMELAPSE_TOLERANCE = timedelta(hours=4)
			
 
				+_DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN = timedelta(minutes=15)
			
 
				+
			
 
				+
			
 
				+def _match_timelapse_by_timestamp(
			
 
				+    video_files: list[dict],
			
 
				+    archive_start: datetime | None,
			
 
				+    *,
			
 
				+    tolerance: timedelta = _DEFAULT_TIMELAPSE_TOLERANCE,
			
 
				+    ambiguity_margin: timedelta = _DEFAULT_TIMELAPSE_AMBIGUITY_MARGIN,
			
 
				+    offsets_hours: tuple[int, ...] = _DEFAULT_TIMELAPSE_OFFSETS_HOURS,
			
 
				+) -> tuple[dict | None, timedelta | None]:
			
 
				+    """Pick the timelapse whose filename timestamp best matches the print start time.
			
 
				+
			
 
				+    Bambu timelapse filenames embed the printer-local START time (e.g.
			
 
				+    "video_2026-05-08_09-41-29.mp4"). The printer's clock may be offset from the
			
 
				+    server's — especially in LAN-Only mode where NTP is unreachable — so we try a
			
 
				+    small set of common UTC offsets and keep the (video, offset) pair with the
			
 
				+    smallest absolute distance from archive_start. We deliberately do NOT consider
			
 
				+    archive_end here: the filename is start time, not end time, so comparing it to
			
 
				+    completion is not a real signal (Strategy 3 handles end via file mtime).
			
 
				+
			
 
				+    Because the offset list densely covers a wide span, an unrelated video's
			
 
				+    filename can coincidentally land near a later print's start at some offset.
			
 
				+    To avoid that false positive, we require the best (video, offset) pair to
			
 
				+    beat the next-best pair *from a different video* by at least `ambiguity_margin`.
			
 
				+    When the top two candidates from different videos are too close to call,
			
 
				+    we return None and let the caller fall back to manual selection.
			
 
				+    """
			
 
				+    if archive_start is None:
			
 
				+        return None, None
			
 
				+
			
 
				+    # (diff, video) for every (video, offset) pair within tolerance.
			
 
				+    candidates: list[tuple[timedelta, dict]] = []
			
 
				+
			
 
				+    for f in video_files:
			
 
				+        fname = f.get("name", "")
			
 
				+        m = _TIMELAPSE_FILENAME_TS_RE.search(fname)
			
 
				+        if not m:
			
 
				+            continue
			
 
				+        try:
			
 
				+            file_time = datetime.strptime(m.group(1), "%Y-%m-%d_%H-%M-%S")
			
 
				+        except ValueError:
			
 
				+            continue
			
 
				+
			
 
				+        for hour_offset in offsets_hours:
			
 
				+            adjusted = file_time - timedelta(hours=hour_offset)
			
 
				+            diff = abs(adjusted - archive_start)
			
 
				+            if diff <= tolerance:
			
 
				+                candidates.append((diff, f))
			
 
				+
			
 
				+    if not candidates:
			
 
				+        return None, None
			
 
				+
			
 
				+    candidates.sort(key=lambda c: c[0])
			
 
				+    best_diff, best_video = candidates[0]
			
 
				+    best_name = best_video.get("name")
			
 
				+
			
 
				+    for diff, video in candidates[1:]:
			
 
				+        if video.get("name") != best_name and (diff - best_diff) < ambiguity_margin:
			
 
				+            # Another video matches almost as well — refuse to auto-pick.
			
 
				+            return None, None
			
 
				+
			
 
				+    return best_video, best_diff
			
 
				+
			
 
				+
			
 
				 def _validate_user_filter_permission(current_user: User | None, created_by_id: int | None):
			
 
				     """Raise 403 if created_by_id filter is used without stats:filter_by_user permission."""
			
 
				     if created_by_id is None or current_user is None:
			
@@ -1721,65 +1790,15 @@ async def scan_timelapse(
 
				             matching_file = f
			
 
				             break
			
 
				 
			
 
				-    # Strategy 2: Match by timestamp proximity
			
 
				-    # Bambu timelapse filename uses the print START time (when recording began)
			
 
				-    if not matching_file and (archive.started_at or archive.completed_at or archive.created_at):
			
 
				-        import re
			
 
				-        from datetime import datetime, timedelta
			
 
				-
			
 
				-        # Prefer started_at since video filename is the print start time
			
 
				-        # Fall back to completed_at or created_at if started_at is not available
			
 
				-        archive_start = archive.started_at
			
 
				-        archive_end = archive.completed_at or archive.created_at
			
 
				-        best_match = None
			
 
				-        best_diff = timedelta(hours=24)  # Max 24 hour difference
			
 
				-
			
 
				-        for f in video_files:
			
 
				-            fname = f.get("name", "")
			
 
				-            # Parse timestamp from filename like "video_2025-11-24_03-17-40.mp4"
			
 
				-            match = re.search(r"(\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2})", fname)
			
 
				-            if match:
			
 
				-                try:
			
 
				-                    file_time = datetime.strptime(match.group(1), "%Y-%m-%d_%H-%M-%S")
			
 
				-
			
 
				-                    # Try multiple timezone offsets since printer timezone can vary
			
 
				-                    # Common cases: local time (0), CST/UTC+8 (+8), or UTC (-local offset)
			
 
				-                    for hour_offset in [0, 8, -8, 7, -7, 1, -1]:
			
 
				-                        adjusted_file_time = file_time - timedelta(hours=hour_offset)
			
 
				-
			
 
				-                        # Check against start time (video filename = print start)
			
 
				-                        if archive_start:
			
 
				-                            diff = abs(adjusted_file_time - archive_start)
			
 
				-                            if diff < best_diff:
			
 
				-                                best_diff = diff
			
 
				-                                best_match = f
			
 
				-                                logger.debug(
			
 
				-                                    f"Timelapse match candidate: {fname} with offset {hour_offset}h, "
			
 
				-                                    f"diff from start: {diff}"
			
 
				-                                )
			
 
				-
			
 
				-                        # Also check against end time with a buffer
			
 
				-                        # (video timestamp should be BEFORE completion time)
			
 
				-                        if archive_end:
			
 
				-                            # The video timestamp should be within the print duration before completion
			
 
				-                            if adjusted_file_time < archive_end:
			
 
				-                                diff = archive_end - adjusted_file_time
			
 
				-                                # Reasonable print duration: up to 48 hours
			
 
				-                                if diff < timedelta(hours=48) and diff < best_diff:
			
 
				-                                    best_diff = diff
			
 
				-                                    best_match = f
			
 
				-                                    logger.debug(
			
 
				-                                        f"Timelapse match candidate (from end): {fname} with offset {hour_offset}h, "
			
 
				-                                        f"diff: {diff}"
			
 
				-                                    )
			
 
				-
			
 
				-                except ValueError:
			
 
				-                    continue
			
 
				-
			
 
				-        # Accept match within 4 hours (more lenient for timezone issues)
			
 
				-        if best_match and best_diff < timedelta(hours=4):
			
 
				-            matching_file = best_match
			
 
				-            logger.info("Matched timelapse by timestamp: %s (diff: %s)", best_match.get("name"), best_diff)
			
 
				+    # Strategy 2: Match by timestamp proximity against print START time.
			
 
				+    # Bambu timelapse filename embeds the print start time in printer-local clock.
			
 
				+    # See _match_timelapse_by_timestamp for the offset-search rationale and why we
			
 
				+    # intentionally don't try to match filename against end time here.
			
 
				+    if not matching_file and archive.started_at:
			
 
				+        candidate, diff = _match_timelapse_by_timestamp(video_files, archive.started_at)
			
 
				+        if candidate is not None:
			
 
				+            matching_file = candidate
			
 
				+            logger.info("Matched timelapse by timestamp: %s (diff: %s)", candidate.get("name"), diff)
			
 
				 
			
 
				     # Strategy 3: Use file modification time from FTP listing
			
 
				     # This handles cases where printer's filename timestamp is wrong but file mtime is correct
			
--- a/backend/tests/unit/test_timelapse_match.py
+++ b/backend/tests/unit/test_timelapse_match.py
@@ -0,0 +1,177 @@
 
				+"""Unit tests for the timelapse-by-timestamp matcher used by /archives/scan.
			
 
				+
			
 
				+Regression coverage for #1278: when the printer cannot reach NTP (LAN-Only mode),
			
 
				+its clock is offset from the server's, and an older video's filename can land just
			
 
				+before a later print's completion. The previous matcher:
			
 
				+
			
 
				+1. Treated the filename as either start- or end-time evidence — semantically wrong
			
 
				+   for a filename that's always print-start.
			
 
				+2. Probed a dense set of timezone offsets, so an unrelated video could
			
 
				+   coincidentally land within minutes of a later print at *some* offset.
			
 
				+
			
 
				+The new matcher matches only against start time and refuses to auto-pick when the
			
 
				+top two candidates (from different videos) are within an ambiguity margin —
			
 
				+forcing the manual-selection fallback the reporter explicitly asked for.
			
 
				+"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+from datetime import datetime, timedelta
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+from backend.app.api.routes.archives import _match_timelapse_by_timestamp
			
 
				+
			
 
				+
			
 
				+def _video(name: str, mtime: datetime | None = None) -> dict:
			
 
				+    return {
			
 
				+        "name": name,
			
 
				+        "path": f"/timelapse/{name}",
			
 
				+        "is_directory": False,
			
 
				+        "size": 1024,
			
 
				+        "mtime": mtime,
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+class TestMatchTimelapseByTimestamp:
			
 
				+    """Cover the bug from issue #1278 plus baseline cases."""
			
 
				+
			
 
				+    def test_issue_1278_archive2_refuses_to_auto_pick_ambiguous(self):
			
 
				+        """Archive 2 (start 16:39:09) used to wrongly attach the older 09-41-29 video.
			
 
				+
			
 
				+        The wrong video matches at offset -7 (diff 2m20s), the correct video at
			
 
				+        offset +8 (diff 3m33s). The two are within ~1 minute of each other —
			
 
				+        too close to call. Matcher must return None so the route surfaces the
			
 
				+        manual-selection list to the user.
			
 
				+        """
			
 
				+        videos = [
			
 
				+            _video("video_2026-05-08_09-41-29.mp4"),  # belongs to Archive 1
			
 
				+            _video("video_2026-05-09_00-42-42.mp4"),  # belongs to Archive 2 — correct
			
 
				+        ]
			
 
				+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is None
			
 
				+        assert diff is None
			
 
				+
			
 
				+    def test_issue_1278_archive1_still_matches_unambiguously(self):
			
 
				+        """Archive 1 (start 01:27:14) — only one candidate within tolerance,
			
 
				+        so the matcher should still pick it cleanly."""
			
 
				+        videos = [
			
 
				+            _video("video_2026-05-08_09-41-29.mp4"),  # correct
			
 
				+            _video("video_2026-05-09_00-42-42.mp4"),  # 15h+ away at any common offset
			
 
				+        ]
			
 
				+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
			
 
				+        assert diff is not None
			
 
				+        assert diff < timedelta(minutes=20)
			
 
				+
			
 
				+    def test_archive2_resolves_when_stale_video_removed(self):
			
 
				+        """If the user has cleaned up the stale Archive-1 video, Archive 2's correct
			
 
				+        video is the only candidate and auto-match should succeed."""
			
 
				+        videos = [_video("video_2026-05-09_00-42-42.mp4")]
			
 
				+        archive_start = datetime(2026, 5, 8, 16, 39, 9)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-09_00-42-42.mp4"
			
 
				+        assert diff is not None
			
 
				+        assert diff < timedelta(minutes=5)
			
 
				+
			
 
				+    def test_no_match_when_outside_tolerance(self):
			
 
				+        """All candidates outside the 4h tolerance → no match."""
			
 
				+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
			
 
				+        # A week later, far beyond any offset's reach
			
 
				+        archive_start = datetime(2026, 5, 15, 12, 0, 0)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is None
			
 
				+        assert diff is None
			
 
				+
			
 
				+    def test_returns_none_when_started_at_missing(self):
			
 
				+        """No archive start time = no signal; should return None."""
			
 
				+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, None)
			
 
				+
			
 
				+        assert match is None
			
 
				+        assert diff is None
			
 
				+
			
 
				+    def test_zero_offset_when_clocks_agree(self):
			
 
				+        """When printer and server clocks agree, offset=0 should pick the video cleanly."""
			
 
				+        videos = [_video("video_2026-05-08_16-40-00.mp4")]
			
 
				+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
			
 
				+        assert diff == timedelta(minutes=1)
			
 
				+
			
 
				+    def test_skips_videos_without_timestamp_in_name(self):
			
 
				+        """Non-standard names (e.g., manually uploaded) should be skipped, not crash."""
			
 
				+        videos = [
			
 
				+            _video("my_custom_video.mp4"),
			
 
				+            _video("video_2026-05-08_16-40-00.mp4"),
			
 
				+        ]
			
 
				+        archive_start = datetime(2026, 5, 8, 16, 39, 0)
			
 
				+
			
 
				+        match, _diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-08_16-40-00.mp4"
			
 
				+
			
 
				+    def test_empty_video_list_returns_none(self):
			
 
				+        match, diff = _match_timelapse_by_timestamp([], datetime(2026, 5, 8, 0, 0, 0))
			
 
				+        assert match is None
			
 
				+        assert diff is None
			
 
				+
			
 
				+    @pytest.mark.parametrize("offset_hours", [0, 1, -1, 7, -7, 8, -8])
			
 
				+    def test_supports_common_timezone_offsets_with_single_candidate(self, offset_hours: int):
			
 
				+        """Each offset in the search list must be able to produce a match when
			
 
				+        only one video exists (so ambiguity check is vacuous)."""
			
 
				+        archive_start = datetime(2026, 5, 8, 12, 0, 0)
			
 
				+        # Printer's filename reflects archive_start in printer-local time
			
 
				+        printer_time = archive_start + timedelta(hours=offset_hours)
			
 
				+        videos = [_video(printer_time.strftime("video_%Y-%m-%d_%H-%M-%S.mp4"))]
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert diff == timedelta(0)
			
 
				+
			
 
				+    def test_returns_match_when_runner_up_is_same_video_different_offset(self):
			
 
				+        """A single video matching at two offsets is not ambiguous — pick it."""
			
 
				+        videos = [_video("video_2026-05-08_09-41-29.mp4")]
			
 
				+        # +7h adjusted = 02:41:29; +8h adjusted = 01:41:29. Both within 4h of 01:27:14.
			
 
				+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
			
 
				+        # Best is offset +8 → diff 14m15s
			
 
				+        assert diff is not None
			
 
				+        assert diff < timedelta(minutes=20)
			
 
				+
			
 
				+    def test_unambiguous_when_runner_up_is_well_separated(self):
			
 
				+        """If the next-best different video is comfortably outside the ambiguity
			
 
				+        margin, auto-pick the winner."""
			
 
				+        videos = [
			
 
				+            _video("video_2026-05-08_09-41-29.mp4"),  # +8h → 01:41:29, diff 14m15s
			
 
				+            _video("video_2026-05-08_12-00-00.mp4"),  # +8h → 04:00:00, diff 2h32m
			
 
				+        ]
			
 
				+        archive_start = datetime(2026, 5, 8, 1, 27, 14)
			
 
				+
			
 
				+        match, diff = _match_timelapse_by_timestamp(videos, archive_start)
			
 
				+
			
 
				+        assert match is not None
			
 
				+        assert match["name"] == "video_2026-05-08_09-41-29.mp4"
			
 
				+        assert diff is not None