Просмотр исходного кода

fix(printers): recover large-3mf metadata after FTP timeout (#972)

  Two-part root cause for missing photos/filament/cost on large prints
  (#972). The configured ftp_timeout was only plumbed through as the FTP
  socket timeout; the asyncio.wait_for wrapping run_in_executor stayed on
  its 60s hardcoded default, so the user's 300s setting never applied.
  Worse, asyncio.wait_for cannot cancel run_in_executor threads — after
  the 60s outer timeout fired, the executor thread kept running
  ftplib.retrbinary and frequently completed the download ~30–60s later,
  but by then the async wrapper had returned False. with_ftp_retry kept
  re-attempting the same path, each retry truncating the file the zombie
  thread had just written, and the archive was ultimately persisted as a
  fallback with no 3MF.

  download_file_async now accepts timeout at each call site (plumbed from
  ftp_timeout) and salvages post-timeout success via an explicit
  completion flag the executor thread sets only after download_to_file
  returns True. Per-attempt completion dict so a prot_p zombie can't
  flip the flag for a later prot_c attempt. A cosmetic // prefix in the
  directory-search download path is also fixed by replacing string
  concatenation with posixpath.join.
maziggy 1 месяц назад
Родитель
Сommit
1b43488016
4 измененных файлов с 161 добавлено и 26 удалено
  1. 1 0
      CHANGELOG.md
  2. 9 3
      backend/app/main.py
  3. 45 23
      backend/app/services/bambu_ftp.py
  4. 106 0
      backend/tests/unit/services/test_bambu_ftp.py

Разница между файлами не показана из-за своего большого размера
+ 1 - 0
CHANGELOG.md


+ 9 - 3
backend/app/main.py

@@ -1,5 +1,6 @@
 import asyncio
 import asyncio
 import logging
 import logging
+import posixpath
 import time
 import time
 from contextlib import asynccontextmanager
 from contextlib import asynccontextmanager
 from datetime import datetime, timedelta, timezone
 from datetime import datetime, timedelta, timezone
@@ -1741,6 +1742,7 @@ async def on_print_start(printer_id: int, data: dict):
                             printer.access_code,
                             printer.access_code,
                             remote_path,
                             remote_path,
                             temp_path,
                             temp_path,
+                            timeout=ftp_timeout,
                             socket_timeout=ftp_timeout,
                             socket_timeout=ftp_timeout,
                             printer_model=printer.model,
                             printer_model=printer.model,
                             max_retries=ftp_retry_count,
                             max_retries=ftp_retry_count,
@@ -1753,6 +1755,7 @@ async def on_print_start(printer_id: int, data: dict):
                             printer.access_code,
                             printer.access_code,
                             remote_path,
                             remote_path,
                             temp_path,
                             temp_path,
+                            timeout=ftp_timeout,
                             socket_timeout=ftp_timeout,
                             socket_timeout=ftp_timeout,
                             printer_model=printer.model,
                             printer_model=printer.model,
                         )
                         )
@@ -1795,25 +1798,28 @@ async def on_print_start(printer_id: int, data: dict):
                             logger.info("Found matching file in %s: %s", search_dir, fname)
                             logger.info("Found matching file in %s: %s", search_dir, fname)
                             temp_path = app_settings.archive_dir / "temp" / fname
                             temp_path = app_settings.archive_dir / "temp" / fname
                             temp_path.parent.mkdir(parents=True, exist_ok=True)
                             temp_path.parent.mkdir(parents=True, exist_ok=True)
+                            remote_full_path = posixpath.join(search_dir, fname)
                             if ftp_retry_enabled:
                             if ftp_retry_enabled:
                                 downloaded = await with_ftp_retry(
                                 downloaded = await with_ftp_retry(
                                     download_file_async,
                                     download_file_async,
                                     printer.ip_address,
                                     printer.ip_address,
                                     printer.access_code,
                                     printer.access_code,
-                                    f"{search_dir}/{fname}",
+                                    remote_full_path,
                                     temp_path,
                                     temp_path,
+                                    timeout=ftp_timeout,
                                     socket_timeout=ftp_timeout,
                                     socket_timeout=ftp_timeout,
                                     printer_model=printer.model,
                                     printer_model=printer.model,
                                     max_retries=ftp_retry_count,
                                     max_retries=ftp_retry_count,
                                     retry_delay=ftp_retry_delay,
                                     retry_delay=ftp_retry_delay,
-                                    operation_name=f"Download 3MF from {search_dir}/{fname}",
+                                    operation_name=f"Download 3MF from {remote_full_path}",
                                 )
                                 )
                             else:
                             else:
                                 downloaded = await download_file_async(
                                 downloaded = await download_file_async(
                                     printer.ip_address,
                                     printer.ip_address,
                                     printer.access_code,
                                     printer.access_code,
-                                    f"{search_dir}/{fname}",
+                                    remote_full_path,
                                     temp_path,
                                     temp_path,
+                                    timeout=ftp_timeout,
                                     socket_timeout=ftp_timeout,
                                     socket_timeout=ftp_timeout,
                                     printer_model=printer.model,
                                     printer_model=printer.model,
                                 )
                                 )

+ 45 - 23
backend/app/services/bambu_ftp.py

@@ -623,7 +623,15 @@ async def download_file_async(
     loop = asyncio.get_event_loop()
     loop = asyncio.get_event_loop()
     is_a1 = printer_model in BambuFTPClient.A1_MODELS if printer_model else False
     is_a1 = printer_model in BambuFTPClient.A1_MODELS if printer_model else False
 
 
-    def _download(force_prot_c: bool = False) -> bool:
+    # Per-attempt completion state: asyncio.wait_for cannot cancel
+    # run_in_executor threads, so on timeout the executor may still complete
+    # the download after we stop waiting. The thread flips `success` to True
+    # ONLY after the file is fully written — a post-timeout check lets us
+    # salvage the download without mistaking an in-progress partial write
+    # for a completed one. Each attempt gets its own dict so a zombie from
+    # an earlier attempt can't flip the flag for a later one.
+
+    def _download(force_prot_c: bool, completion: dict) -> bool:
         mode_str = "prot_c" if force_prot_c else "prot_p"
         mode_str = "prot_c" if force_prot_c else "prot_p"
         client = BambuFTPClient(
         client = BambuFTPClient(
             ip_address, access_code, timeout=socket_timeout, printer_model=printer_model, force_prot_c=force_prot_c
             ip_address, access_code, timeout=socket_timeout, printer_model=printer_model, force_prot_c=force_prot_c
@@ -632,39 +640,53 @@ async def download_file_async(
             try:
             try:
                 result = client.download_to_file(remote_path, local_path)
                 result = client.download_to_file(remote_path, local_path)
                 if result:
                 if result:
-                    # Cache the working mode
                     BambuFTPClient.cache_mode(ip_address, mode_str)
                     BambuFTPClient.cache_mode(ip_address, mode_str)
+                    completion["success"] = True
                 return result
                 return result
             finally:
             finally:
                 client.disconnect()
                 client.disconnect()
         return False
         return False
 
 
-    try:
-        # Check if we have a cached mode for this printer
-        cached_mode = BambuFTPClient._mode_cache.get(ip_address)
-
-        if cached_mode:
-            # Use cached mode
-            force_prot_c = cached_mode == "prot_c"
-            return await asyncio.wait_for(loop.run_in_executor(None, lambda: _download(force_prot_c)), timeout=timeout)
+    async def _run(force_prot_c: bool) -> bool:
+        completion = {"success": False}
+        try:
+            return await asyncio.wait_for(
+                loop.run_in_executor(None, lambda: _download(force_prot_c, completion)), timeout=timeout
+            )
+        except TimeoutError:
+            # Give the zombie executor thread a brief moment to finish if it
+            # was already close to done. Only salvage when the thread has
+            # signalled genuine success — checking file size alone would
+            # mistake an in-progress partial write for a completed download.
+            await asyncio.sleep(0.5)
+            if completion["success"] and local_path.exists() and local_path.stat().st_size > 0:
+                logger.info(
+                    "FTP download wait_for timed out after %ss for %s, but thread completed (%s bytes) — salvaging",
+                    timeout,
+                    remote_path,
+                    local_path.stat().st_size,
+                )
+                return True
+            logger.warning("FTP download timed out after %ss for %s", timeout, remote_path)
+            return False
 
 
-        # No cached mode - try prot_p first
-        result = await asyncio.wait_for(loop.run_in_executor(None, lambda: _download(False)), timeout=timeout)
+    # Check if we have a cached mode for this printer
+    cached_mode = BambuFTPClient._mode_cache.get(ip_address)
 
 
-        if result:
-            return True
+    if cached_mode:
+        force_prot_c = cached_mode == "prot_c"
+        return await _run(force_prot_c)
 
 
-        # Download failed - for A1 models, try prot_c fallback
-        if is_a1:
-            logger.info("FTP download failed with prot_p for A1 model, trying prot_c fallback...")
-            result = await asyncio.wait_for(loop.run_in_executor(None, lambda: _download(True)), timeout=timeout)
-            return result
+    # No cached mode - try prot_p first
+    if await _run(False):
+        return True
 
 
-        return False
+    # Download failed - for A1 models, try prot_c fallback
+    if is_a1:
+        logger.info("FTP download failed with prot_p for A1 model, trying prot_c fallback...")
+        return await _run(True)
 
 
-    except TimeoutError:
-        logger.warning("FTP download timed out after %ss for %s", timeout, remote_path)
-        return False
+    return False
 
 
 
 
 async def download_file_try_paths_async(
 async def download_file_try_paths_async(

+ 106 - 0
backend/tests/unit/services/test_bambu_ftp.py

@@ -702,6 +702,112 @@ class TestAsyncWrappers:
         )
         )
         assert result is True
         assert result is True
 
 
+    @pytest.mark.asyncio
+    async def test_download_file_async_timeout_salvages_completed_zombie(self, tmp_path, monkeypatch):
+        """Executor thread that completes after wait_for timeout is salvaged.
+
+        asyncio.wait_for cannot cancel run_in_executor threads, so the FTP
+        download may still complete after we give up waiting. If the thread
+        genuinely finished (signalled via completion["success"] and the file
+        is on disk), download_file_async should return True rather than False.
+
+        Regression for #972: A1 user with 14 MB 3MF hit the hardcoded 60s
+        timeout, but the download thread finished ~45s later. The successful
+        file was written to disk but the async wrapper returned False, so the
+        archive was created as a fallback with no 3MF data.
+        """
+        from backend.app.services import bambu_ftp
+
+        # Clear mode cache so prot_p path is exercised.
+        bambu_ftp.BambuFTPClient._mode_cache.pop("127.0.0.1", None)
+
+        local = tmp_path / "zombie.bin"
+        expected_content = b"late arrival but complete"
+
+        class FakeClient:
+            """Connects instantly, download_to_file sleeps past wait_for's
+            timeout then writes the file and returns True."""
+
+            def __init__(self, *args, **kwargs):
+                pass
+
+            def connect(self):
+                return True
+
+            def download_to_file(self, remote_path, local_path):
+                time.sleep(0.4)  # longer than wait_for timeout=0.1
+                local_path.write_bytes(expected_content)
+                return True
+
+            def disconnect(self):
+                pass
+
+        monkeypatch.setattr(bambu_ftp, "BambuFTPClient", FakeClient)
+        monkeypatch.setattr(FakeClient, "_mode_cache", {}, raising=False)
+        monkeypatch.setattr(FakeClient, "A1_MODELS", {"A1"}, raising=False)
+
+        def _noop_cache(ip, mode):
+            pass
+
+        monkeypatch.setattr(FakeClient, "cache_mode", staticmethod(_noop_cache), raising=False)
+
+        result = await download_file_async(
+            "127.0.0.1",
+            "12345678",
+            "/cache/zombie.bin",
+            local,
+            timeout=0.1,
+            printer_model="X1C",
+        )
+        assert result is True
+        assert local.read_bytes() == expected_content
+
+    @pytest.mark.asyncio
+    async def test_download_file_async_timeout_no_salvage_when_incomplete(self, tmp_path, monkeypatch):
+        """Timeout returns False when thread has not signalled success.
+
+        A partial file on disk (mid-retrbinary) must NOT be mistaken for a
+        completed download — only the thread's explicit success flag permits
+        salvage.
+        """
+        from backend.app.services import bambu_ftp
+
+        bambu_ftp.BambuFTPClient._mode_cache.pop("127.0.0.1", None)
+
+        local = tmp_path / "partial.bin"
+
+        class FakeClient:
+            def __init__(self, *args, **kwargs):
+                pass
+
+            def connect(self):
+                return True
+
+            def download_to_file(self, remote_path, local_path):
+                # Simulate an in-progress partial write that never completes
+                # within the salvage grace period.
+                local_path.write_bytes(b"partial...")
+                time.sleep(2.0)
+                return True  # would complete eventually, but too late
+
+            def disconnect(self):
+                pass
+
+        monkeypatch.setattr(bambu_ftp, "BambuFTPClient", FakeClient)
+        monkeypatch.setattr(FakeClient, "_mode_cache", {}, raising=False)
+        monkeypatch.setattr(FakeClient, "A1_MODELS", set(), raising=False)
+        monkeypatch.setattr(FakeClient, "cache_mode", staticmethod(lambda ip, mode: None), raising=False)
+
+        result = await download_file_async(
+            "127.0.0.1",
+            "12345678",
+            "/cache/partial.bin",
+            local,
+            timeout=0.1,
+            printer_model="X1C",
+        )
+        assert result is False
+
     @pytest.mark.asyncio
     @pytest.mark.asyncio
     async def test_download_file_try_paths_first_succeeds(self, patch_ftp_port, tmp_path):
     async def test_download_file_try_paths_first_succeeds(self, patch_ftp_port, tmp_path):
         """download_file_try_paths_async succeeds on first path."""
         """download_file_try_paths_async succeeds on first path."""

Некоторые файлы не были показаны из-за большого количества измененных файлов