Explorar o código

fix(archives): handle fallback archives in source-3MF upload (#1531)

  Archives created from prints Bambuddy didn't archive (cloud / Handy /
  SD-card prints) carry file_path="". The two source-upload routes
  computed the destination as (base_dir / archive.file_path).parent /
  "source", which collapsed to base_dir.parent / "source" for fallback
  rows — sending the file to /app/source/ (outside the data volume,
  orphaned on container restart) and raising 500 on the final
  relative_to.

  Centralise the destination math in _resolve_source_3mf_path. Normal
  archives keep the <archive>/source/<filename> layout. Fallback
  archives land at <base_dir>/archive/no_source/<id>/<filename>, which
  stays inside the data volume and is addressable by every existing
  read site. The helper also asserts the resolved directory is under
  base_dir.resolve() so a corrupted row fails with a clear message
  instead of writing outside the volume.

  Both upload routes (upload_source_3mf and upload_source_3mf_by_name)
  now route through the helper. Two regression tests in
  TestUploadSourceThreeMF pin both branches.
maziggy hai 2 días
pai
achega
e9beb1e8fc
Modificáronse 3 ficheiros con 193 adicións e 18 borrados
  1. 0 0
      CHANGELOG.md
  2. 51 18
      backend/app/api/routes/archives.py
  3. 142 0
      backend/tests/integration/test_archives_api.py

A diferenza do arquivo foi suprimida porque é demasiado grande
+ 0 - 0
CHANGELOG.md


+ 51 - 18
backend/app/api/routes/archives.py

@@ -3905,6 +3905,51 @@ async def get_project_image(
 # =============================================================================
 
 
+def _resolve_source_3mf_path(archive: PrintArchive, source_filename: str) -> Path:
+    """Resolve where to write a source 3MF for ``archive``.
+
+    Normal archives nest the source under ``<archive_file_dir>/source/``.
+    "Fallback" archives (created in main.py when MQTT reports a print start
+    but Bambuddy never saw the source 3MF — cloud / Handy / pre-existing
+    SD-card prints) carry ``file_path=""``. Joining that with ``base_dir``
+    via the ``/`` operator silently yields ``base_dir`` itself, whose parent
+    is ``base_dir.parent`` — which sent the upload to ``/app/source/`` and
+    raised a 500 on the final ``relative_to`` (#1531). Fallback archives
+    now land under ``<base_dir>/archive/no_source/<archive_id>/`` instead,
+    which stays inside the data volume and remains addressable by every
+    read site that does ``base_dir / archive.source_3mf_path``.
+
+    The resolved directory is asserted to be inside ``base_dir`` even when
+    ``archive.file_path`` is populated, so a row corrupted by an old import
+    or manual SQL edit fails with a clear 500 instead of writing outside
+    the data volume.
+    """
+    if archive.file_path:
+        archive_file = settings.base_dir / archive.file_path
+        source_dir = archive_file.parent / "source"
+    else:
+        source_dir = settings.base_dir / "archive" / "no_source" / str(archive.id)
+
+    # Containment check via resolve() — catches absolute file_path, `..`
+    # traversal, and any other shape that escapes the data volume — but we
+    # return the *literal* source_dir below. Resolving the returned path
+    # would canonicalise away a symlinked DATA_DIR (legitimate on TrueNAS /
+    # QNAP / Synology storage pools, and any `-v /symlink:/app/data`
+    # mount), which would then make the caller's
+    # ``source_path.relative_to(settings.base_dir)`` raise because the
+    # left side is canonical and the right is the symlink path.
+    try:
+        source_dir.resolve().relative_to(settings.base_dir.resolve())
+    except ValueError as exc:
+        raise HTTPException(
+            500,
+            f"Archive {archive.id} resolves to a path outside the data directory; cannot attach source.",
+        ) from exc
+
+    source_dir.mkdir(parents=True, exist_ok=True)
+    return source_dir / source_filename
+
+
 @router.post("/{archive_id}/source")
 async def upload_source_3mf(
     archive_id: int,
@@ -3921,11 +3966,9 @@ async def upload_source_3mf(
     if not file.filename or not file.filename.endswith(".3mf"):
         raise HTTPException(400, "File must be a .3mf file")
 
-    # Get archive directory and create source subdirectory
-    file_path = settings.base_dir / archive.file_path
-    archive_dir = file_path.parent
-    source_dir = archive_dir / "source"
-    source_dir.mkdir(exist_ok=True)
+    # Save the source 3MF file - preserve original filename, strip directory components
+    source_filename = _safe_filename(file.filename)
+    source_path = _resolve_source_3mf_path(archive, source_filename)
 
     # Delete old source file if exists
     if archive.source_3mf_path:
@@ -3933,10 +3976,6 @@ async def upload_source_3mf(
         if old_source_path.exists():
             old_source_path.unlink()
 
-    # Save the source 3MF file - preserve original filename, strip directory components
-    source_filename = _safe_filename(file.filename)
-    source_path = source_dir / source_filename
-
     content = await file.read()
     # #1401: validate zip header on source 3MF uploads too — source files
     # are uploaded for reprint and slicing, so an invalid one breaks the
@@ -4128,11 +4167,9 @@ async def upload_source_3mf_by_name(
     if not archive:
         raise HTTPException(404, f"No archive found matching '{print_name}'")
 
-    # Get archive directory and create source subdirectory
-    file_path = settings.base_dir / archive.file_path
-    archive_dir = file_path.parent
-    source_dir = archive_dir / "source"
-    source_dir.mkdir(exist_ok=True)
+    # Save the source 3MF file - preserve original filename, strip directory components
+    source_filename = safe_filename
+    source_path = _resolve_source_3mf_path(archive, source_filename)
 
     # Delete old source file if exists
     if archive.source_3mf_path:
@@ -4140,10 +4177,6 @@ async def upload_source_3mf_by_name(
         if old_source_path.exists():
             old_source_path.unlink()
 
-    # Save the source 3MF file - preserve original filename, strip directory components
-    source_filename = safe_filename
-    source_path = source_dir / source_filename
-
     content = await file.read()
     # #1401: same zip-header check as the other upload routes — the
     # match-by-name endpoint is used by slicer post-processing scripts,

+ 142 - 0
backend/tests/integration/test_archives_api.py

@@ -3,6 +3,8 @@
 Tests the full request/response cycle for /api/v1/archives/ endpoints.
 """
 
+from pathlib import Path
+
 import pytest
 from httpx import AsyncClient
 
@@ -1157,3 +1159,143 @@ class TestArchiveF3DEndpoints:
         response = await async_client.delete("/api/v1/archives/tags/nonexistent-tag")
         assert response.status_code == 200
         assert response.json()["affected"] == 0
+
+
+class TestUploadSourceThreeMF:
+    """Regression for #1531: source-3MF upload on fallback archives."""
+
+    @staticmethod
+    def _minimal_3mf_bytes() -> bytes:
+        """Smallest valid .3mf — the upload path enforces a zip header check."""
+        import io
+        import zipfile
+
+        buf = io.BytesIO()
+        with zipfile.ZipFile(buf, "w") as zf:
+            zf.writestr("[Content_Types].xml", "<types/>")
+        return buf.getvalue()
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_fallback_archive_source_upload_lands_under_base_dir(
+        self, async_client: AsyncClient, archive_factory, printer_factory, monkeypatch, tmp_path
+    ):
+        """Fallback archive (file_path='') must accept a source upload and store it inside base_dir.
+
+        Pre-fix, ``Path(base_dir) / ''`` collapsed to ``base_dir`` and the
+        ``.parent`` walked out of the data volume, sending the file to
+        ``/app/source/...`` and crashing on ``relative_to``.
+        """
+        from backend.app.core.config import settings as app_settings
+
+        monkeypatch.setattr(app_settings, "base_dir", tmp_path)
+
+        printer = await printer_factory()
+        archive = await archive_factory(
+            printer.id,
+            print_name="Cloud Print",
+            file_path="",  # fallback archive — no source 3MF was archived
+            filename="Cloud Print.3mf",
+        )
+
+        files = {"file": ("cloud_print.3mf", self._minimal_3mf_bytes(), "application/octet-stream")}
+        response = await async_client.post(f"/api/v1/archives/{archive.id}/source", files=files)
+
+        assert response.status_code == 200, response.text
+        payload = response.json()
+        rel = payload["source_3mf_path"]
+        # Stored as a relative path inside base_dir.
+        assert not rel.startswith("/"), f"source_3mf_path should be relative, got {rel!r}"
+        # File physically landed under base_dir (NOT escaped to /app/source/).
+        assert (tmp_path / rel).is_file()
+        # Deterministic fallback location keyed off archive id.
+        assert rel == f"archive/no_source/{archive.id}/cloud_print.3mf"
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_normal_archive_source_upload_unchanged(
+        self, async_client: AsyncClient, archive_factory, printer_factory, monkeypatch, tmp_path
+    ):
+        """Normal archive (file_path set) still nests the source under <archive>/source/."""
+        from backend.app.core.config import settings as app_settings
+
+        monkeypatch.setattr(app_settings, "base_dir", tmp_path)
+
+        printer = await printer_factory()
+        # archive_factory's default file_path is "archives/test/test_print.gcode.3mf".
+        archive = await archive_factory(printer.id, print_name="Real Print")
+
+        files = {"file": ("real_print.3mf", self._minimal_3mf_bytes(), "application/octet-stream")}
+        response = await async_client.post(f"/api/v1/archives/{archive.id}/source", files=files)
+
+        assert response.status_code == 200, response.text
+        rel = response.json()["source_3mf_path"]
+        assert rel == "archives/test/source/real_print.3mf"
+        assert (tmp_path / rel).is_file()
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_symlinked_data_dir_upload_succeeds(
+        self, async_client: AsyncClient, archive_factory, printer_factory, monkeypatch, tmp_path
+    ):
+        """Regression: DATA_DIR that's a symlink to the real storage must not break the upload.
+
+        Common on TrueNAS / Synology / QNAP storage pools, and any
+        ``-v /symlinked/host/path:/app/data`` mount. The helper resolves
+        only for the containment check and returns literal paths so the
+        caller's ``relative_to(settings.base_dir)`` doesn't trip over a
+        canonical-vs-symlink mismatch.
+        """
+        from backend.app.core.config import settings as app_settings
+
+        real_dir = tmp_path / "real_storage"
+        real_dir.mkdir()
+        symlink_dir = tmp_path / "data_via_symlink"
+        symlink_dir.symlink_to(real_dir)
+        monkeypatch.setattr(app_settings, "base_dir", symlink_dir)
+
+        printer = await printer_factory()
+        archive = await archive_factory(
+            printer.id,
+            print_name="Symlinked Print",
+            file_path="archives/X1C/print.gcode.3mf",
+            filename="print.gcode.3mf",
+        )
+
+        files = {"file": ("print.3mf", self._minimal_3mf_bytes(), "application/octet-stream")}
+        response = await async_client.post(f"/api/v1/archives/{archive.id}/source", files=files)
+
+        assert response.status_code == 200, response.text
+        rel = response.json()["source_3mf_path"]
+        assert rel == "archives/X1C/source/print.3mf"
+        # Reachable via both the symlink and the canonical path.
+        assert (symlink_dir / rel).is_file()
+        assert (real_dir / rel).is_file()
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_absolute_file_path_rejected_with_clear_500(
+        self, async_client: AsyncClient, archive_factory, printer_factory, monkeypatch, tmp_path
+    ):
+        """A row whose file_path is absolute (corrupted by old import / manual edit)
+        must fail with the explicit "outside the data directory" message, not silently
+        write outside base_dir."""
+        from backend.app.core.config import settings as app_settings
+
+        monkeypatch.setattr(app_settings, "base_dir", tmp_path)
+
+        printer = await printer_factory()
+        archive = await archive_factory(
+            printer.id,
+            print_name="Corrupt Path",
+            file_path="/tmp/totally_outside.gcode.3mf",
+            filename="totally_outside.gcode.3mf",
+        )
+
+        files = {"file": ("totally_outside.3mf", self._minimal_3mf_bytes(), "application/octet-stream")}
+        response = await async_client.post(f"/api/v1/archives/{archive.id}/source", files=files)
+
+        assert response.status_code == 500
+        assert "outside the data directory" in response.json()["detail"]
+        # Did not write anything under the bogus /tmp/source/ either.
+        assert not (Path("/tmp") / "source").exists() or not (Path("/tmp") / "source" / "totally_outside.3mf").exists()

Algúns arquivos non se mostraron porque demasiados arquivos cambiaron neste cambio