Browse Source

fix(#1112): cross-boundary file move actually relocates bytes

  @Carter3DP's report on 0.2.4b1: a file moved into an external (NAS)
  folder showed up in Bambuddy under that folder but was never written
  to the mount. Traced to move_files only updating file.folder_id in
  the DB while leaving the bytes in library_files_dir/. Direct upload
  to a writable external folder was already fixed in 0.2.4b1; the move
  path was not.

  Cross-boundary moves now physically relocate the bytes through a new
  _move_file_bytes helper. Same-boundary moves (managed -> managed)
  keep the existing DB-only fast path because a managed file's on-disk
  location doesn't depend on which managed folder owns it.

  Four flows:
    - managed   -> external: copy to <mount>/<filename>, set
                             is_external=True, store the absolute path,
                             unlink the managed source
    - external  -> managed:  copy to internal storage with a fresh UUID
                             name, set is_external=False, store the
                             relative path, unlink the external source,
                             recompute file_hash (scan-tracked rows
                             carry file_hash=None)
    - external  -> external: same shape as managed -> external
    - managed   -> managed:  DB-only

  Copy-then-unlink ordering means a partial copy followed by a failed
  unlink leaves both copies on disk rather than losing the source if
  the target write fails halfway through on a flaky NAS mount. Failed
  shutil.copy2 cleans up partial dest before raising.

  Defence-in-depth skips:
    - source on a read-only external mount (move = delete-on-source
      which a RO mount can't fulfil)
    - filename collision on the target mount
    - traversal-style filenames after Path.resolve()
    - missing source on disk
    - os.access(W_OK) on the target mount

  Each skip carries a structured {file_id, code, reason} entry in a new
  skipped_reasons field on the response so the UI can surface "5 of 10
  skipped: 3 collisions, 2 missing on disk" instead of a blank number.
  The {moved, skipped} numeric counters are preserved so existing
  frontend code keeps working.

  6 new integration tests in test_external_folders_api.py::
  TestCrossBoundaryMove covering: managed -> external relocates bytes
  (the actual fix), external -> managed relocates bytes including hash
  recompute, name collision skip with the pre-existing target file
  intact, source-readonly skip, managed -> managed stays DB-only, and
  skipped_reasons always present.
maziggy 1 month ago
parent
commit
d81e4853ec
3 changed files with 420 additions and 13 deletions
  1. 0 0
      CHANGELOG.md
  2. 174 13
      backend/app/api/routes/library.py
  3. 246 0
      backend/tests/integration/test_external_folders_api.py

File diff suppressed because it is too large
+ 0 - 0
CHANGELOG.md


+ 174 - 13
backend/app/api/routes/library.py

@@ -2,6 +2,7 @@
 
 import base64
 import binascii
+import contextlib
 import hashlib
 import logging
 import os
@@ -185,6 +186,106 @@ def _stored_file_path(abs_path: Path, is_external: bool) -> str:
     return str(abs_path) if is_external else to_relative_path(abs_path)
 
 
+class _MoveSkip(Exception):
+    """Signalled by ``_move_file_bytes`` to skip a file with a user-visible reason.
+
+    Carries an optional `code` for machine-friendly grouping (the
+    front-end can localise it) and a fallback English `reason` for logs.
+    """
+
+    def __init__(self, code: str, reason: str):
+        super().__init__(reason)
+        self.code = code
+        self.reason = reason
+
+
+def _resolve_source_disk_path(file: LibraryFile) -> Path | None:
+    """Return the absolute on-disk path for an existing LibraryFile, or None
+    if it can't be located (legacy DB row, deleted file, etc.)."""
+    if file.is_external:
+        return Path(file.file_path) if file.file_path else None
+    return to_absolute_path(file.file_path)
+
+
+def _move_file_bytes(file: LibraryFile, target_folder: LibraryFolder | None) -> str:
+    """Physically relocate `file`'s bytes to match `target_folder`.
+
+    Used by the move endpoint when source/target straddle the
+    managed↔external boundary (#1112 follow-up — the prior implementation
+    updated the DB row's ``folder_id`` but never moved the bytes, so a
+    file moved to an external SMB folder showed up in Bambuddy's UI but
+    not on the NAS).
+
+    Returns the new ``file_path`` value to persist (relative for managed
+    targets, absolute for external targets — matches the upload + scan
+    paths). Raises ``_MoveSkip`` for any condition that would make the
+    move unsafe (target unwritable, filename collision, source missing).
+
+    The copy-then-unlink ordering means a partial copy followed by a
+    failed unlink leaves both the source and the dest on disk — better
+    than the symmetric "rename or move" which would lose the source if
+    the target write didn't complete on a flaky mount. The DB row stays
+    pointed at the source until the caller commits the new ``file_path``.
+    """
+    src = _resolve_source_disk_path(file)
+    if not src or not src.exists():
+        raise _MoveSkip("source_missing", "source file missing on disk")
+
+    target_is_external = target_folder is not None and target_folder.is_external
+
+    if target_is_external:
+        if target_folder.external_readonly:
+            # Already blocked at top level, but defence-in-depth.
+            raise _MoveSkip("target_readonly", "target external folder is read-only")
+        if not target_folder.external_path:
+            raise _MoveSkip("target_misconfigured", "target external folder has no path")
+        ext_dir = Path(target_folder.external_path)
+        if not ext_dir.exists() or not ext_dir.is_dir():
+            raise _MoveSkip("target_inaccessible", f"target path not accessible: {ext_dir}")
+        if not os.access(ext_dir, os.W_OK):
+            raise _MoveSkip("target_unwritable", f"target path not writable: {ext_dir}")
+        dest = (ext_dir / file.filename).resolve()
+        try:
+            dest.relative_to(ext_dir.resolve())
+        except ValueError:
+            raise _MoveSkip("invalid_filename", f"unsafe filename: {file.filename!r}") from None
+        if dest.exists():
+            raise _MoveSkip("name_collision", f"a file named {file.filename!r} already exists in target")
+        try:
+            shutil.copy2(src, dest)
+        except OSError as e:
+            # Clean up partial dest so a retry can succeed.
+            with contextlib.suppress(OSError):
+                dest.unlink(missing_ok=True)
+            raise _MoveSkip("copy_failed", f"copy failed: {e}") from e
+    else:
+        # → managed (root or non-external folder): generate a fresh UUID
+        # filename in the internal store so we don't collide with another
+        # file that happens to share `filename`.
+        ext = src.suffix.lower()
+        dest = get_library_files_dir() / f"{uuid.uuid4().hex}{ext}"
+        try:
+            shutil.copy2(src, dest)
+        except OSError as e:
+            with contextlib.suppress(OSError):
+                dest.unlink(missing_ok=True)
+            raise _MoveSkip("copy_failed", f"copy failed: {e}") from e
+
+    # Copy succeeded — unlink the original. A failure here leaves an
+    # orphan on disk but the DB row is consistent against the new dest.
+    try:
+        src.unlink(missing_ok=True)
+    except OSError as e:
+        logger.warning(
+            "Move: copied %s → %s but couldn't remove source: %s",
+            src,
+            dest,
+            e,
+        )
+
+    return _stored_file_path(dest, is_external=target_is_external)
+
+
 def _clean_3mf_metadata(obj):
     """Strip bytes and thumbnail-carrier keys so the payload is JSON-storable.
 
@@ -2838,11 +2939,20 @@ async def move_files(
 ):
     """Move multiple files to a folder.
 
-    Files not owned by the user are skipped (unless user has *_all permission).
+    Cross-boundary moves (managed ↔ external, or external ↔ external)
+    physically relocate the bytes — see ``_move_file_bytes``. Same-boundary
+    moves stay DB-only because the file's on-disk location doesn't depend
+    on which managed folder owns it.
+
+    Files not owned by the user are skipped (unless user has ``*_all``
+    permission). Each skip carries a structured reason so the UI can
+    surface "5 of 10 files were skipped: 3 had filename collisions on
+    the NAS, 2 are no longer on disk" rather than a blank "skipped: 5".
     """
     user, can_modify_all = auth_result
 
     # Verify folder exists if specified
+    target_folder: LibraryFolder | None = None
     if data.folder_id is not None:
         folder_result = await db.execute(select(LibraryFolder).where(LibraryFolder.id == data.folder_id))
         target_folder = folder_result.scalar_one_or_none()
@@ -2851,25 +2961,76 @@ async def move_files(
         if target_folder.is_external and target_folder.external_readonly:
             raise HTTPException(status_code=403, detail="Cannot move files to a read-only external folder")
 
-    # Update files
+    target_is_external = target_folder is not None and target_folder.is_external
+
     moved = 0
     skipped = 0
+    skipped_reasons: list[dict] = []
+
     for file_id in data.file_ids:
-        result = await db.execute(LibraryFile.active().where(LibraryFile.id == file_id))
+        result = await db.execute(
+            LibraryFile.active().options(selectinload(LibraryFile.folder)).where(LibraryFile.id == file_id)
+        )
         file = result.scalar_one_or_none()
-        if file:
-            # Ownership check
-            if not can_modify_all and file.created_by_id != user.id:
-                skipped += 1
-                continue
-            # Cannot move external files out of their folder
-            if file.is_external:
-                skipped += 1
-                continue
+        if not file:
+            continue
+        # Ownership check
+        if not can_modify_all and file.created_by_id != user.id:
+            skipped += 1
+            skipped_reasons.append({"file_id": file_id, "code": "not_owner", "reason": "not the file owner"})
+            continue
+
+        # No bytes need to move when both ends are managed (same-boundary).
+        if not file.is_external and not target_is_external:
             file.folder_id = data.folder_id
             moved += 1
+            continue
+
+        # Block moves out of a read-only external mount. The user only has
+        # read access to the source, and a move is semantically a delete on
+        # the source — which a read-only mount can't fulfil. Without this
+        # guard we'd succeed at copying to the target, fail to unlink the
+        # source, and the same file would now exist in two places (with
+        # the DB pointing at only one).
+        if file.is_external and file.folder is not None and file.folder.external_readonly:
+            skipped += 1
+            skipped_reasons.append(
+                {"file_id": file_id, "code": "source_readonly", "reason": "source is on a read-only external folder"}
+            )
+            continue
+
+        # Otherwise relocate the bytes, then update the DB row to match.
+        try:
+            new_file_path = _move_file_bytes(file, target_folder)
+        except _MoveSkip as e:
+            skipped += 1
+            skipped_reasons.append({"file_id": file_id, "code": e.code, "reason": e.reason})
+            continue
 
-    return {"status": "success", "moved": moved, "skipped": skipped}
+        file.is_external = target_is_external
+        file.folder_id = data.folder_id
+        file.file_path = new_file_path
+        # External rows historically carry `file_hash=None` (scan skips
+        # hashing). When pulling an external file into managed storage,
+        # compute the hash so dedup detection works for future uploads
+        # of the same content.
+        if not target_is_external and file.file_hash is None:
+            try:
+                abs_path = to_absolute_path(new_file_path)
+                if abs_path:
+                    file.file_hash = calculate_file_hash(abs_path)
+            except OSError:
+                pass  # leave hash null; dedup just won't match this row
+        moved += 1
+
+    await db.commit()
+
+    return {
+        "status": "success",
+        "moved": moved,
+        "skipped": skipped,
+        "skipped_reasons": skipped_reasons,
+    }
 
 
 @router.post("/bulk-delete", response_model=BulkDeleteResponse)

+ 246 - 0
backend/tests/integration/test_external_folders_api.py

@@ -710,3 +710,249 @@ class TestExternalFolderWritableUpload:
         assert row.is_external is False
         # Internal storage: file_path is UUID-scoped, stored as a relative path.
         assert not row.file_path.startswith("/")
+
+
+class TestCrossBoundaryMove:
+    """#1112 follow-up: moving files between managed and external folders
+    must physically relocate the bytes, not just shuffle the DB ``folder_id``.
+
+    Pre-fix symptom (reported by @Carter3DP after testing 0.2.4b1): a file
+    moved from a managed folder to a NAS-backed external folder showed up
+    in Bambuddy's UI under the external folder but was never written to
+    the NAS — so the SMB mount and Bambuddy disagreed about what was
+    actually there.
+    """
+
+    @pytest.fixture
+    def external_dir(self, tmp_path):
+        ext_dir = tmp_path / "writable_share"
+        ext_dir.mkdir()
+        return ext_dir
+
+    @pytest.fixture
+    async def writable_folder(self, async_client, db_session, external_dir):
+        data = {"name": "Writable NAS", "external_path": str(external_dir), "readonly": False}
+        response = await async_client.post("/api/v1/library/folders/external", json=data)
+        assert response.status_code == 200
+        return response.json()
+
+    @pytest.fixture
+    async def readonly_folder(self, async_client, db_session, tmp_path):
+        ro_dir = tmp_path / "ro_share"
+        ro_dir.mkdir()
+        (ro_dir / "stranded.gcode").write_text("G28")
+        data = {"name": "Read-only NAS", "external_path": str(ro_dir), "readonly": True}
+        response = await async_client.post("/api/v1/library/folders/external", json=data)
+        assert response.status_code == 200
+        # Populate via scan so the file gets a DB row with is_external=True.
+        scan = await async_client.post(f"/api/v1/library/folders/{response.json()['id']}/scan")
+        assert scan.status_code == 200
+        return response.json()
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_managed_to_external_relocates_bytes(
+        self, async_client: AsyncClient, db_session, writable_folder, external_dir
+    ):
+        """The actual #1112 fix: managed → external must write the bytes
+        to the NAS mount AND drop them from internal storage. Pre-fix the
+        DB row flipped to the new folder but the bytes stayed put."""
+        import io
+
+        from backend.app.api.routes.library import to_absolute_path
+        from backend.app.models.library import LibraryFile
+
+        upload = await async_client.post(
+            "/api/v1/library/files",
+            files={"file": ("ship_me.stl", io.BytesIO(b"original-bytes"), "application/octet-stream")},
+        )
+        assert upload.status_code == 200
+        file_id = upload.json()["id"]
+
+        # Snapshot the pre-move on-disk path so we can verify it's gone after.
+        pre = await db_session.get(LibraryFile, file_id)
+        await db_session.refresh(pre)
+        managed_disk_path = to_absolute_path(pre.file_path)
+        assert managed_disk_path is not None and managed_disk_path.exists()
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [file_id], "folder_id": writable_folder["id"]},
+        )
+        assert response.status_code == 200, response.text
+        body = response.json()
+        assert body["moved"] == 1
+        assert body["skipped"] == 0
+
+        # Bytes are on the NAS mount.
+        on_nas = external_dir / "ship_me.stl"
+        assert on_nas.exists()
+        assert on_nas.read_bytes() == b"original-bytes"
+
+        # Internal copy is gone.
+        assert not managed_disk_path.exists(), "managed source must be removed after the move"
+
+        # DB row matches reality.
+        await db_session.refresh(pre)
+        assert pre.is_external is True
+        assert pre.folder_id == writable_folder["id"]
+        assert pre.file_path == str(on_nas.resolve())
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_external_to_managed_relocates_bytes(
+        self, async_client: AsyncClient, db_session, writable_folder, external_dir
+    ):
+        """Symmetric direction: external → managed copies the bytes into
+        internal storage with a UUID name, deletes the source on the
+        mount, and recomputes the file hash (since scan stores
+        ``file_hash=None`` for external rows)."""
+        import io
+
+        from backend.app.models.library import LibraryFile
+
+        # Plant a file on the writable mount and let upload give it a row.
+        upload = await async_client.post(
+            f"/api/v1/library/files?folder_id={writable_folder['id']}",
+            files={"file": ("relocate_me.stl", io.BytesIO(b"nas-bytes"), "application/octet-stream")},
+        )
+        assert upload.status_code == 200
+        file_id = upload.json()["id"]
+        ext_disk = external_dir / "relocate_me.stl"
+        assert ext_disk.exists()
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [file_id], "folder_id": None},
+        )
+        assert response.status_code == 200
+        assert response.json()["moved"] == 1
+
+        db_session.expire_all()
+        row = await db_session.get(LibraryFile, file_id)
+        assert row.is_external is False
+        assert row.folder_id is None
+        assert not row.file_path.startswith("/"), "managed file_path must be relative"
+        assert not ext_disk.exists(), "external source must be removed after the move"
+        # Hash filled in for the now-managed row so future dedup works.
+        assert row.file_hash is not None and len(row.file_hash) == 64
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_managed_to_external_collision_skips_with_reason(
+        self, async_client: AsyncClient, db_session, writable_folder, external_dir
+    ):
+        """A name collision on the target external mount must skip the
+        move with a structured reason — not silently overwrite a file
+        that's already on the NAS."""
+        import io
+
+        # Pre-existing file on the mount with the same name as the upload.
+        (external_dir / "duplicate.stl").write_bytes(b"pre-existing")
+
+        upload = await async_client.post(
+            "/api/v1/library/files",
+            files={"file": ("duplicate.stl", io.BytesIO(b"new-bytes"), "application/octet-stream")},
+        )
+        assert upload.status_code == 200
+        file_id = upload.json()["id"]
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [file_id], "folder_id": writable_folder["id"]},
+        )
+        assert response.status_code == 200
+        body = response.json()
+        assert body["moved"] == 0
+        assert body["skipped"] == 1
+        reasons = body["skipped_reasons"]
+        assert len(reasons) == 1
+        assert reasons[0]["file_id"] == file_id
+        assert reasons[0]["code"] == "name_collision"
+        # Pre-existing target file is intact.
+        assert (external_dir / "duplicate.stl").read_bytes() == b"pre-existing"
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_external_readonly_source_skips(self, async_client: AsyncClient, db_session, readonly_folder):
+        """A read-only mount allows reading but not deletes, and a move
+        is semantically a delete on the source. Skip with
+        ``source_readonly`` so the file isn't duplicated by half-moving."""
+        listing = await async_client.get(f"/api/v1/library/files?folder_id={readonly_folder['id']}")
+        assert listing.status_code == 200
+        ext_file_id = listing.json()[0]["id"]
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [ext_file_id], "folder_id": None},
+        )
+        assert response.status_code == 200
+        body = response.json()
+        assert body["moved"] == 0
+        assert body["skipped"] == 1
+        assert body["skipped_reasons"][0]["code"] == "source_readonly"
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_managed_to_managed_remains_db_only(self, async_client: AsyncClient, db_session):
+        """Same-boundary moves (managed → managed) keep the existing
+        DB-only fast path — no shutil.copy, no UUID rename. The original
+        file_path stays the same, only ``folder_id`` changes."""
+        import io
+
+        from backend.app.models.library import LibraryFile
+
+        sub = await async_client.post(
+            "/api/v1/library/folders",
+            json={"name": "subfolder", "parent_id": None},
+        )
+        assert sub.status_code == 200
+        target_id = sub.json()["id"]
+
+        upload = await async_client.post(
+            "/api/v1/library/files",
+            files={"file": ("part.stl", io.BytesIO(b"x"), "application/octet-stream")},
+        )
+        assert upload.status_code == 200
+        file_id = upload.json()["id"]
+        pre = await db_session.get(LibraryFile, file_id)
+        await db_session.refresh(pre)
+        original_path = pre.file_path
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [file_id], "folder_id": target_id},
+        )
+        assert response.status_code == 200
+        assert response.json()["moved"] == 1
+
+        db_session.expire_all()
+        post = await db_session.get(LibraryFile, file_id)
+        assert post.folder_id == target_id
+        assert post.is_external is False
+        assert post.file_path == original_path  # bytes never moved
+
+    @pytest.mark.asyncio
+    @pytest.mark.integration
+    async def test_skipped_reasons_field_present_even_when_empty(self, async_client: AsyncClient, db_session):
+        """Backwards-compatible response shape: ``skipped_reasons`` is
+        always present (empty list when nothing skipped) so frontend
+        code can treat it as the source of truth without optional-chain
+        gymnastics."""
+        import io
+
+        upload = await async_client.post(
+            "/api/v1/library/files",
+            files={"file": ("trivial.stl", io.BytesIO(b"x"), "application/octet-stream")},
+        )
+        assert upload.status_code == 200
+        file_id = upload.json()["id"]
+
+        response = await async_client.post(
+            "/api/v1/library/files/move",
+            json={"file_ids": [file_id], "folder_id": None},
+        )
+        assert response.status_code == 200
+        body = response.json()
+        assert "skipped_reasons" in body
+        assert body["skipped_reasons"] == []

Some files were not shown because too many files changed in this diff