Browse Source

fix(spoolbuddy): lower /update permission to INVENTORY_UPDATE so kiosk's own Settings -> Update button works

  The kiosk's Settings -> Update Daemon button returned "API keys cannot
  be used for administrative operations" because POST /spoolbuddy/devices/
  {id}/update was gated on Permission.SETTINGS_UPDATE, and SETTINGS_UPDATE
  is in the _APIKEY_DENIED_PERMISSIONS deny-list introduced by PR #1241.
  Every kiosk-side request tripped the deny-list before the API key's
  scope set (Read / Print Queue / Control / Legacy) was even consulted.

  Same root cause as the four QuickMenu System buttons fixed in 0.2.4b3
  (Restart Daemon / Restart Browser / Reboot / Shutdown). Missed /update
  in that audit on the reasoning "replaces the daemon binary, different
  threat surface" — but that's wrong: restart_daemon already replaces
  the running daemon process, so daemon-replacement is not a step up in
  blast radius. The SSH update is also strictly scoped to the one device
  the operator physically controls (git fetch + pip install + systemctl
  restart on that host) — same threat profile as the system commands
  already running on INVENTORY_UPDATE.

  Lower /spoolbuddy/devices/{id}/update from SETTINGS_UPDATE to
  INVENTORY_UPDATE so it aligns with the rest of the kiosk-scoped routes
  (calibration/tare, display, cancel-write, system/command,
  system/command-result, update-status). The main Bambuddy in-app updater
  at POST /api/v1/updates/apply keeps SETTINGS_UPDATE — that one runs on
  the Bambuddy host and is correctly fenced behind the deny-list.
maziggy 2 weeks ago
parent
commit
f5ecc61cda

File diff suppressed because it is too large
+ 3 - 0
CHANGELOG.md


+ 8 - 1
backend/app/api/routes/spoolbuddy.py

@@ -1319,7 +1319,14 @@ async def trigger_daemon_update(
     device_id: str,
     device_id: str,
     req: dict | None = None,
     req: dict | None = None,
     db: AsyncSession = Depends(get_db),
     db: AsyncSession = Depends(get_db),
-    _: User | None = RequirePermissionIfAuthEnabled(Permission.SETTINGS_UPDATE),
+    # Aligns with the rest of the kiosk-scoped device routes (calibration,
+    # display, cancel-write, system/command — all INVENTORY_UPDATE).
+    # SETTINGS_UPDATE is on the API-key deny-list, which blocks the Update
+    # button from the kiosk's own Settings page even when the operator has
+    # physical access. Update only acts on the device the operator already
+    # controls (git fetch + pip install + systemctl restart on that one
+    # host) — same blast radius as the restart_daemon command.
+    _: User | None = RequirePermissionIfAuthEnabled(Permission.INVENTORY_UPDATE),
 ):
 ):
     """Trigger a SpoolBuddy update over SSH.
     """Trigger a SpoolBuddy update over SSH.
 
 

+ 5 - 0
backend/app/core/database.py

@@ -1564,6 +1564,11 @@ async def run_migrations(conn):
 
 
     # Migration: Add SSH host key for TOFU verification (H1 security fix)
     # Migration: Add SSH host key for TOFU verification (H1 security fix)
     await _safe_execute(conn, "ALTER TABLE spoolbuddy_devices ADD COLUMN ssh_host_key VARCHAR(500)")
     await _safe_execute(conn, "ALTER TABLE spoolbuddy_devices ADD COLUMN ssh_host_key VARCHAR(500)")
+    # Migration: Widen ssh_host_key from VARCHAR(500) to TEXT — RSA-3072+ host keys
+    # in OpenSSH format exceed 500 chars (RSA-4096 ~720 chars). PostgreSQL enforces
+    # the limit and rejects the UPDATE; SQLite ignores VARCHAR length so no-op there.
+    if not is_sqlite():
+        await _safe_execute(conn, "ALTER TABLE spoolbuddy_devices ALTER COLUMN ssh_host_key TYPE TEXT")
 
 
     # Migration: Convert ams_labels table from (printer_id, ams_id) key to ams_serial_number key
     # Migration: Convert ams_labels table from (printer_id, ams_id) key to ams_serial_number key
     # Labels are now keyed by AMS serial number so they persist when the AMS is moved to another printer.
     # Labels are now keyed by AMS serial number so they persist when the AMS is moved to another printer.

+ 1 - 1
backend/app/models/spoolbuddy_device.py

@@ -37,6 +37,6 @@ class SpoolBuddyDevice(Base):
     scale_ok: Mapped[bool] = mapped_column(Boolean, default=False)
     scale_ok: Mapped[bool] = mapped_column(Boolean, default=False)
     uptime_s: Mapped[int] = mapped_column(Integer, default=0)
     uptime_s: Mapped[int] = mapped_column(Integer, default=0)
     system_stats: Mapped[str | None] = mapped_column(Text, nullable=True)
     system_stats: Mapped[str | None] = mapped_column(Text, nullable=True)
-    ssh_host_key: Mapped[str | None] = mapped_column(String(500), nullable=True)
+    ssh_host_key: Mapped[str | None] = mapped_column(Text, nullable=True)
     created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
     created_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now())
     updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), onupdate=func.now())
     updated_at: Mapped[datetime] = mapped_column(DateTime, server_default=func.now(), onupdate=func.now())

+ 6 - 4
backend/app/services/spoolbuddy_ssh.py

@@ -218,8 +218,10 @@ async def perform_ssh_update(device_id: str, ip_address: str, install_path: str
     known_hosts: asyncssh.SSHKnownHosts | None = None
     known_hosts: asyncssh.SSHKnownHosts | None = None
     if stored_host_key:
     if stored_host_key:
         try:
         try:
-            known_hosts = asyncssh.import_known_hosts(f"{ip_address} {stored_host_key}\n".encode())
-        except (ValueError, asyncssh.Error) as exc:
+            # asyncssh.import_known_hosts() expects str — passing bytes crashes
+            # inside its line-by-line parser with a TypeError.
+            known_hosts = asyncssh.import_known_hosts(f"{ip_address} {stored_host_key}\n")
+        except (ValueError, TypeError, asyncssh.Error) as exc:
             logger.warning(
             logger.warning(
                 "Could not parse stored SSH host key for %s, falling back to TOFU: %s",
                 "Could not parse stored SSH host key for %s, falling back to TOFU: %s",
                 device_id,
                 device_id,
@@ -269,8 +271,8 @@ async def perform_ssh_update(device_id: str, ip_address: str, install_path: str
                     await db.commit()
                     await db.commit()
             logger.info("TOFU: stored SSH host key for SpoolBuddy %s", device_id)
             logger.info("TOFU: stored SSH host key for SpoolBuddy %s", device_id)
             try:
             try:
-                known_hosts = asyncssh.import_known_hosts(f"{ip_address} {observed_key}\n".encode())
-            except (ValueError, asyncssh.Error) as exc:
+                known_hosts = asyncssh.import_known_hosts(f"{ip_address} {observed_key}\n")
+            except (ValueError, TypeError, asyncssh.Error) as exc:
                 logger.error(
                 logger.error(
                     "TOFU: could not parse just-stored host key for %s; "
                     "TOFU: could not parse just-stored host key for %s; "
                     "remaining SSH steps in this run will not verify host key: %s",
                     "remaining SSH steps in this run will not verify host key: %s",

+ 23 - 11
backend/tests/integration/test_settings_api_key_scrubbing.py

@@ -107,15 +107,17 @@ class TestSettingsScrubForApiKey:
 
 
 
 
 class TestRceEndpointPermissions:
 class TestRceEndpointPermissions:
-    """T-Gap 2 (revised): system_command was originally gated on SETTINGS_UPDATE
-    but that locked out kiosk operators (who hold INVENTORY_UPDATE-only keys)
-    from the QuickMenu's Restart-Daemon / Restart-Browser / Reboot / Shutdown
-    buttons — the only way to recover the kiosk from the kiosk itself. Risk
-    is bounded: only the 4 named commands are accepted (no RCE), reboot and
-    shutdown require physical-access recovery, and the same operator already
-    controls printers + weighs spools on the same device. The /update route
-    (full firmware upgrade) keeps SETTINGS_UPDATE because it can replace the
-    daemon binary, which is a different threat surface."""
+    """T-Gap 2 (revised): system_command and /update were originally gated on
+    SETTINGS_UPDATE but that locked out kiosk operators (who hold
+    INVENTORY_UPDATE-only keys) from the QuickMenu's Restart-Daemon /
+    Restart-Browser / Reboot / Shutdown buttons and the Settings → Update
+    button — the only ways to recover or update the kiosk from the kiosk
+    itself. Risk is bounded the same way for both: actions are scoped to a
+    single SpoolBuddy device that the operator already physically controls,
+    daemon-replacement via /update has the same blast radius as
+    restart_daemon (which also replaces the running process), and the
+    same operator already controls printers + weighs spools on the same
+    device. Both routes are now on INVENTORY_UPDATE."""
 
 
     @pytest.fixture
     @pytest.fixture
     async def auth_enabled(self, db_session):
     async def auth_enabled(self, db_session):
@@ -184,7 +186,7 @@ class TestRceEndpointPermissions:
 
 
     @pytest.mark.asyncio
     @pytest.mark.asyncio
     @pytest.mark.integration
     @pytest.mark.integration
-    async def test_trigger_update_requires_settings_update(
+    async def test_trigger_update_accepts_inventory_update(
         self,
         self,
         async_client: AsyncClient,
         async_client: AsyncClient,
         db_session,
         db_session,
@@ -192,9 +194,19 @@ class TestRceEndpointPermissions:
         inventory_only_api_key,
         inventory_only_api_key,
         spoolbuddy_device,
         spoolbuddy_device,
     ):
     ):
+        """T-Gap 2 (revised): /update was lowered from SETTINGS_UPDATE to
+        INVENTORY_UPDATE so the kiosk's own Settings → Update button works.
+        The deny-list (SETTINGS_UPDATE in _APIKEY_DENIED_PERMISSIONS) was
+        returning "API keys cannot be used for administrative operations"
+        for any kiosk-side request to update the daemon. An inventory-only
+        key must NOT 403 — it should reach the device-state check (and 409
+        for offline device, since the fixture doesn't set last_seen).
+        """
         resp = await async_client.post(
         resp = await async_client.post(
             f"/api/v1/spoolbuddy/devices/{spoolbuddy_device.device_id}/update",
             f"/api/v1/spoolbuddy/devices/{spoolbuddy_device.device_id}/update",
             json={},
             json={},
             headers={"X-API-Key": inventory_only_api_key},
             headers={"X-API-Key": inventory_only_api_key},
         )
         )
-        assert resp.status_code == 403
+        # Permission accepted — fails on device-state, not on auth.
+        assert resp.status_code == 409
+        assert "offline" in resp.json()["detail"].lower()

+ 45 - 0
backend/tests/unit/test_spoolbuddy_ssh.py

@@ -672,3 +672,48 @@ async def test_perform_ssh_update_corrupt_stored_key_falls_back_to_tofu(tmp_path
     # Broadcast must show success, not error
     # Broadcast must show success, not error
     error_broadcasts = [c for c in mock_ws.broadcast.call_args_list if c[0][0].get("update_status") == "error"]
     error_broadcasts = [c for c in mock_ws.broadcast.call_args_list if c[0][0].get("update_status") == "error"]
     assert not error_broadcasts, f"Got unexpected error broadcast: {error_broadcasts}"
     assert not error_broadcasts, f"Got unexpected error broadcast: {error_broadcasts}"
+
+
+@pytest.mark.asyncio
+async def test_perform_ssh_update_passes_str_not_bytes_to_import_known_hosts(tmp_path):
+    """asyncssh.import_known_hosts() is a str-only API — passing bytes crashes
+    inside its line parser (`line.startswith('#')` against a bytes line raises
+    TypeError). Pin both call sites — the stored-key parse and the just-stored
+    TOFU re-parse — to ensure we never re-introduce the .encode() bug."""
+    ssh_dir = tmp_path / "spoolbuddy" / "ssh"
+    ssh_dir.mkdir(parents=True)
+    (ssh_dir / "id_ed25519").write_text("PRIVATE")
+    (ssh_dir / "id_ed25519.pub").write_text("PUBLIC")
+
+    captured_args: list[object] = []
+
+    def capture_import(arg):
+        captured_args.append(arg)
+        return MagicMock(name="known_hosts")
+
+    async def mock_ssh(ip, cmd, key, *, known_hosts=None, timeout=60):
+        # Surface a freshly observed key on the first call so the TOFU branch
+        # also re-imports — exercises the second call site too.
+        observed = "ssh-rsa AAAAOBSERVED first-tofu" if not captured_args else None
+        return 0, "ok", "", observed
+
+    mock_device, mock_ctx, mock_ws = _make_update_mocks(tmp_path)
+    mock_device.ssh_host_key = "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5 storedkey"
+
+    with (
+        patch("backend.app.services.spoolbuddy_ssh.settings") as mock_settings,
+        patch("backend.app.services.spoolbuddy_ssh._run_ssh_command", side_effect=mock_ssh),
+        patch("backend.app.services.spoolbuddy_ssh.detect_current_branch", return_value="main"),
+        patch(
+            "backend.app.services.spoolbuddy_ssh.asyncssh.import_known_hosts",
+            side_effect=capture_import,
+        ),
+        patch("backend.app.core.database.async_session", return_value=mock_ctx),
+        patch("backend.app.api.routes.spoolbuddy.ws_manager", mock_ws),
+    ):
+        mock_settings.base_dir = tmp_path
+        await perform_ssh_update("sb-test", "10.0.0.1")
+
+    assert captured_args, "import_known_hosts was never called"
+    for arg in captured_args:
+        assert isinstance(arg, str), f"asyncssh.import_known_hosts must receive str, got {type(arg).__name__}: {arg!r}"

+ 8 - 0
spoolbuddy/install/install.sh

@@ -528,6 +528,14 @@ SUDOERS
 download_spoolbuddy() {
 download_spoolbuddy() {
     if [[ -d "$INSTALL_PATH/.git" ]]; then
     if [[ -d "$INSTALL_PATH/.git" ]]; then
         info "Existing installation found, updating..."
         info "Existing installation found, updating..."
+        # Pre-emptively normalise tree ownership before git touches anything.
+        # Stray root-owned files (e.g. static/assets/* left by an earlier
+        # `sudo` run, or a frontend build that wrote as root) make
+        # `git reset --hard` fail with EACCES on the unlink/replace step,
+        # aborting the update before the post-install chown below ever
+        # runs. The script already runs as root (see check_root), so this
+        # is always safe.
+        chown -R "$SPOOLBUDDY_SERVICE_USER:$SPOOLBUDDY_SERVICE_USER" "$INSTALL_PATH"
         git config --global --add safe.directory "$INSTALL_PATH" 2>/dev/null || true
         git config --global --add safe.directory "$INSTALL_PATH" 2>/dev/null || true
         cd "$INSTALL_PATH"
         cd "$INSTALL_PATH"
         git remote set-url origin "$INSTALL_REPO" 2>/dev/null || true
         git remote set-url origin "$INSTALL_REPO" 2>/dev/null || true

Some files were not shown because too many files changed in this diff