spoolbuddy_ssh.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. """SSH-based update service for SpoolBuddy devices.
  2. Instead of the daemon updating itself (fragile: permission issues, self-modifying
  3. code, hardcoded branch), Bambuddy SSHes into the SpoolBuddy Pi and drives the
  4. update remotely: git fetch/checkout, pip install, systemctl restart.
  5. Uses `asyncssh` (pure-Python async SSH client) rather than shelling out to the
  6. OpenSSH `ssh` binary. The subprocess approach fails in Docker: both `ssh` and
  7. `ssh-keygen` call `getpwuid(getuid())` during startup and abort with
  8. "No user exists for uid <N>" when the container runs under a UID that is not
  9. listed in /etc/passwd (e.g. PUID=1000 on python:3.13-slim, which only has
  10. entries for root). asyncssh does all of its work in-process.
  11. """
  12. import asyncio
  13. import logging
  14. import os
  15. import shlex
  16. from pathlib import Path
  17. import asyncssh
  18. from cryptography.hazmat.primitives import serialization
  19. from cryptography.hazmat.primitives.asymmetric import ed25519
  20. from backend.app.core.config import settings
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Username passed to asyncssh.connect() when connecting to a SpoolBuddy device.
SSH_USER = "spoolbuddy"
# Remote install directory used by perform_ssh_update() when the caller does
# not supply an install_path.
DEFAULT_INSTALL_PATH = "/opt/bambuddy"

# Project root — where the `.git` directory lives for native installs and for
# Docker containers that bind-mount the repo. This is intentionally distinct
# from `settings.base_dir`, which points at the persistent *data* directory
# (e.g. `DATA_DIR=/app/data` in Docker) and therefore never contains `.git`.
# `backend/app/services/spoolbuddy_ssh.py` → parents[3] = project root.
_APP_DIR = Path(__file__).resolve().parents[3]
  30. # Note for Docker: asyncssh.connect() internally calls getpass.getuser() to
  31. # resolve the *local* username for ~/.ssh/config host matching. Under an
  32. # arbitrary PUID with no /etc/passwd entry this would raise OSError. The
  33. # Dockerfile sets LOGNAME/USER/HOME so getpass.getuser() succeeds via env-var
  34. # lookup before ever touching the passwd database.
  35. def _get_ssh_key_dir() -> Path:
  36. """Return (and create if needed) the directory for SpoolBuddy SSH keys."""
  37. key_dir = settings.base_dir / "spoolbuddy" / "ssh"
  38. if not key_dir.exists():
  39. key_dir.mkdir(mode=0o700, parents=True)
  40. return key_dir
  41. async def get_or_create_keypair() -> tuple[Path, Path]:
  42. """Return (private_key_path, public_key_path), generating if missing.
  43. Uses the in-process `cryptography` library instead of shelling out to
  44. `ssh-keygen`. The subprocess approach fails inside Docker containers when
  45. the image runs under an arbitrary UID (e.g. PUID=1001) that is not listed
  46. in /etc/passwd — `ssh-keygen` calls `getpwuid()` for the current user's
  47. home directory and aborts with "no user exists for uid <N>".
  48. """
  49. key_dir = _get_ssh_key_dir()
  50. private_key = key_dir / "id_ed25519"
  51. public_key = key_dir / "id_ed25519.pub"
  52. if private_key.exists() and public_key.exists():
  53. return private_key, public_key
  54. logger.info("Generating SSH keypair for SpoolBuddy updates")
  55. priv_obj = ed25519.Ed25519PrivateKey.generate()
  56. pub_obj = priv_obj.public_key()
  57. private_bytes = priv_obj.private_bytes(
  58. encoding=serialization.Encoding.PEM,
  59. format=serialization.PrivateFormat.OpenSSH,
  60. encryption_algorithm=serialization.NoEncryption(),
  61. )
  62. public_bytes = pub_obj.public_bytes(
  63. encoding=serialization.Encoding.OpenSSH,
  64. format=serialization.PublicFormat.OpenSSH,
  65. )
  66. # OpenSSH public format has no comment field by default; append one to match
  67. # the previous ssh-keygen output so the authorized_keys line is identifiable.
  68. public_line = public_bytes + b" bambuddy-spoolbuddy\n"
  69. private_key.write_bytes(private_bytes)
  70. private_key.chmod(0o600)
  71. public_key.write_bytes(public_line)
  72. logger.info("SSH keypair generated at %s", key_dir)
  73. return private_key, public_key
  74. async def get_public_key() -> str:
  75. """Return the SSH public key content for pairing."""
  76. _, public_key = await get_or_create_keypair()
  77. return public_key.read_text().strip()
  78. def detect_current_branch() -> str:
  79. """Detect the git branch Bambuddy is running on.
  80. Reads `.git/HEAD` directly from the application root (``_APP_DIR``) rather
  81. than shelling out to `git`. The application root is deliberately distinct
  82. from ``settings.base_dir``: in Docker, ``base_dir`` points at the data
  83. volume (``/app/data``) which never contains ``.git``, while the repo is
  84. bind-mounted (or COPYd) to ``/app``. This works for native installs,
  85. bare Docker containers (no ``.git`` — fall through to the env var), and
  86. Docker containers that bind-mount the repo (``.git`` is present, no
  87. ``git`` binary required, and no ``getpwuid()`` call that could fail under
  88. an arbitrary PUID).
  89. Fallback order: ``.git/HEAD`` → ``GIT_BRANCH`` env var → ``"main"``.
  90. """
  91. git_path = _APP_DIR / ".git"
  92. try:
  93. if git_path.exists():
  94. # Git worktrees use a file containing `gitdir: <path>` instead of
  95. # a directory — follow the pointer.
  96. if git_path.is_file():
  97. content = git_path.read_text(encoding="utf-8").strip()
  98. if content.startswith("gitdir:"):
  99. git_path = (_APP_DIR / content.removeprefix("gitdir:").strip()).resolve()
  100. head_file = git_path / "HEAD"
  101. if head_file.is_file():
  102. head = head_file.read_text(encoding="utf-8").strip()
  103. # Normal case: `ref: refs/heads/<branch>`.
  104. # Detached HEAD stores a raw commit hash — fall through to env var.
  105. if head.startswith("ref: refs/heads/"):
  106. return head.removeprefix("ref: refs/heads/").strip()
  107. except OSError as exc:
  108. logger.debug("Could not read .git/HEAD, falling back: %s", exc)
  109. return os.environ.get("GIT_BRANCH", "main")
  110. async def _run_ssh_command(
  111. ip: str,
  112. command: str,
  113. private_key: Path,
  114. *,
  115. known_hosts: "asyncssh.SSHKnownHosts | None" = None,
  116. timeout: int = 60,
  117. ) -> tuple[int, str, str, str | None]:
  118. """Execute a command on a SpoolBuddy device via SSH.
  119. Uses asyncssh rather than the OpenSSH `ssh` binary — see module docstring
  120. for the Docker/PUID rationale.
  121. Returns (returncode, stdout, stderr, observed_host_key).
  122. observed_host_key is non-None only on a successful connection when known_hosts=None
  123. was passed. Callers are responsible for also checking whether a stored key already
  124. exists before persisting — use `observed_key and not stored_host_key` not just
  125. `observed_key is not None`.
  126. On connection failure rc=255; on timeout rc=-1.
  127. """
  128. observed_host_key: str | None = None
  129. try:
  130. async with asyncio.timeout(timeout):
  131. async with asyncssh.connect(
  132. host=ip,
  133. username=SSH_USER,
  134. client_keys=[str(private_key)],
  135. known_hosts=known_hosts,
  136. config=[], # do not load ~/.ssh/config — HOME may not resolve under arbitrary Docker PUIDs
  137. connect_timeout=10,
  138. ) as conn:
  139. if known_hosts is None:
  140. # TOFU first-use: capture the host key for storage
  141. server_key = conn.get_server_host_key()
  142. if server_key:
  143. observed_host_key = server_key.export_public_key("openssh").decode().strip()
  144. result = await conn.run(command, check=False)
  145. except asyncssh.HostKeyNotVerifiable:
  146. logger.error("SSH host key mismatch for %s — possible MITM attack", ip)
  147. return 255, "", "Host key mismatch — verify device identity before retrying", None
  148. except TimeoutError:
  149. return -1, "", "SSH command timed out", None
  150. except (asyncssh.Error, OSError) as exc:
  151. return 255, "", str(exc), None
  152. stdout = result.stdout if isinstance(result.stdout, str) else (result.stdout or b"").decode(errors="replace")
  153. stderr = result.stderr if isinstance(result.stderr, str) else (result.stderr or b"").decode(errors="replace")
  154. # asyncssh's exit_status is None when the remote closed without setting one
  155. returncode = result.exit_status if result.exit_status is not None else 0
  156. return returncode, stdout, stderr, observed_host_key
  157. async def perform_ssh_update(device_id: str, ip_address: str, install_path: str | None = None) -> None:
  158. """SSH into a SpoolBuddy device and update it to match Bambuddy's branch.
  159. Updates device.update_status/update_message in the DB and broadcasts
  160. progress via WebSocket at each step. Host key verification uses TOFU:
  161. the device's SSH public key is stored on first connect and verified on
  162. all subsequent connections.
  163. """
  164. from sqlalchemy import select
  165. from backend.app.api.routes.spoolbuddy import ws_manager
  166. from backend.app.core.database import async_session
  167. from backend.app.models.spoolbuddy_device import SpoolBuddyDevice
  168. install_path = install_path or DEFAULT_INSTALL_PATH
  169. branch = detect_current_branch()
  170. safe_branch = shlex.quote(branch)
  171. safe_path = shlex.quote(install_path)
  172. # Load the stored SSH host key for TOFU verification
  173. stored_host_key: str | None = None
  174. async with async_session() as db:
  175. result = await db.execute(select(SpoolBuddyDevice).where(SpoolBuddyDevice.device_id == device_id))
  176. dev = result.scalar_one_or_none()
  177. if dev:
  178. stored_host_key = dev.ssh_host_key
  179. known_hosts: asyncssh.SSHKnownHosts | None = None
  180. if stored_host_key:
  181. try:
  182. # asyncssh.import_known_hosts() expects str — passing bytes crashes
  183. # inside its line-by-line parser with a TypeError.
  184. known_hosts = asyncssh.import_known_hosts(f"{ip_address} {stored_host_key}\n")
  185. except (ValueError, TypeError, asyncssh.Error) as exc:
  186. logger.warning(
  187. "Could not parse stored SSH host key for %s, falling back to TOFU: %s",
  188. device_id,
  189. exc,
  190. )
  191. async def _update_progress(status: str, message: str) -> None:
  192. """Update device status in DB and broadcast via WebSocket."""
  193. async with async_session() as db:
  194. result = await db.execute(select(SpoolBuddyDevice).where(SpoolBuddyDevice.device_id == device_id))
  195. device = result.scalar_one_or_none()
  196. if device:
  197. device.update_status = status
  198. device.update_message = message[:255] if message else None
  199. if status in ("complete", "error"):
  200. device.pending_command = None
  201. await db.commit()
  202. await ws_manager.broadcast(
  203. {
  204. "type": "spoolbuddy_update",
  205. "device_id": device_id,
  206. "update_status": status,
  207. "update_message": message[:255] if message else None,
  208. }
  209. )
  210. try:
  211. private_key, _ = await get_or_create_keypair()
  212. # Step 1: Test SSH connectivity
  213. await _update_progress("updating", "Connecting via SSH...")
  214. rc, _, stderr, observed_key = await _run_ssh_command(
  215. ip_address, "echo ok", private_key, known_hosts=known_hosts
  216. )
  217. if rc != 0:
  218. await _update_progress("error", f"SSH connection failed: {stderr[:200]}")
  219. return
  220. # TOFU: persist host key on first successful connect
  221. if observed_key and not stored_host_key:
  222. async with async_session() as db:
  223. result = await db.execute(select(SpoolBuddyDevice).where(SpoolBuddyDevice.device_id == device_id))
  224. d = result.scalar_one_or_none()
  225. if d:
  226. d.ssh_host_key = observed_key
  227. await db.commit()
  228. logger.info("TOFU: stored SSH host key for SpoolBuddy %s", device_id)
  229. try:
  230. known_hosts = asyncssh.import_known_hosts(f"{ip_address} {observed_key}\n")
  231. except (ValueError, TypeError, asyncssh.Error) as exc:
  232. logger.error(
  233. "TOFU: could not parse just-stored host key for %s; "
  234. "remaining SSH steps in this run will not verify host key: %s",
  235. device_id,
  236. exc,
  237. )
  238. known_hosts = None
  239. # Step 2: Git fetch
  240. await _update_progress("updating", f"Fetching latest code (branch: {branch})...")
  241. rc, _, stderr, _ = await _run_ssh_command(
  242. ip_address,
  243. f"cd {safe_path} && git -c safe.directory={safe_path} fetch origin {safe_branch}",
  244. private_key,
  245. known_hosts=known_hosts,
  246. timeout=120,
  247. )
  248. if rc != 0:
  249. await _update_progress("error", f"git fetch failed: {stderr[:200]}")
  250. return
  251. # Step 3: Git checkout + reset
  252. await _update_progress("updating", "Applying update...")
  253. rc, _, stderr, _ = await _run_ssh_command(
  254. ip_address,
  255. f"cd {safe_path} && git -c safe.directory={safe_path} checkout {safe_branch} "
  256. f"&& git -c safe.directory={safe_path} reset --hard origin/{safe_branch}",
  257. private_key,
  258. known_hosts=known_hosts,
  259. )
  260. if rc != 0:
  261. await _update_progress("error", f"git checkout/reset failed: {stderr[:200]}")
  262. return
  263. # Step 4: Install dependencies
  264. await _update_progress("updating", "Installing dependencies...")
  265. venv_pip = shlex.quote(f"{install_path}/spoolbuddy/venv/bin/pip")
  266. rc, _, stderr, _ = await _run_ssh_command(
  267. ip_address,
  268. f"{venv_pip} install --upgrade spidev gpiod smbus2 httpx 2>&1",
  269. private_key,
  270. known_hosts=known_hosts,
  271. timeout=120,
  272. )
  273. if rc != 0:
  274. logger.warning("SpoolBuddy %s: pip install returned non-zero (continuing): %s", device_id, stderr[:200])
  275. # Step 5: Restart daemon
  276. await _update_progress("updating", "Restarting daemon...")
  277. rc, _, stderr, _ = await _run_ssh_command(
  278. ip_address,
  279. "sudo /usr/bin/systemctl restart spoolbuddy.service",
  280. private_key,
  281. known_hosts=known_hosts,
  282. )
  283. if rc != 0:
  284. await _update_progress("error", f"Service restart failed: {stderr[:200]}")
  285. return
  286. # Step 6: Clear browser cache and restart kiosk
  287. # Remove Chromium's Service Worker + cache storage to prevent stale frontend
  288. await _run_ssh_command(
  289. ip_address,
  290. "sudo find /home -maxdepth 5 -path '*/chromium/Default/Service Worker' -type d -exec rm -rf {} + 2>/dev/null; true",
  291. private_key,
  292. known_hosts=known_hosts,
  293. )
  294. rc, _, stderr, _ = await _run_ssh_command(
  295. ip_address,
  296. "sudo /usr/bin/systemctl restart getty@tty1.service",
  297. private_key,
  298. known_hosts=known_hosts,
  299. )
  300. if rc != 0:
  301. logger.warning("SpoolBuddy %s: kiosk restart failed (non-fatal): %s", device_id, stderr[:200])
  302. logger.info("SpoolBuddy %s: SSH update complete (branch=%s)", device_id, branch)
  303. await _update_progress("complete", f"Updated to {branch}")
  304. except Exception:
  305. logger.exception("SpoolBuddy %s: SSH update failed", device_id)
  306. await _update_progress("error", "Update failed due to an internal error")