tailscale.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. """Tailscale integration for virtual printer certificate provisioning.
  2. When Tailscale is present, provisions a Let's Encrypt certificate via
  3. `tailscale cert` for the machine's Tailscale FQDN. This cert is trusted
  4. by slicers without any manual CA installation, unlike the self-signed CA.
  5. Falls back gracefully when Tailscale is unavailable.
  6. """
  7. import asyncio
  8. import json
  9. import logging
  10. import os
  11. import re
  12. import shutil
  13. from dataclasses import dataclass, field
  14. from datetime import datetime, timezone
  15. from pathlib import Path
  16. from cryptography import x509
  17. logger = logging.getLogger(__name__)
  18. # Renew when fewer than this many days remain on the LE cert (LE issues 90-day certs;
  19. # Let's Encrypt recommends renewing at 30 days remaining)
  20. TS_CERT_EXPIRY_THRESHOLD_DAYS = 30
  21. # Defensive FQDN validation before passing to subprocess
  22. _FQDN_RE = re.compile(
  23. r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$",
  24. re.IGNORECASE,
  25. )
  26. # Detect tailnets where HTTPS cert generation is disabled — common for company/school
  27. # tailnets where the user is not a Tailscale admin.
  28. _HTTPS_DISABLED_RE = re.compile(
  29. r"(https? cert.*disabled|not enabled.*tailnet|cert.*not.*enabled)",
  30. re.IGNORECASE,
  31. )
  32. # Minimal environment for tailscale subprocess — passes OS/shell variables that
  33. # tailscale needs to locate its socket and config, but strips application secrets
  34. # (JWT keys, DB URLs, SMTP passwords, etc.) that the subprocess has no need for.
  35. _SUBPROCESS_ENV: dict[str, str] = {
  36. k: v
  37. for k, v in os.environ.items()
  38. if k
  39. in {
  40. "PATH",
  41. "HOME",
  42. "USER",
  43. "USERNAME",
  44. "LOGNAME",
  45. # Windows equivalents
  46. "USERPROFILE",
  47. "APPDATA",
  48. "LOCALAPPDATA",
  49. "PROGRAMFILES",
  50. "PROGRAMFILES(X86)",
  51. "SYSTEMROOT",
  52. "WINDIR",
  53. "COMPUTERNAME",
  54. "TEMP",
  55. "TMP",
  56. # Linux XDG dirs used by tailscale for socket/config
  57. "XDG_RUNTIME_DIR",
  58. "XDG_CONFIG_HOME",
  59. }
  60. }
  61. @dataclass
  62. class TailscaleStatus:
  63. """Runtime Tailscale availability and identity."""
  64. available: bool
  65. hostname: str # "myhost"
  66. tailnet_name: str # "tailnetname.ts.net"
  67. fqdn: str # "myhost.tailnetname.ts.net"
  68. tailscale_ips: list[str] = field(default_factory=list)
  69. error: str | None = None
  70. class TailscaleService:
  71. """Wraps Tailscale CLI commands for certificate provisioning.
  72. All methods are safe to call when Tailscale is absent — they return
  73. sensible defaults and never raise exceptions.
  74. """
  75. _docker_hint_logged: bool = False
  76. @classmethod
  77. def _log_docker_socket_hint(cls) -> None:
  78. """Log a one-time hint when running in Docker without the Tailscale socket mounted.
  79. Fires in both states: (a) tailscale binary missing and (b) binary present
  80. but the host socket isn't mounted into the container. The binary alone
  81. can't talk to the daemon — the host's tailscaled socket needs to be
  82. volume-mounted in docker-compose.yml.
  83. """
  84. if cls._docker_hint_logged:
  85. return
  86. if Path("/.dockerenv").exists() and not Path("/var/run/tailscale/tailscaled.sock").exists():
  87. logger.info(
  88. "Running in Docker but /var/run/tailscale/tailscaled.sock is not mounted. "
  89. "Add `- /var/run/tailscale/tailscaled.sock:/var/run/tailscale/tailscaled.sock` "
  90. "to docker-compose.yml (under volumes:) and run Tailscale on the host to enable "
  91. "Let's Encrypt certs for virtual printers."
  92. )
  93. cls._docker_hint_logged = True
  94. async def _run_tailscale(self, *args: str, timeout: float = 30.0) -> tuple[int | None, bytes, bytes]:
  95. """Run a tailscale subcommand and return (returncode, stdout, stderr).
  96. Resolves the binary to an absolute path to guard against PATH hijacking.
  97. Raises OSError if the binary cannot be found or launched.
  98. Raises asyncio.TimeoutError if the subprocess exceeds the timeout.
  99. """
  100. binary = shutil.which("tailscale")
  101. if not binary:
  102. raise OSError("tailscale binary not found")
  103. process = await asyncio.create_subprocess_exec(
  104. binary,
  105. *args,
  106. stdout=asyncio.subprocess.PIPE,
  107. stderr=asyncio.subprocess.PIPE,
  108. env=_SUBPROCESS_ENV,
  109. )
  110. try:
  111. stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
  112. except asyncio.TimeoutError:
  113. process.kill()
  114. await process.wait()
  115. raise
  116. return process.returncode, stdout, stderr
  117. async def get_status(self) -> TailscaleStatus:
  118. """Query Tailscale status and return machine identity.
  119. Runs: tailscale status --json
  120. Returns TailscaleStatus(available=False) if the binary is missing,
  121. the daemon is not running, or any other error occurs.
  122. """
  123. if not shutil.which("tailscale"):
  124. self._log_docker_socket_hint()
  125. return TailscaleStatus(
  126. available=False,
  127. hostname="",
  128. tailnet_name="",
  129. fqdn="",
  130. error="tailscale binary not found",
  131. )
  132. try:
  133. returncode, stdout, stderr = await self._run_tailscale("status", "--json", timeout=5.0)
  134. except OSError as e:
  135. return TailscaleStatus(
  136. available=False,
  137. hostname="",
  138. tailnet_name="",
  139. fqdn="",
  140. error=str(e),
  141. )
  142. if returncode is None or returncode != 0:
  143. # If the binary is present but the daemon socket is unreachable (e.g.
  144. # Docker without the socket mount), log the actionable hint rather than
  145. # just the opaque CLI stderr.
  146. self._log_docker_socket_hint()
  147. return TailscaleStatus(
  148. available=False,
  149. hostname="",
  150. tailnet_name="",
  151. fqdn="",
  152. error=stderr.decode(errors="replace").strip(),
  153. )
  154. try:
  155. data = json.loads(stdout)
  156. except json.JSONDecodeError as e:
  157. return TailscaleStatus(
  158. available=False,
  159. hostname="",
  160. tailnet_name="",
  161. fqdn="",
  162. error=f"JSON parse error: {e}",
  163. )
  164. self_info = data.get("Self", {})
  165. # DNSName includes trailing dot: "myhost.tailnetname.ts.net."
  166. fqdn = self_info.get("DNSName", "").rstrip(".")
  167. if not fqdn:
  168. return TailscaleStatus(
  169. available=False,
  170. hostname="",
  171. tailnet_name="",
  172. fqdn="",
  173. error="Tailscale not connected (no DNSName)",
  174. )
  175. # Split "myhost.tailnetname.ts.net" into hostname + tailnet_name
  176. parts = fqdn.split(".", 1)
  177. hostname = parts[0]
  178. tailnet_name = parts[1] if len(parts) > 1 else ""
  179. tailscale_ips = self_info.get("TailscaleIPs", [])
  180. logger.debug("Tailscale available: fqdn=%s, ips=%s", fqdn, tailscale_ips)
  181. return TailscaleStatus(
  182. available=True,
  183. hostname=hostname,
  184. tailnet_name=tailnet_name,
  185. fqdn=fqdn,
  186. tailscale_ips=tailscale_ips,
  187. )
  188. async def provision_cert(self, fqdn: str, cert_path: Path, key_path: Path) -> bool:
  189. """Request a Let's Encrypt certificate for the given Tailscale FQDN.
  190. Runs: tailscale cert --cert-file <cert_path> --key-file <key_path> <fqdn>
  191. Returns True on success, False on any error.
  192. """
  193. if not _FQDN_RE.match(fqdn):
  194. logger.warning("provision_cert: invalid FQDN %r, skipping", fqdn)
  195. return False
  196. # Ensure the target directory exists before tailscale cert writes to it
  197. cert_path.parent.mkdir(parents=True, exist_ok=True)
  198. logger.info("Provisioning Tailscale cert for %s -> %s", fqdn, cert_path)
  199. try:
  200. returncode, _, stderr = await self._run_tailscale(
  201. "cert",
  202. "--cert-file",
  203. str(cert_path),
  204. "--key-file",
  205. str(key_path),
  206. fqdn,
  207. timeout=60.0,
  208. )
  209. except OSError as e:
  210. logger.warning("tailscale cert failed (OS error): %s", e)
  211. return False
  212. if returncode is None or returncode != 0:
  213. err_text = stderr.decode(errors="replace").strip()
  214. if _HTTPS_DISABLED_RE.search(err_text):
  215. logger.warning(
  216. "Tailscale HTTPS certs are not enabled for this tailnet. "
  217. "Visit https://login.tailscale.com/admin/dns and enable HTTPS. "
  218. "Falling back to self-signed cert."
  219. )
  220. else:
  221. logger.warning("tailscale cert failed (exit %s): %s", returncode, err_text)
  222. return False
  223. # Restrict private key permissions
  224. try:
  225. key_path.chmod(0o600)
  226. except OSError as e:
  227. logger.warning("Could not set key permissions on %s: %s", key_path, e)
  228. # Verify the files are readable by the current process — on bare-metal, the
  229. # tailscale daemon or a prior sudo invocation may have left them root-owned.
  230. if not os.access(cert_path, os.R_OK) or not os.access(key_path, os.R_OK):
  231. logger.error(
  232. "Tailscale cert files at %s are not readable by this process. "
  233. "Fix with: sudo chown $(whoami):$(whoami) %s %s",
  234. cert_path.parent,
  235. cert_path,
  236. key_path,
  237. )
  238. return False
  239. logger.info("Tailscale cert provisioned: %s", cert_path)
  240. return True
  241. def cert_needs_renewal(self, cert_path: Path, fqdn: str | None = None) -> bool:
  242. """Check whether the certificate at cert_path needs to be renewed.
  243. Returns True if the file is absent, unreadable, expires within
  244. TS_CERT_EXPIRY_THRESHOLD_DAYS days, or if fqdn is given and does not
  245. appear in the certificate's Subject Alternative Names.
  246. """
  247. if not cert_path.exists():
  248. return True
  249. try:
  250. cert_pem = cert_path.read_bytes()
  251. # The file may contain a full chain; load only the first PEM block
  252. cert = x509.load_pem_x509_certificate(cert_pem)
  253. now = datetime.now(timezone.utc)
  254. days_remaining = (cert.not_valid_after_utc - now).days
  255. if days_remaining < TS_CERT_EXPIRY_THRESHOLD_DAYS:
  256. logger.info("Tailscale cert expires in %d days, renewal needed", days_remaining)
  257. return True
  258. # Validate that the cert covers the requested FQDN (guards against stale
  259. # cert after machine rename or tailnet migration). Case-insensitive per RFC 4343.
  260. if fqdn:
  261. try:
  262. san = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName)
  263. dns_names = san.value.get_values_for_type(x509.DNSName)
  264. if fqdn.lower() not in {n.lower() for n in dns_names}:
  265. logger.info(
  266. "Tailscale cert SAN mismatch (cert has %s, need %s), renewal needed",
  267. dns_names,
  268. fqdn,
  269. )
  270. return True
  271. except x509.ExtensionNotFound:
  272. logger.info("Tailscale cert has no SAN extension, renewal needed")
  273. return True
  274. logger.debug("Tailscale cert valid for %d more days", days_remaining)
  275. return False
  276. except (OSError, ValueError) as e:
  277. logger.warning("Could not read Tailscale cert %s: %s", cert_path, e)
  278. return True
  279. async def ensure_cert(self, fqdn: str, cert_path: Path, key_path: Path) -> bool:
  280. """Ensure a fresh certificate exists at cert_path.
  281. Skips provisioning if the cert is present, not near expiry, and covers fqdn.
  282. Returns True if a valid cert is now available.
  283. """
  284. if not self.cert_needs_renewal(cert_path, fqdn=fqdn):
  285. logger.debug("Tailscale cert is fresh, skipping provision")
  286. return True
  287. return await self.provision_cert(fqdn, cert_path, key_path)
# Module-level singleton — import this in other modules rather than
# constructing a separate TailscaleService.
tailscale_service = TailscaleService()