tailscale.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. """Tailscale integration for virtual printer certificate provisioning.
  2. When Tailscale is present, provisions a Let's Encrypt certificate via
  3. `tailscale cert` for the machine's Tailscale FQDN. This cert is trusted
  4. by slicers without any manual CA installation, unlike the self-signed CA.
  5. Falls back gracefully when Tailscale is unavailable.
  6. """
  7. import asyncio
  8. import json
  9. import logging
  10. import os
  11. import re
  12. import shutil
  13. from dataclasses import dataclass, field
  14. from datetime import datetime, timezone
  15. from pathlib import Path
  16. from cryptography import x509
  17. logger = logging.getLogger(__name__)
  18. # Renew when fewer than this many days remain on the LE cert (LE issues 90-day certs;
  19. # Let's Encrypt recommends renewing at 30 days remaining)
  20. TS_CERT_EXPIRY_THRESHOLD_DAYS = 30
  21. # Defensive FQDN validation before passing to subprocess
  22. _FQDN_RE = re.compile(
  23. r"^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)+$",
  24. re.IGNORECASE,
  25. )
  26. # Detect tailnets where HTTPS cert generation is disabled — common for company/school
  27. # tailnets where the user is not a Tailscale admin.
  28. _HTTPS_DISABLED_RE = re.compile(
  29. r"(https? cert.*disabled|not enabled.*tailnet|cert.*not.*enabled)",
  30. re.IGNORECASE,
  31. )
  32. # Minimal environment for tailscale subprocess — passes OS/shell variables that
  33. # tailscale needs to locate its socket and config, but strips application secrets
  34. # (JWT keys, DB URLs, SMTP passwords, etc.) that the subprocess has no need for.
  35. _SUBPROCESS_ENV: dict[str, str] = {
  36. k: v
  37. for k, v in os.environ.items()
  38. if k
  39. in {
  40. "PATH",
  41. "HOME",
  42. "USER",
  43. "USERNAME",
  44. "LOGNAME",
  45. # Windows equivalents
  46. "USERPROFILE",
  47. "APPDATA",
  48. "LOCALAPPDATA",
  49. "PROGRAMFILES",
  50. "PROGRAMFILES(X86)",
  51. "SYSTEMROOT",
  52. "WINDIR",
  53. "COMPUTERNAME",
  54. "TEMP",
  55. "TMP",
  56. # Linux XDG dirs used by tailscale for socket/config
  57. "XDG_RUNTIME_DIR",
  58. "XDG_CONFIG_HOME",
  59. }
  60. }
  61. @dataclass
  62. class TailscaleStatus:
  63. """Runtime Tailscale availability and identity."""
  64. available: bool
  65. hostname: str # "myhost"
  66. tailnet_name: str # "tailnetname.ts.net"
  67. fqdn: str # "myhost.tailnetname.ts.net"
  68. tailscale_ips: list[str] = field(default_factory=list)
  69. error: str | None = None
  70. class TailscaleService:
  71. """Wraps Tailscale CLI commands for certificate provisioning.
  72. All methods are safe to call when Tailscale is absent — they return
  73. sensible defaults and never raise exceptions.
  74. """
  75. _docker_hint_logged: bool = False
  76. @classmethod
  77. def _log_docker_socket_hint(cls) -> None:
  78. """Log a one-time hint when running in Docker without the Tailscale socket mounted."""
  79. if cls._docker_hint_logged:
  80. return
  81. if Path("/.dockerenv").exists() and not Path("/var/run/tailscale/tailscaled.sock").exists():
  82. logger.info(
  83. "Running in Docker but Tailscale socket not found. "
  84. "Mount /var/run/tailscale/tailscaled.sock to enable Tailscale."
  85. )
  86. cls._docker_hint_logged = True
  87. async def _run_tailscale(self, *args: str, timeout: float = 30.0) -> tuple[int | None, bytes, bytes]:
  88. """Run a tailscale subcommand and return (returncode, stdout, stderr).
  89. Resolves the binary to an absolute path to guard against PATH hijacking.
  90. Raises OSError if the binary cannot be found or launched.
  91. Raises asyncio.TimeoutError if the subprocess exceeds the timeout.
  92. """
  93. binary = shutil.which("tailscale")
  94. if not binary:
  95. raise OSError("tailscale binary not found")
  96. process = await asyncio.create_subprocess_exec(
  97. binary,
  98. *args,
  99. stdout=asyncio.subprocess.PIPE,
  100. stderr=asyncio.subprocess.PIPE,
  101. env=_SUBPROCESS_ENV,
  102. )
  103. try:
  104. stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
  105. except asyncio.TimeoutError:
  106. process.kill()
  107. await process.wait()
  108. raise
  109. return process.returncode, stdout, stderr
  110. async def get_status(self) -> TailscaleStatus:
  111. """Query Tailscale status and return machine identity.
  112. Runs: tailscale status --json
  113. Returns TailscaleStatus(available=False) if the binary is missing,
  114. the daemon is not running, or any other error occurs.
  115. """
  116. if not shutil.which("tailscale"):
  117. self._log_docker_socket_hint()
  118. return TailscaleStatus(
  119. available=False,
  120. hostname="",
  121. tailnet_name="",
  122. fqdn="",
  123. error="tailscale binary not found",
  124. )
  125. try:
  126. returncode, stdout, stderr = await self._run_tailscale("status", "--json", timeout=5.0)
  127. except OSError as e:
  128. return TailscaleStatus(
  129. available=False,
  130. hostname="",
  131. tailnet_name="",
  132. fqdn="",
  133. error=str(e),
  134. )
  135. if returncode is None or returncode != 0:
  136. return TailscaleStatus(
  137. available=False,
  138. hostname="",
  139. tailnet_name="",
  140. fqdn="",
  141. error=stderr.decode(errors="replace").strip(),
  142. )
  143. try:
  144. data = json.loads(stdout)
  145. except json.JSONDecodeError as e:
  146. return TailscaleStatus(
  147. available=False,
  148. hostname="",
  149. tailnet_name="",
  150. fqdn="",
  151. error=f"JSON parse error: {e}",
  152. )
  153. self_info = data.get("Self", {})
  154. # DNSName includes trailing dot: "myhost.tailnetname.ts.net."
  155. fqdn = self_info.get("DNSName", "").rstrip(".")
  156. if not fqdn:
  157. return TailscaleStatus(
  158. available=False,
  159. hostname="",
  160. tailnet_name="",
  161. fqdn="",
  162. error="Tailscale not connected (no DNSName)",
  163. )
  164. # Split "myhost.tailnetname.ts.net" into hostname + tailnet_name
  165. parts = fqdn.split(".", 1)
  166. hostname = parts[0]
  167. tailnet_name = parts[1] if len(parts) > 1 else ""
  168. tailscale_ips = self_info.get("TailscaleIPs", [])
  169. logger.debug("Tailscale available: fqdn=%s, ips=%s", fqdn, tailscale_ips)
  170. return TailscaleStatus(
  171. available=True,
  172. hostname=hostname,
  173. tailnet_name=tailnet_name,
  174. fqdn=fqdn,
  175. tailscale_ips=tailscale_ips,
  176. )
  177. async def provision_cert(self, fqdn: str, cert_path: Path, key_path: Path) -> bool:
  178. """Request a Let's Encrypt certificate for the given Tailscale FQDN.
  179. Runs: tailscale cert --cert-file <cert_path> --key-file <key_path> <fqdn>
  180. Returns True on success, False on any error.
  181. """
  182. if not _FQDN_RE.match(fqdn):
  183. logger.warning("provision_cert: invalid FQDN %r, skipping", fqdn)
  184. return False
  185. # Ensure the target directory exists before tailscale cert writes to it
  186. cert_path.parent.mkdir(parents=True, exist_ok=True)
  187. logger.info("Provisioning Tailscale cert for %s -> %s", fqdn, cert_path)
  188. try:
  189. returncode, _, stderr = await self._run_tailscale(
  190. "cert",
  191. "--cert-file",
  192. str(cert_path),
  193. "--key-file",
  194. str(key_path),
  195. fqdn,
  196. timeout=60.0,
  197. )
  198. except OSError as e:
  199. logger.warning("tailscale cert failed (OS error): %s", e)
  200. return False
  201. if returncode is None or returncode != 0:
  202. err_text = stderr.decode(errors="replace").strip()
  203. if _HTTPS_DISABLED_RE.search(err_text):
  204. logger.warning(
  205. "Tailscale HTTPS certs are not enabled for this tailnet. "
  206. "Visit https://login.tailscale.com/admin/dns and enable HTTPS. "
  207. "Falling back to self-signed cert."
  208. )
  209. else:
  210. logger.warning("tailscale cert failed (exit %s): %s", returncode, err_text)
  211. return False
  212. # Restrict private key permissions
  213. try:
  214. key_path.chmod(0o600)
  215. except OSError as e:
  216. logger.warning("Could not set key permissions on %s: %s", key_path, e)
  217. # Verify the files are readable by the current process — on bare-metal, the
  218. # tailscale daemon or a prior sudo invocation may have left them root-owned.
  219. if not os.access(cert_path, os.R_OK) or not os.access(key_path, os.R_OK):
  220. logger.error(
  221. "Tailscale cert files at %s are not readable by this process. "
  222. "Fix with: sudo chown $(whoami):$(whoami) %s %s",
  223. cert_path.parent,
  224. cert_path,
  225. key_path,
  226. )
  227. return False
  228. logger.info("Tailscale cert provisioned: %s", cert_path)
  229. return True
  230. def cert_needs_renewal(self, cert_path: Path, fqdn: str | None = None) -> bool:
  231. """Check whether the certificate at cert_path needs to be renewed.
  232. Returns True if the file is absent, unreadable, expires within
  233. TS_CERT_EXPIRY_THRESHOLD_DAYS days, or if fqdn is given and does not
  234. appear in the certificate's Subject Alternative Names.
  235. """
  236. if not cert_path.exists():
  237. return True
  238. try:
  239. cert_pem = cert_path.read_bytes()
  240. # The file may contain a full chain; load only the first PEM block
  241. cert = x509.load_pem_x509_certificate(cert_pem)
  242. now = datetime.now(timezone.utc)
  243. days_remaining = (cert.not_valid_after_utc - now).days
  244. if days_remaining < TS_CERT_EXPIRY_THRESHOLD_DAYS:
  245. logger.info("Tailscale cert expires in %d days, renewal needed", days_remaining)
  246. return True
  247. # Validate that the cert covers the requested FQDN (guards against stale
  248. # cert after machine rename or tailnet migration). Case-insensitive per RFC 4343.
  249. if fqdn:
  250. try:
  251. san = cert.extensions.get_extension_for_class(x509.SubjectAlternativeName)
  252. dns_names = san.value.get_values_for_type(x509.DNSName)
  253. if fqdn.lower() not in {n.lower() for n in dns_names}:
  254. logger.info(
  255. "Tailscale cert SAN mismatch (cert has %s, need %s), renewal needed",
  256. dns_names,
  257. fqdn,
  258. )
  259. return True
  260. except x509.ExtensionNotFound:
  261. logger.info("Tailscale cert has no SAN extension, renewal needed")
  262. return True
  263. logger.debug("Tailscale cert valid for %d more days", days_remaining)
  264. return False
  265. except (OSError, ValueError) as e:
  266. logger.warning("Could not read Tailscale cert %s: %s", cert_path, e)
  267. return True
  268. async def ensure_cert(self, fqdn: str, cert_path: Path, key_path: Path) -> bool:
  269. """Ensure a fresh certificate exists at cert_path.
  270. Skips provisioning if the cert is present, not near expiry, and covers fqdn.
  271. Returns True if a valid cert is now available.
  272. """
  273. if not self.cert_needs_renewal(cert_path, fqdn=fqdn):
  274. logger.debug("Tailscale cert is fresh, skipping provision")
  275. return True
  276. return await self.provision_cert(fqdn, cert_path, key_path)
# Module-level singleton — import this in other modules rather than constructing
# a new TailscaleService; the class defines no per-instance state, so one shared
# instance is sufficient.
tailscale_service = TailscaleService()