| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- """Pure helper functions for OIDC routes.
- Hosts the SSRF guard for admin-supplied icon URLs. Stricter than
- ``_spoolman_helpers.assert_safe_spoolman_url`` — Spoolman intentionally allows
- loopback/RFC-1918 (same-LAN topology) while OIDC icons must be reachable on
- the public internet (IdP-hosted), so private addresses there are SSRF probes.
- """
- from __future__ import annotations
- import ipaddress
- from urllib.parse import urlparse
- from backend.app.api.routes._url_safety import CLOUD_METADATA_IPS, NUMERIC_IP_RE, unwrap_ipv4_mapped
def assert_safe_public_https_url(url: str) -> None:
    """Raise ValueError if *url* is unsafe to fetch as a public HTTPS resource.

    Used for OIDC provider icon URLs (#1333). Stricter than the Spoolman SSRF
    guard: also rejects loopback, private (RFC-1918), and link-local addresses
    because an OIDC icon legitimately lives only on the public internet.

    Checks performed:

    - Scheme must be ``https`` (no ``http://``, ``file://``, ``gopher://``, …).
    - A host component must be present (``https:///x`` and ``https:x`` are
      rejected instead of being waved through as an unresolved "hostname").
    - Numeric-encoded IPv4 (decimal ``2130706433``, hex ``0x7f000001``) is
      rejected — libc and browsers parse those as valid addresses while
      Python's ``ipaddress`` raises ValueError, so they bypass the IP block
      below if not caught first.
    - Cloud-provider metadata endpoints (169.254.169.254, 100.100.100.200,
      fd00:ec2::254) — classic SSRF credential-exfil targets.
    - Loopback (127.0.0.0/8, ::1), private RFC-1918 (10/8, 172.16/12,
      192.168/16) and link-local (169.254/16, fe80::/10) addresses.
    - Multicast (224.0.0.0/4, ff00::/8) and unspecified (0.0.0.0, ::).
    - IPv4-mapped IPv6 (``::ffff:127.0.0.1``) — unwrapped before the IP-class
      check so an attacker can't bypass via IPv6 encoding.

    Hostname-based addresses are accepted without DNS resolution (consistent
    with ``_validate_issuer_url`` policy — the operator is trusted to
    configure a sensible IdP host).

    Args:
        url: The candidate icon URL.

    Raises:
        ValueError: If any of the checks above fails. ``urlparse`` itself may
            also raise ValueError for malformed input (e.g. an unterminated
            IPv6 bracket); that propagates unchanged.
    """
    parsed = urlparse(url)
    if parsed.scheme.lower() != "https":
        raise ValueError("icon URL must use https://")

    hostname = (parsed.hostname or "").lower()
    # An empty host would otherwise fail ip_address() below and be accepted
    # via the "it's a hostname" early return — reject it explicitly.
    if not hostname:
        raise ValueError("icon URL must include a hostname")

    if NUMERIC_IP_RE.match(hostname):
        raise ValueError("icon URL must not use numeric-encoded IP addresses")

    try:
        addr = ipaddress.ip_address(hostname)
    except ValueError:
        return  # hostname — out of scope (no DNS check by design)

    effective = unwrap_ipv4_mapped(addr)
    if effective in CLOUD_METADATA_IPS:
        raise ValueError("icon URL must not point to a cloud metadata endpoint")

    # Order matters: 0.0.0.0 sets BOTH is_private and is_unspecified — check
    # the more-specific is_unspecified first so the error message points at
    # the actual misuse. Similarly 127.0.0.1 sets is_loopback and is_private
    # (private under IANA's reservation); is_loopback first is clearer.
    if effective.is_unspecified:
        raise ValueError("icon URL must not point to an unspecified address")
    if effective.is_loopback:
        raise ValueError("icon URL must not point to a loopback address")
    if effective.is_link_local:
        raise ValueError("icon URL must not point to a link-local address")
    if effective.is_multicast:
        raise ValueError("icon URL must not point to a multicast address")
    if effective.is_private:
        raise ValueError("icon URL must not point to a private (RFC-1918) address")
|