_url_safety.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
"""Shared URL-safety primitives used by both SSRF guards in this package.

The two top-level assertion functions —
``_spoolman_helpers.assert_safe_spoolman_url`` (Spoolman, deliberately allows
loopback/RFC-1918 because same-LAN deployment is the standard topology) and
``_oidc_helpers.assert_safe_public_https_url`` (OIDC icons, must be reachable
on the public internet, so loopback/private are rejected) — share the
*data* (cloud-metadata IP set, numeric-encoded-IP regex) but not the
*policy*. Only the data, plus the policy-neutral ``unwrap_ipv4_mapped``
helper, lives here. The assertion functions stay in their respective
modules with their distinct policies intact.
"""
  11. from __future__ import annotations
  12. import ipaddress
  13. import re
  14. # Cloud-provider metadata endpoints — the classic SSRF credential-exfil
  15. # targets. Both guards reject these unconditionally.
  16. CLOUD_METADATA_IPS = frozenset(
  17. {
  18. # AWS / GCP / Azure / Oracle / DigitalOcean IMDS
  19. ipaddress.ip_address("169.254.169.254"),
  20. # Alibaba Cloud metadata
  21. ipaddress.ip_address("100.100.100.200"),
  22. # AWS IMDS IPv6
  23. ipaddress.ip_address("fd00:ec2::254"),
  24. }
  25. )
  26. # libc and browsers parse numeric-encoded IP forms (decimal ``2130706433``
  27. # for 127.0.0.1, hex ``0x7f000001``) but Python's ``ipaddress.ip_address``
  28. # raises ValueError on these, so they slip past the IP-class checks if
  29. # not caught first. Used by both guards to reject up-front.
  30. NUMERIC_IP_RE = re.compile(r"^(0x[0-9a-f]+|[0-9]+)$", re.I)
  31. def unwrap_ipv4_mapped(
  32. addr: ipaddress.IPv4Address | ipaddress.IPv6Address,
  33. ) -> ipaddress.IPv4Address | ipaddress.IPv6Address:
  34. """Return the underlying IPv4 for an IPv4-mapped IPv6 address, else return *addr*.
  35. ``::ffff:127.0.0.1`` and similar mapped forms must be unwrapped before
  36. the per-class checks (``is_private``, ``is_loopback``, …) — otherwise
  37. an attacker can encode a blocked IPv4 address as an IPv6 literal to
  38. bypass the guard.
  39. """
  40. if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped is not None:
  41. return addr.ipv4_mapped
  42. return addr