oidc_icon.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
"""OIDC provider icon fetcher (#1333).

Server-side proxy that fetches an admin-supplied icon URL and returns
``(bytes, content_type, etag)``. The bytes are cached in the
``oidc_providers.icon_data`` BLOB column so the SPA can serve them from
``/api/v1/auth/oidc/providers/{id}/icon`` (same-origin) — avoiding any
loosening of the strict ``img-src 'self' data: blob:`` CSP.

Pattern mirrors ``services/makerworld.fetch_thumbnail``:

- ``follow_redirects=False`` so the SSRF host allowlist (here:
  ``assert_safe_public_https_url``) isn't bypassed by a 302 to a private
  address.
- MIME whitelist (PNG/JPEG/WebP/GIF). SVG is rejected in v1 — XML payloads
  carry too many corner cases (xlink, external refs) for an MVP.
- ``application/octet-stream`` is accepted only if the URL path ends in a
  whitelisted image extension; the response Content-Type alone is not
  trusted because some CDNs serve images as octet-stream.
- 1 MB hard cap (typical OIDC icons are 5-50 KB; 1 MB is generous).
- 10s httpx timeout (applied per connect/read/write/pool phase, not as one
  overall deadline), matching the OIDC discovery/JWKS timeouts in
  routes/mfa.py.
"""
  18. from __future__ import annotations
  19. import hashlib
  20. import logging
  21. from urllib.parse import urlparse
  22. import httpx
  23. logger = logging.getLogger(__name__)
  24. _MAX_ICON_BYTES = 1 * 1024 * 1024 # 1 MB
  25. _FETCH_TIMEOUT_SECONDS = 10.0
  26. # Content-Type whitelist. SVG is intentionally omitted — see module docstring.
  27. _ALLOWED_MIME_TYPES = frozenset(
  28. {
  29. "image/png",
  30. "image/jpeg",
  31. "image/webp",
  32. "image/gif",
  33. }
  34. )
  35. # Extension → MIME fallback for ``application/octet-stream`` responses.
  36. _EXT_TO_MIME = {
  37. ".png": "image/png",
  38. ".jpg": "image/jpeg",
  39. ".jpeg": "image/jpeg",
  40. ".webp": "image/webp",
  41. ".gif": "image/gif",
  42. }
  43. class OIDCIconError(Exception):
  44. """Base class for icon-fetch failures."""
  45. class OIDCIconUrlError(OIDCIconError):
  46. """The URL is invalid or rejected by the SSRF guard.
  47. Maps to a 400 Bad Request when surfaced at the API layer.
  48. """
  49. class OIDCIconUnavailableError(OIDCIconError):
  50. """The fetch reached the upstream but the response was unusable.
  51. Network timeouts, non-200 status, wrong content-type, oversized payload,
  52. redirects (we never follow), etc. Maps to a 400 at the API layer because
  53. the admin's input (the URL) is what's at fault.
  54. """
  55. def _resolve_content_type(upstream_type: str, url_path: str) -> str:
  56. """Map an upstream Content-Type to a whitelisted MIME, or raise.
  57. Three-step derivation:
  58. 1. Trust upstream ``image/*`` if it's in the allowlist.
  59. 2. Fall back to URL extension if upstream returned
  60. ``application/octet-stream`` (some CDNs do this with
  61. ``Content-Disposition: attachment; filename="…png"``).
  62. 3. Distinct error when the header is missing entirely (#1333 review)
  63. — empty quotes in a generic "unsupported content-type: ''" message
  64. was user-hostile.
  65. Extracted from ``fetch_icon`` so the dispatch logic is unit-testable
  66. without spinning up the streaming-mock harness.
  67. """
  68. if not upstream_type:
  69. raise OIDCIconUnavailableError("Icon URL response is missing a Content-Type header")
  70. if upstream_type in _ALLOWED_MIME_TYPES:
  71. return upstream_type
  72. if upstream_type == "application/octet-stream":
  73. path_lower = url_path.lower()
  74. for ext, mime in _EXT_TO_MIME.items():
  75. if path_lower.endswith(ext):
  76. return mime
  77. raise OIDCIconUnavailableError("Icon URL returned application/octet-stream with no image extension")
  78. raise OIDCIconUnavailableError(
  79. f"Icon URL returned unsupported content-type: {upstream_type!r} "
  80. "(allowed: image/png, image/jpeg, image/webp, image/gif)"
  81. )
  82. async def fetch_icon(url: str) -> tuple[bytes, str, str]:
  83. """Fetch ``url`` and return ``(bytes, content_type, etag)``.
  84. Streams the response body and aborts as soon as ``_MAX_ICON_BYTES`` is
  85. exceeded — never buffers more than one chunk past the cap, so a hostile
  86. or misconfigured IdP serving a 500 MB payload cannot OOM the server.
  87. Raises:
  88. OIDCIconUrlError: URL parsing/scheme issue OR ``httpx.InvalidURL``
  89. (validator should have caught these earlier; this is a
  90. defence-in-depth check).
  91. OIDCIconUnavailableError: upstream issue — timeout, non-200,
  92. redirect, wrong content-type, oversized payload, empty body.
  93. """
  94. try:
  95. parsed = urlparse(url)
  96. except ValueError as exc:
  97. raise OIDCIconUrlError(f"Invalid icon URL: {exc}") from exc
  98. if parsed.scheme.lower() != "https":
  99. # Pydantic validator + assert_safe_public_https_url catch this earlier,
  100. # but the service is the last defence — refuse non-HTTPS even if a
  101. # future code path bypassed the validators.
  102. raise OIDCIconUrlError("Icon URL must use https://")
  103. try:
  104. async with (
  105. httpx.AsyncClient(timeout=_FETCH_TIMEOUT_SECONDS) as client,
  106. client.stream("GET", url, follow_redirects=False) as response,
  107. ):
  108. if response.status_code != 200:
  109. # Any non-200 — including 301/302 redirects (we set follow_redirects=False
  110. # so the SSRF guard on the original URL isn't bypassed by a redirect
  111. # to a private address).
  112. raise OIDCIconUnavailableError(
  113. f"Icon URL returned HTTP {response.status_code} "
  114. "(redirects are not followed; the URL must respond with the image directly)"
  115. )
  116. upstream_type = response.headers.get("content-type", "").split(";")[0].strip().lower()
  117. content_type = _resolve_content_type(upstream_type, parsed.path)
  118. # Stream with early-exit at the size cap. Read in chunks so a
  119. # hostile 500 MB body never gets allocated whole — we raise
  120. # immediately when the running total crosses the cap.
  121. chunks: list[bytes] = []
  122. total = 0
  123. async for chunk in response.aiter_bytes():
  124. total += len(chunk)
  125. if total > _MAX_ICON_BYTES:
  126. raise OIDCIconUnavailableError(f"Icon exceeds {_MAX_ICON_BYTES // 1024} KB cap")
  127. chunks.append(chunk)
  128. payload = b"".join(chunks)
  129. except httpx.TimeoutException as exc:
  130. raise OIDCIconUnavailableError(f"Icon fetch timed out: {exc}") from exc
  131. except httpx.InvalidURL as exc:
  132. # ``httpx.InvalidURL`` is a sibling of ``httpx.HTTPError`` (verified:
  133. # MRO is ``InvalidURL → Exception``, no HTTPError in between). Fires
  134. # at send-time for URLs that ``urlparse`` accepts but httpx refuses —
  135. # typically null bytes or control chars. Map to URL-error path so
  136. # the admin sees a 400, not a 500.
  137. raise OIDCIconUrlError(f"Invalid icon URL: {exc}") from exc
  138. except httpx.HTTPError as exc:
  139. raise OIDCIconUnavailableError(f"Icon fetch failed: {exc}") from exc
  140. if not payload:
  141. raise OIDCIconUnavailableError("Icon URL returned an empty body")
  142. # SHA-256 hex is deterministic — identical bytes always yield the same
  143. # ETag so revalidation via If-None-Match works across server restarts.
  144. etag = hashlib.sha256(payload).hexdigest()
  145. return payload, content_type, etag