test_no_hardcoded_secrets.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. """GHSA-gc24-px2r-5qmf backstop: no hardcoded fallback secrets in source.
  2. The first half of GHSA-gc24-px2r-5qmf (CVSS 9.8) was a literal
  3. ``bambuddy-secret-key-change-in-production`` string used as the JWT
  4. signing key when ``JWT_SECRET_KEY`` was unset. Production Docker images
  5. shipped with that exact string — meaning anyone who pulled the image
  6. could forge admin tokens for any Bambuddy instance running unmodified.
  7. This test walks every source file in ``backend/app/`` at parse time and
  8. flags string literals that look like credential fallbacks. It is
  9. deliberately stricter than the actual exploit: any
  10. ``*-change-in-production`` / ``change-me`` / ``your-secret-here``
  11. shaped string is a code smell at a security boundary, regardless of
  12. whether the call site happens to enforce env-var presence today. The
  13. goal is to keep that string class out of the codebase entirely so
  14. future code paths cannot re-introduce the same vulnerability shape.
  15. If you need one of these strings as a test input (e.g. asserting that
  16. a forged token signed with the old leaked secret is *rejected*), use
  17. the ``ALLOWED_PRODUCTION_FILES`` allowlist below — never the
  18. production source.
  19. """
  20. from __future__ import annotations
  21. import ast
  22. from pathlib import Path
  23. import pytest
  24. # Substring patterns that should never appear in production source as
  25. # string literals. Case-insensitive substring match.
  26. FORBIDDEN_PATTERNS: tuple[str, ...] = (
  27. "change-in-production",
  28. "change-me-in-production",
  29. "your-secret-here",
  30. "your-secret-key",
  31. "default-secret-key",
  32. "insecure-default",
  33. "placeholder-secret",
  34. "replace-this-secret",
  35. # The exact leaked value from GHSA-gc24 — keep as a regression marker
  36. # so any reintroduction is caught loudly with the CVE number attached.
  37. "bambuddy-secret-key-change-in-production",
  38. )
  39. # Production-source files where these patterns are TOLERATED because they
  40. # document the historical leak (CHANGELOG / migration notes / security
  41. # advisory references) rather than being used as a credential fallback.
  42. # Add an entry with a `# reason: ...` comment, never silently.
  43. ALLOWED_PRODUCTION_FILES: frozenset[Path] = frozenset()
  44. def _python_files_under(root: Path) -> list[Path]:
  45. """Yield every .py file under ``root`` excluding caches and virtualenvs."""
  46. return [
  47. p
  48. for p in root.rglob("*.py")
  49. if "__pycache__" not in p.parts and ".venv" not in p.parts and "venv" not in p.parts
  50. ]
  51. def _string_literals_in(file_path: Path) -> list[tuple[int, str]]:
  52. """Return (lineno, value) for every string literal in ``file_path``.
  53. Uses ``ast`` to avoid false positives from comments / docstrings;
  54. docstrings are ``ast.Constant`` too but we explicitly include them
  55. because a docstring is not a safe place to put a credential either.
  56. Returns an empty list on syntax-error files rather than crashing —
  57. a parse failure means the file has a separate bug and we don't want
  58. this test to mask it.
  59. """
  60. try:
  61. tree = ast.parse(file_path.read_text(encoding="utf-8"))
  62. except (SyntaxError, UnicodeDecodeError):
  63. return []
  64. return [
  65. (node.lineno, node.value)
  66. for node in ast.walk(tree)
  67. if isinstance(node, ast.Constant) and isinstance(node.value, str)
  68. ]
  69. @pytest.mark.unit
  70. def test_no_hardcoded_secrets_in_production_source() -> None:
  71. """SEC-AUTH-3 (SECURITY.md): no credential-shaped fallback strings in backend/app/.
  72. Walks every Python source file under ``backend/app/``. Flags string
  73. literals matching any pattern in ``FORBIDDEN_PATTERNS``. Allowlisted
  74. files (e.g. tests asserting we reject the leaked GHSA-gc24 token)
  75. are exempt via ``ALLOWED_PRODUCTION_FILES``.
  76. Failure here means a code change has reintroduced the GHSA-gc24
  77. failure mode: a string literal that production code could fall
  78. back to as a credential, defeating the env-var-or-fail design of
  79. ``_get_jwt_secret()``.
  80. """
  81. repo_root = Path(__file__).resolve().parents[3]
  82. production_root = repo_root / "backend" / "app"
  83. assert production_root.is_dir(), f"Expected backend/app/ at {production_root}"
  84. findings: list[str] = []
  85. for src in _python_files_under(production_root):
  86. relative = src.relative_to(repo_root)
  87. if relative in ALLOWED_PRODUCTION_FILES:
  88. continue
  89. for lineno, literal in _string_literals_in(src):
  90. literal_lower = literal.lower()
  91. for pattern in FORBIDDEN_PATTERNS:
  92. if pattern in literal_lower:
  93. findings.append(f" {relative}:{lineno} contains forbidden pattern '{pattern}': {literal!r}")
  94. break
  95. assert not findings, (
  96. "Hardcoded credential-shaped strings found in production source — "
  97. "this is the GHSA-gc24-px2r-5qmf shape (CVSS 9.8 hardcoded JWT secret). "
  98. "See SECURITY.md rule 3 'No hardcoded fallback secrets'.\n\n" + "\n".join(findings)
  99. )
  100. @pytest.mark.unit
  101. def test_jwt_secret_loader_has_no_hardcoded_fallback() -> None:
  102. """SEC-AUTH-3 (SECURITY.md): _get_jwt_secret never returns a literal string.
  103. The post-GHSA-gc24 design of ``_get_jwt_secret`` reads from env, then
  104. file, then generates a random value via ``secrets.token_urlsafe``.
  105. No code path returns a string literal. This test asserts that
  106. structural property by walking the function's AST and confirming
  107. every ``return`` statement returns either a Name (variable) or a
  108. Call (function result), never an ast.Constant string literal.
  109. If this test fails, ``_get_jwt_secret`` has been modified to return
  110. a hardcoded value somewhere — likely as a "convenience default" that
  111. will end up in a shipped Docker image, which is exactly how the
  112. original GHSA-gc24 advisory happened.
  113. """
  114. repo_root = Path(__file__).resolve().parents[3]
  115. auth_module = repo_root / "backend" / "app" / "core" / "auth.py"
  116. tree = ast.parse(auth_module.read_text(encoding="utf-8"))
  117. loader: ast.FunctionDef | None = None
  118. for node in ast.walk(tree):
  119. if isinstance(node, ast.FunctionDef) and node.name == "_get_jwt_secret":
  120. loader = node
  121. break
  122. assert loader is not None, "_get_jwt_secret() not found in backend/app/core/auth.py — has it been renamed?"
  123. literal_returns: list[tuple[int, str]] = []
  124. for node in ast.walk(loader):
  125. if isinstance(node, ast.Return) and isinstance(node.value, ast.Constant) and isinstance(node.value.value, str):
  126. literal_returns.append((node.lineno, node.value.value))
  127. assert not literal_returns, (
  128. "_get_jwt_secret() has a string-literal return — this is the GHSA-gc24 vulnerability shape. "
  129. "Use os.environ + file storage + secrets.token_urlsafe; never return a hardcoded string.\n"
  130. + "\n".join(f" auth.py:{ln}: returns {val!r}" for ln, val in literal_returns)
  131. )