yunoadmin
/
bambuddy
同期ミラー https://github.com/maziggy/bambuddy


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
							"""GHSA-gc24-px2r-5qmf backstop: no hardcoded fallback secrets in source.

The first half of GHSA-gc24-px2r-5qmf (CVSS 9.8) was a literal
``bambuddy-secret-key-change-in-production`` string used as the JWT
signing key when ``JWT_SECRET_KEY`` was unset. Production Docker images
shipped with that exact string — meaning anyone who pulled the image
could forge admin tokens for any Bambuddy instance running unmodified.

This test walks every source file in ``backend/app/`` at parse time and
flags string literals that look like credential fallbacks. It is
deliberately stricter than the actual exploit: any
``*-change-in-production`` / ``change-me`` / ``your-secret-here``
shaped string is a code smell at a security boundary, regardless of
whether the call site happens to enforce env-var presence today. The
goal is to keep that string class out of the codebase entirely so
future code paths cannot re-introduce the same vulnerability shape.

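If you need one of these strings as a test input (e.g. asserting that
a forged token signed with the old leaked secret is *rejected*), keep it
in test code outside ``backend/app/`` (the only tree this scan walks) —
never in the production source. A production file that must mention one
for documentation purposes goes in ``ALLOWED_PRODUCTION_FILES`` below.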
"""

from __future__ import annotations

import ast
from pathlib import Path

import pytest

# Substring patterns that must never appear in production source as string
# literals. Matching is a case-insensitive substring test: each literal is
# lower-cased before comparison, so every entry here must itself be lowercase.
#
# Order matters for reporting only: the scan stops at the first pattern that
# matches a literal, so a specific pattern must come before any generic
# pattern it contains, otherwise the specific one is never the one reported.
FORBIDDEN_PATTERNS: tuple[str, ...] = (
    # The exact leaked value from GHSA-gc24 — keep as a regression marker
    # so any reintroduction is caught loudly with the CVE number attached.
    # Listed first because it contains "change-in-production".
    "bambuddy-secret-key-change-in-production",
    "change-in-production",
    "change-me-in-production",
    "your-secret-here",
    "your-secret-key",
    "default-secret-key",
    "insecure-default",
    "placeholder-secret",
    "replace-this-secret",
)

# Production-source files where these patterns are TOLERATED because they
# document the historical leak (CHANGELOG / migration notes / security
# advisory references) rather than being used as a credential fallback.
# Entries are paths relative to the repository root (that is what the scan
# compares against). Add an entry with a `# reason: ...` comment, never
# silently.
ALLOWED_PRODUCTION_FILES: frozenset[Path] = frozenset()


def _python_files_under(root: Path) -> list[Path]:
    """Return every ``.py`` file under ``root``, skipping caches and virtualenvs.

    A file is skipped when any path component *below* ``root`` is
    ``__pycache__``, ``.venv`` or ``venv``. Components of ``root`` itself
    are deliberately ignored: if the checkout happens to live under a
    directory called ``venv``, matching on the full path would exclude
    every file and turn the scan into a silent no-op.

    The result is sorted so that findings are reported in a stable order.
    """
    skipped_dirs = {"__pycache__", ".venv", "venv"}
    return sorted(
        path
        for path in root.rglob("*.py")
        if skipped_dirs.isdisjoint(path.relative_to(root).parts)
    )


def _string_literals_in(file_path: Path) -> list[tuple[int, str]]:
    """Return ``(lineno, value)`` for every string literal in ``file_path``.

    The file is parsed with ``ast`` so that ``#`` comments are never
    reported. Docstrings are string constants in the AST and are reported
    on purpose: a docstring is not a safe place to put a credential either.
    ``bytes`` literals are reported too (decoded as UTF-8, undecodable
    bytes replaced), since a signing key can just as well be written as
    ``b"..."``.

    The raw bytes are handed to the parser so that Python's own source
    decoding applies (UTF-8 BOM, PEP 263 coding cookie). Decoding the file
    as UTF-8 first would leave a BOM in the text, which the parser rejects,
    and would fail outright on files declaring another encoding — either
    way the file would be skipped without a trace.

    Returns an empty list when the file cannot be parsed. Such a file has
    a separate bug that belongs to other checks; this helper only avoids
    crashing on it.
    """
    try:
        tree = ast.parse(file_path.read_bytes(), filename=str(file_path))
    except (SyntaxError, UnicodeDecodeError):
        return []

    literals: list[tuple[int, str]] = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Constant):
            continue
        value = node.value
        if isinstance(value, bytes):
            # Normalise to text so callers can substring-match uniformly.
            value = value.decode("utf-8", errors="replace")
        if isinstance(value, str):
            literals.append((node.lineno, value))
    return literals


@pytest.mark.unit
def test_no_hardcoded_secrets_in_production_source() -> None:
    """SEC-AUTH-3 (SECURITY.md): no credential-shaped fallback strings in backend/app/.

    Walks every Python source file under ``backend/app/`` and flags string
    literals that contain any entry of ``FORBIDDEN_PATTERNS`` (compared
    case-insensitively). At most one finding is reported per literal: the
    first pattern that matches. Files listed in ``ALLOWED_PRODUCTION_FILES``
    (paths relative to the repository root) are skipped.

    The test also fails if no source files are found at all, because a
    scan over nothing would otherwise pass without having checked anything.

    Failure here means a code change has reintroduced the GHSA-gc24
    failure mode: a string literal that production code could fall
    back to as a credential, defeating the env-var-or-fail design of
    ``_get_jwt_secret()``.
    """
    # This file is expected to sit three directories below the repo root.
    repo_root = Path(__file__).resolve().parents[3]
    production_root = repo_root / "backend" / "app"
    assert production_root.is_dir(), f"Expected backend/app/ at {production_root}"

    sources = _python_files_under(production_root)
    # Guard against a vacuous pass (wrong root, over-eager exclusion, ...).
    assert sources, f"No Python files found under {production_root} — nothing was scanned"

    findings: list[str] = []
    for src in sources:
        relative = src.relative_to(repo_root)
        if relative in ALLOWED_PRODUCTION_FILES:
            continue
        for lineno, literal in _string_literals_in(src):
            literal_lower = literal.lower()
            # Report only the first matching pattern for each literal.
            matched = next((p for p in FORBIDDEN_PATTERNS if p in literal_lower), None)
            if matched is not None:
                findings.append(f"  {relative}:{lineno} contains forbidden pattern '{matched}': {literal!r}")

    assert not findings, (
        "Hardcoded credential-shaped strings found in production source — "
        "this is the GHSA-gc24-px2r-5qmf shape (CVSS 9.8 hardcoded JWT secret). "
        "See SECURITY.md rule 3 'No hardcoded fallback secrets'.\n\n" + "\n".join(findings)
    )


def _literal_string_results(expr: ast.expr | None) -> list[str]:
    """Return the string literals that ``expr`` can evaluate to directly.

    Covers a bare string constant, any operand of an ``or`` / ``and``
    chain (``value or "fallback"``), and either branch of a conditional
    expression (``a if cond else "fallback"``), recursively. Strings that
    merely appear inside a call or subscript (argument values, dict keys)
    are not results of the expression and are not reported.
    """
    if expr is None:
        return []
    if isinstance(expr, ast.Constant):
        return [expr.value] if isinstance(expr.value, str) else []
    if isinstance(expr, ast.BoolOp):
        return [lit for operand in expr.values for lit in _literal_string_results(operand)]
    if isinstance(expr, ast.IfExp):
        return _literal_string_results(expr.body) + _literal_string_results(expr.orelse)
    return []


@pytest.mark.unit
def test_jwt_secret_loader_has_no_hardcoded_fallback() -> None:
    """SEC-AUTH-3 (SECURITY.md): _get_jwt_secret never returns a literal string.

    The post-GHSA-gc24 design of ``_get_jwt_secret`` reads from env, then
    file, then generates a random value via ``secrets.token_urlsafe``.
    No code path returns a string literal. This test asserts that
    structural property by walking the function's AST and confirming
    that no ``return`` statement can produce a string literal — neither
    directly (``return "x"``) nor as a fallback operand
    (``return value or "x"``, ``return value if value else "x"``).

    This is a structural backstop, not a data-flow analysis: a literal
    assigned to a variable first, or passed as a default argument to a
    call, is not detected here.

    If this test fails, ``_get_jwt_secret`` has been modified to return
    a hardcoded value somewhere — likely as a "convenience default" that
    will end up in a shipped Docker image, which is exactly how the
    original GHSA-gc24 advisory happened.
    """
    # This file is expected to sit three directories below the repo root.
    repo_root = Path(__file__).resolve().parents[3]
    auth_module = repo_root / "backend" / "app" / "core" / "auth.py"
    tree = ast.parse(auth_module.read_text(encoding="utf-8"))

    # Accept ``async def`` as well, so the loader going async cannot
    # make this check report "not found" for the wrong reason.
    loader: ast.FunctionDef | ast.AsyncFunctionDef | None = next(
        (
            node
            for node in ast.walk(tree)
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)) and node.name == "_get_jwt_secret"
        ),
        None,
    )

    assert loader is not None, "_get_jwt_secret() not found in backend/app/core/auth.py — has it been renamed?"

    literal_returns: list[tuple[int, str]] = [
        (node.lineno, literal)
        for node in ast.walk(loader)
        if isinstance(node, ast.Return)
        for literal in _literal_string_results(node.value)
    ]

    assert not literal_returns, (
        "_get_jwt_secret() has a string-literal return — this is the GHSA-gc24 vulnerability shape. "
        "Use os.environ + file storage + secrets.token_urlsafe; never return a hardcoded string.\n"
        + "\n".join(f"  auth.py:{ln}: returns {val!r}" for ln, val in literal_returns)
    )