"""Tests for ``backend.app.core.trace`` — the per-request trace ID plumbing that ties uvicorn HTTP access lines to the application log records produced while handling that request. These tests stay at the unit level: the ContextVar / filter / inbound- ID validator can each be exercised directly without spinning up a real FastAPI app, and going through Starlette's TestClient just to assert "the middleware sets a header" would obscure rather than illuminate the contract. """ from __future__ import annotations import asyncio import logging import pytest from backend.app.core.trace import ( TRACE_ID_PLACEHOLDER, TraceIDFilter, generate_trace_id, get_trace_id, normalise_inbound_trace_id, trace_id_var, ) @pytest.fixture(autouse=True) def _reset_trace_id(): """Each test gets a fresh ``trace_id_var`` — without the reset, a test that sets the var would leak its value into the next test running on the same event loop, producing surprising 'why is this other test seeing my ID?' failures.""" token = trace_id_var.set(TRACE_ID_PLACEHOLDER) try: yield finally: trace_id_var.reset(token) def _record(message: str = "irrelevant") -> logging.LogRecord: """Build a vanilla log record — the filter doesn't care about its contents, only the surrounding ContextVar value at filter time.""" return logging.LogRecord( name="test", level=logging.INFO, pathname="", lineno=0, msg=message, args=None, exc_info=None ) class TestPlaceholderWhenUnset: def test_get_trace_id_returns_placeholder_outside_request(self): """Code paths with no HTTP request scope (startup, MQTT callbacks, scheduled tasks) must see the placeholder rather than ``None``, so the format-string column is always populated and missing values stay greppable.""" assert get_trace_id() == TRACE_ID_PLACEHOLDER def test_filter_sets_placeholder_when_no_request_context(self): """The filter must annotate every record, including those emitted when no request is in flight — the format string would otherwise raise KeyError on those records.""" record = _record() assert TraceIDFilter().filter(record) is True assert record.trace_id == TRACE_ID_PLACEHOLDER class TestRequestScopePropagation: def test_filter_picks_up_active_request_id(self): """Inside a request, the ContextVar holds that request's ID and the filter copies it onto the record — this is the whole point of the plumbing.""" trace_id_var.set("abc12345") record = _record() TraceIDFilter().filter(record) assert record.trace_id == "abc12345" @pytest.mark.asyncio async def test_id_propagates_into_spawned_task(self): """asyncio copies the current context into ``create_task``, so background work spawned from inside a request inherits the same trace ID without explicit threading. This is why a ContextVar beats ``request.state``: state doesn't survive the hop.""" trace_id_var.set("parent01") captured: list[str] = [] async def _child(): captured.append(get_trace_id()) await asyncio.create_task(_child()) assert captured == ["parent01"] @pytest.mark.asyncio async def test_concurrent_requests_do_not_leak_ids_into_each_other(self): """Two concurrent requests each see only their own trace ID — if the filter ever started reading from the wrong context (e.g. a process-global) this test would catch it immediately.""" seen: dict[str, str] = {} async def _request(label: str, tid: str): trace_id_var.set(tid) # Yield to the scheduler so the other coroutine has a chance # to overwrite a poorly-scoped global if one existed. await asyncio.sleep(0) seen[label] = get_trace_id() await asyncio.gather( _request("a", "aaaaaaaa"), _request("b", "bbbbbbbb"), ) assert seen == {"a": "aaaaaaaa", "b": "bbbbbbbb"} class TestGenerateTraceId: def test_generated_ids_are_hex(self): tid = generate_trace_id() int(tid, 16) # raises ValueError if not hex assert tid def test_generated_ids_are_unique_across_calls(self): """secrets.token_hex; collisions across a handful of calls would signal a generator regression rather than statistical bad luck.""" ids = {generate_trace_id() for _ in range(200)} assert len(ids) == 200 class TestNormaliseInboundTraceId: """Hostile / buggy callers sending ``X-Trace-Id`` must NOT be able to push log-injection payloads (newlines, control chars, megabyte blobs) into bambuddy.log via the trace ID column. Anything that fails the gate gets ``None`` so the middleware mints fresh.""" def test_none_input_returns_none(self): assert normalise_inbound_trace_id(None) is None def test_empty_string_returns_none(self): """An explicit ``X-Trace-Id:`` header with empty value is indistinguishable from no header for our purposes — mint fresh. """ assert normalise_inbound_trace_id("") is None def test_short_alphanumeric_accepted(self): assert normalise_inbound_trace_id("abc123") == "abc123" def test_uuid_format_accepted(self): """32-char hex (UUID-style without dashes) is the most common real-world correlation ID format — must round-trip unchanged.""" uuid_like = "0123456789abcdef0123456789abcdef" assert normalise_inbound_trace_id(uuid_like) == uuid_like def test_dash_and_underscore_accepted(self): """Datadog / OpenTelemetry frequently use dashes between span components; underscores show up in some Bambu-internal IDs we might want to echo. Both stay in the whitelist.""" assert normalise_inbound_trace_id("trace-abc_123") == "trace-abc_123" @pytest.mark.parametrize( "hostile", [ "abc def", # space — could split log-line columns "abc\ndef", # newline — log injection "abc\rdef", # carriage return — log injection "abc\tdef", # tab — column drift 'abc"def', # quote — could break grep-friendly delimiters "abc;def", # semicolon — script-injection-shaped "abc