test_trace.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. """Tests for ``backend.app.core.trace`` — the per-request trace ID
  2. plumbing that ties uvicorn HTTP access lines to the application log
  3. records produced while handling that request.
  4. These tests stay at the unit level: the ContextVar / filter / inbound-
  5. ID validator can each be exercised directly without spinning up a real
  6. FastAPI app, and going through Starlette's TestClient just to assert
  7. "the middleware sets a header" would obscure rather than illuminate the
  8. contract.
  9. """
  10. from __future__ import annotations
  11. import asyncio
  12. import logging
  13. import pytest
  14. from backend.app.core.trace import (
  15. TRACE_ID_PLACEHOLDER,
  16. TraceIDFilter,
  17. generate_trace_id,
  18. get_trace_id,
  19. normalise_inbound_trace_id,
  20. trace_id_var,
  21. )
  22. @pytest.fixture(autouse=True)
  23. def _reset_trace_id():
  24. """Each test gets a fresh ``trace_id_var`` — without the reset, a
  25. test that sets the var would leak its value into the next test
  26. running on the same event loop, producing surprising 'why is this
  27. other test seeing my ID?' failures."""
  28. token = trace_id_var.set(TRACE_ID_PLACEHOLDER)
  29. try:
  30. yield
  31. finally:
  32. trace_id_var.reset(token)
  33. def _record(message: str = "irrelevant") -> logging.LogRecord:
  34. """Build a vanilla log record — the filter doesn't care about its
  35. contents, only the surrounding ContextVar value at filter time."""
  36. return logging.LogRecord(
  37. name="test", level=logging.INFO, pathname="", lineno=0, msg=message, args=None, exc_info=None
  38. )
  39. class TestPlaceholderWhenUnset:
  40. def test_get_trace_id_returns_placeholder_outside_request(self):
  41. """Code paths with no HTTP request scope (startup, MQTT
  42. callbacks, scheduled tasks) must see the placeholder rather
  43. than ``None``, so the format-string column is always populated
  44. and missing values stay greppable."""
  45. assert get_trace_id() == TRACE_ID_PLACEHOLDER
  46. def test_filter_sets_placeholder_when_no_request_context(self):
  47. """The filter must annotate every record, including those
  48. emitted when no request is in flight — the format string would
  49. otherwise raise KeyError on those records."""
  50. record = _record()
  51. assert TraceIDFilter().filter(record) is True
  52. assert record.trace_id == TRACE_ID_PLACEHOLDER
  53. class TestRequestScopePropagation:
  54. def test_filter_picks_up_active_request_id(self):
  55. """Inside a request, the ContextVar holds that request's ID and
  56. the filter copies it onto the record — this is the whole point
  57. of the plumbing."""
  58. trace_id_var.set("abc12345")
  59. record = _record()
  60. TraceIDFilter().filter(record)
  61. assert record.trace_id == "abc12345"
  62. @pytest.mark.asyncio
  63. async def test_id_propagates_into_spawned_task(self):
  64. """asyncio copies the current context into ``create_task``, so
  65. background work spawned from inside a request inherits the same
  66. trace ID without explicit threading. This is why a ContextVar
  67. beats ``request.state``: state doesn't survive the hop."""
  68. trace_id_var.set("parent01")
  69. captured: list[str] = []
  70. async def _child():
  71. captured.append(get_trace_id())
  72. await asyncio.create_task(_child())
  73. assert captured == ["parent01"]
  74. @pytest.mark.asyncio
  75. async def test_concurrent_requests_do_not_leak_ids_into_each_other(self):
  76. """Two concurrent requests each see only their own trace ID —
  77. if the filter ever started reading from the wrong context (e.g.
  78. a process-global) this test would catch it immediately."""
  79. seen: dict[str, str] = {}
  80. async def _request(label: str, tid: str):
  81. trace_id_var.set(tid)
  82. # Yield to the scheduler so the other coroutine has a chance
  83. # to overwrite a poorly-scoped global if one existed.
  84. await asyncio.sleep(0)
  85. seen[label] = get_trace_id()
  86. await asyncio.gather(
  87. _request("a", "aaaaaaaa"),
  88. _request("b", "bbbbbbbb"),
  89. )
  90. assert seen == {"a": "aaaaaaaa", "b": "bbbbbbbb"}
  91. class TestGenerateTraceId:
  92. def test_generated_ids_are_hex(self):
  93. tid = generate_trace_id()
  94. int(tid, 16) # raises ValueError if not hex
  95. assert tid
  96. def test_generated_ids_are_unique_across_calls(self):
  97. """secrets.token_hex; collisions across a handful of calls would
  98. signal a generator regression rather than statistical bad luck."""
  99. ids = {generate_trace_id() for _ in range(200)}
  100. assert len(ids) == 200
  101. class TestNormaliseInboundTraceId:
  102. """Hostile / buggy callers sending ``X-Trace-Id`` must NOT be able
  103. to push log-injection payloads (newlines, control chars, megabyte
  104. blobs) into bambuddy.log via the trace ID column. Anything that
  105. fails the gate gets ``None`` so the middleware mints fresh."""
  106. def test_none_input_returns_none(self):
  107. assert normalise_inbound_trace_id(None) is None
  108. def test_empty_string_returns_none(self):
  109. """An explicit ``X-Trace-Id:`` header with empty value is
  110. indistinguishable from no header for our purposes — mint fresh.
  111. """
  112. assert normalise_inbound_trace_id("") is None
  113. def test_short_alphanumeric_accepted(self):
  114. assert normalise_inbound_trace_id("abc123") == "abc123"
  115. def test_uuid_format_accepted(self):
  116. """32-char hex (UUID-style without dashes) is the most common
  117. real-world correlation ID format — must round-trip unchanged."""
  118. uuid_like = "0123456789abcdef0123456789abcdef"
  119. assert normalise_inbound_trace_id(uuid_like) == uuid_like
  120. def test_dash_and_underscore_accepted(self):
  121. """Datadog / OpenTelemetry frequently use dashes between span
  122. components; underscores show up in some Bambu-internal IDs we
  123. might want to echo. Both stay in the whitelist."""
  124. assert normalise_inbound_trace_id("trace-abc_123") == "trace-abc_123"
  125. @pytest.mark.parametrize(
  126. "hostile",
  127. [
  128. "abc def", # space — could split log-line columns
  129. "abc\ndef", # newline — log injection
  130. "abc\rdef", # carriage return — log injection
  131. "abc\tdef", # tab — column drift
  132. 'abc"def', # quote — could break grep-friendly delimiters
  133. "abc;def", # semicolon — script-injection-shaped
  134. "abc<def", # angle bracket — XSS-shaped
  135. "abc/def", # slash — looks like a path
  136. ],
  137. )
  138. def test_hostile_payloads_rejected(self, hostile):
  139. """Each rejected character is one the regex whitelist intentionally
  140. excludes; this parametrised set documents the threat model and
  141. will fail loud if the regex ever drifts."""
  142. assert normalise_inbound_trace_id(hostile) is None
  143. def test_overlong_input_rejected(self):
  144. """A 1KB X-Trace-Id should never end up in every log line for
  145. the duration of a request — bound it strictly."""
  146. assert normalise_inbound_trace_id("a" * 65) is None
  147. def test_max_length_boundary_accepted(self):
  148. """The configured cap (currently 64) must accept exactly 64
  149. chars; one off-by-one would silently reject UUID-like IDs that
  150. happen to land at the boundary."""
  151. assert normalise_inbound_trace_id("a" * 64) == "a" * 64
  152. class TestFilterMustBeAttachedToHandlerNotLogger:
  153. """A filter on a Logger only fires for records that *originate* at that
  154. logger — records propagated up from child loggers (every backend.* logger
  155. in the app) never trigger it. Attaching TraceIDFilter to root_logger meant
  156. child-logger records arrived at the file handler with no trace_id
  157. attribute, the formatter raised KeyError, and the record was silently
  158. dropped — manifesting as "logs/bambuddy.log only shows logs partially".
  159. The filter must live on each *handler* so every record passing through it
  160. gets annotated regardless of which logger emitted it."""
  161. def test_handler_level_filter_fires_on_child_logger_propagation(self):
  162. import io
  163. root = logging.getLogger("test_trace_filter_handler_path")
  164. root.setLevel(logging.DEBUG)
  165. root.handlers.clear()
  166. root.filters.clear()
  167. captured = io.StringIO()
  168. handler = logging.StreamHandler(captured)
  169. handler.setFormatter(logging.Formatter("%(trace_id)s|%(message)s"))
  170. handler.addFilter(TraceIDFilter())
  171. root.addHandler(handler)
  172. child = logging.getLogger("test_trace_filter_handler_path.child")
  173. try:
  174. child.info("hi from child")
  175. handler.flush()
  176. assert f"{TRACE_ID_PLACEHOLDER}|hi from child" in captured.getvalue()
  177. finally:
  178. root.handlers.clear()
  179. root.filters.clear()