safe_path.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
"""Containment-checked path joining.

Single source of truth for joining a user-controlled string under a trusted
parent directory. The two-vector arbitrary-file-write reported against
``backend/app/api/routes/projects.py::import_project_file`` was traced to plain
``Path / user_string`` arithmetic with no resolve + containment check: the
attacker passed an absolute path, ``Path("/lib") / "/etc"`` collapsed to
``Path("/etc")``, and the next ``write_bytes`` landed wherever the attacker
chose. This module is the answer.

Every site that joins a path component coming from a request body, a ZIP
``namelist()``, an ``UploadFile.filename``, or any other attacker-controlled
source MUST route through ``safe_join_under``. Sites that join trusted
constants (settings paths, hardcoded subdirs) are not in scope — those should
carry a ``# SEC-PATH-OK: <reason>`` marker so the CI backstop knows.
"""
  15. from __future__ import annotations
  16. from pathlib import Path
  17. from fastapi import HTTPException
  18. class PathTraversalError(ValueError):
  19. """Raised when a join attempt would escape the trusted parent.
  20. Callers in API-route context catch this and translate to ``HTTPException``
  21. via ``safe_join_under`` (which already raises HTTPException directly when
  22. invoked with ``http=True``). Non-route callers can catch the
  23. ``PathTraversalError`` and decide their own response shape.
  24. """
  25. def safe_join_under(parent: Path, *parts: str, http: bool = True) -> Path:
  26. """Join *parts* under *parent* and assert the result stays under it.
  27. Rejects:
  28. - empty / None / non-str parts;
  29. - parts containing NUL (``\\x00``);
  30. - parts starting with ``/`` or ``\\`` (absolute paths;
  31. ``Path("/lib") / "/etc"`` discards ``/lib``);
  32. - any sequence whose resolved form is not a descendant of *parent*'s
  33. resolved form (defeats ``..`` traversal even when the literal join
  34. doesn't look suspicious).
  35. Returns the resolved absolute path on success.
  36. When ``http=True`` (default; suitable for FastAPI routes), failures raise
  37. ``HTTPException(400, "Invalid path in upload")``. Set ``http=False`` to
  38. raise ``PathTraversalError`` instead — for non-route callers that need
  39. finer control over the response.
  40. """
  41. if not parts:
  42. _fail("safe_join_under called with no parts", http)
  43. for part in parts:
  44. if not isinstance(part, str):
  45. _fail(f"Path part has type {type(part).__name__}, expected str", http)
  46. if not part:
  47. _fail("Empty path part", http)
  48. if "\x00" in part:
  49. _fail("NUL byte in path part", http)
  50. # Reject literal absolute markers: pathlib collapses ``Path("/a") /
  51. # "/b"`` to ``Path("/b")`` so the catch-after-resolve below would also
  52. # fire, but rejecting up-front gives a clearer error and avoids
  53. # touching the filesystem.
  54. if part.startswith("/") or part.startswith("\\"):
  55. _fail("Absolute path part not allowed", http)
  56. parent_resolved = parent.resolve()
  57. candidate = parent
  58. for part in parts:
  59. candidate = candidate / part
  60. candidate_resolved = candidate.resolve()
  61. if not _is_relative_to(candidate_resolved, parent_resolved):
  62. _fail("Path escapes the parent directory", http)
  63. return candidate_resolved
  64. def assert_under(parent: Path, candidate: Path, *, http: bool = True) -> Path:
  65. """Assert that an already-joined *candidate* path is under *parent*.
  66. Use when you have an existing ``Path`` (e.g. from another helper that
  67. builds the path itself) and need a containment check before writing or
  68. deleting. Equivalent to ``safe_join_under`` minus the per-part input
  69. validation.
  70. """
  71. parent_resolved = parent.resolve()
  72. candidate_resolved = candidate.resolve()
  73. if not _is_relative_to(candidate_resolved, parent_resolved):
  74. _fail("Path escapes the parent directory", http)
  75. return candidate_resolved
  76. def _is_relative_to(child: Path, parent: Path) -> bool:
  77. # ``Path.is_relative_to`` exists in Python 3.9+. Bambuddy targets 3.11+
  78. # (per pyproject and the bug-report system info) so this is safe.
  79. try:
  80. return child.is_relative_to(parent)
  81. except AttributeError: # pragma: no cover - defensive
  82. try:
  83. child.relative_to(parent)
  84. return True
  85. except ValueError:
  86. return False
  87. def _fail(reason: str, http: bool) -> None:
  88. if http:
  89. raise HTTPException(status_code=400, detail="Invalid path in upload")
  90. raise PathTraversalError(reason)