test_restore_sqlite_wal_safety.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. """Regression tests for the SQLite WAL leftover bug in /settings/restore.
  2. Background — see #1211 / #668. The live database runs in WAL mode
  3. (``database.py:19``: ``PRAGMA journal_mode = WAL``). Anything written to
  4. the database before the restore call that hasn't been checkpointed yet
  5. sits in ``bambuddy.db-wal`` with valid checksums. The original restore
  6. implementation used ``shutil.copy2(backup_db, db_path)`` which only
  7. overwrites the main DB file's content, so on the next open SQLite found
  8. the stale WAL and silently re-applied those page-level writes on top of
  9. the restored DB — partially clobbering it with fresh-install state.
  10. These tests exercise the bug condition deterministically (using the
  11. classic reader-snapshot trick to prevent SQLite's close-time checkpoint)
  12. and pin that the production restore path — the SQLite online backup API
  13. called via ``src_conn.backup(dst_conn)`` — produces a clean restored DB
  14. even with un-checkpointed WAL frames sitting on disk.
  15. """
  16. from __future__ import annotations
  17. import shutil
  18. import sqlite3
  19. from pathlib import Path
  20. import pytest
  21. def _seed_live_db_with_uncheckpointed_wal(live_db: Path) -> sqlite3.Connection:
  22. """Create a SQLite DB in WAL mode with frames that haven't been
  23. checkpointed to the main file. Returns a still-open reader so the
  24. caller can keep the WAL alive and the close-time checkpoint blocked
  25. until the test is ready to assert.
  26. The returned connection holds an open ``BEGIN`` transaction, which is
  27. what prevents SQLite from auto-checkpointing the WAL on the writer's
  28. close. In production this role is played by the route handler's own
  29. ``db: Depends(get_db)`` session — FastAPI's dependency injection keeps
  30. that session alive across the entire request, ``engine.dispose()``
  31. doesn't touch checked-out connections, and the WAL accordingly
  32. persists with un-checkpointed frames at the moment the file copy
  33. would happen.
  34. """
  35. writer = sqlite3.connect(str(live_db))
  36. writer.execute("PRAGMA journal_mode = WAL")
  37. writer.execute("CREATE TABLE settings (key TEXT PRIMARY KEY, value TEXT)")
  38. # These rows are the "fresh-install defaults" — what would clobber the
  39. # restored DB if the WAL were re-applied.
  40. writer.execute("INSERT INTO settings VALUES ('energy_cost_per_kwh', '0.15')")
  41. writer.execute("INSERT INTO settings VALUES ('currency', 'EUR')")
  42. writer.commit()
  43. reader = sqlite3.connect(str(live_db))
  44. reader.execute("BEGIN")
  45. reader.execute("SELECT * FROM settings").fetchall() # acquires a snapshot
  46. writer.close() # WAL persists because reader still holds the snapshot
  47. # Sanity: the WAL must actually contain frames, otherwise the test is
  48. # vacuous (we'd be testing the safe case, not the bug condition).
  49. wal = live_db.parent / f"{live_db.name}-wal"
  50. assert wal.exists() and wal.stat().st_size > 0, (
  51. "Test setup failed to leave un-checkpointed WAL frames; the bug condition isn't being exercised."
  52. )
  53. return reader
  54. def _make_backup_db(backup_db: Path, *, energy: str, currency: str) -> None:
  55. """Build a 'backup' SQLite DB at the given path with the user's actual
  56. settings. Same schema as ``_seed_live_db_with_uncheckpointed_wal`` so
  57. a successful restore should replace the live DB row-for-row."""
  58. conn = sqlite3.connect(str(backup_db))
  59. try:
  60. conn.execute("CREATE TABLE settings (key TEXT PRIMARY KEY, value TEXT)")
  61. conn.execute("INSERT INTO settings VALUES (?, ?)", ("energy_cost_per_kwh", energy))
  62. conn.execute("INSERT INTO settings VALUES (?, ?)", ("currency", currency))
  63. conn.commit()
  64. finally:
  65. conn.close()
  66. def _read_settings(db_path: Path) -> dict[str, str]:
  67. """Open a fresh connection and return the settings rows as a dict."""
  68. conn = sqlite3.connect(str(db_path))
  69. try:
  70. rows = conn.execute("SELECT key, value FROM settings").fetchall()
  71. return dict(rows)
  72. finally:
  73. conn.close()
  74. def test_shutil_copy_loses_to_stale_wal(tmp_path):
  75. """Pin the bug: ``shutil.copy2`` over a live DB with un-checkpointed
  76. WAL leaves the WAL behind, and on the next open SQLite re-applies
  77. those frames on top of the copied content. The user sees a
  78. "successful" restore that mostly reverted to fresh-install defaults
  79. (energy=0.15, currency=EUR) instead of their values (0.12, USD).
  80. Pinned here so a future "small simplification" that replaces the
  81. backup API call with a file copy can't silently re-introduce the bug.
  82. """
  83. live = tmp_path / "live.db"
  84. backup = tmp_path / "backup.db"
  85. reader = _seed_live_db_with_uncheckpointed_wal(live)
  86. _make_backup_db(backup, energy="0.12", currency="USD")
  87. # The buggy restore: file copy over the live DB.
  88. shutil.copy2(backup, live)
  89. reader.close()
  90. settings = _read_settings(live)
  91. # The bug manifests as the live DB's WAL frames overwriting the
  92. # restored content. We pin the symptom directly: at least one of the
  93. # user's settings was clobbered by the fresh-install defaults.
  94. assert settings != {"energy_cost_per_kwh": "0.12", "currency": "USD"}, (
  95. "Expected the shutil.copy2 path to lose data to WAL leftover, "
  96. "but the restore was clean. If this assertion starts failing, "
  97. "either the test setup no longer reproduces the bug condition "
  98. "or SQLite's behaviour changed — re-investigate before relaxing."
  99. )
  100. def test_sqlite_backup_api_replaces_live_db_safely(tmp_path):
  101. """Pin the fix: ``src.backup(dst)`` (SQLite online backup API) over a
  102. live DB that has un-checkpointed WAL frames produces a restored DB
  103. with exactly the backup contents. No fresh-install state leaks
  104. through.
  105. Mirrors the production path in ``backend/app/api/routes/settings.py``
  106. (``restore_backup``) so a regression in either the route or the
  107. helper used by it surfaces here.
  108. """
  109. live = tmp_path / "live.db"
  110. backup = tmp_path / "backup.db"
  111. reader = _seed_live_db_with_uncheckpointed_wal(live)
  112. _make_backup_db(backup, energy="0.12", currency="USD")
  113. # The production path: SQLite online backup API.
  114. src_conn = sqlite3.connect(str(backup))
  115. try:
  116. dst_conn = sqlite3.connect(str(live))
  117. try:
  118. src_conn.backup(dst_conn)
  119. finally:
  120. dst_conn.close()
  121. finally:
  122. src_conn.close()
  123. reader.close()
  124. settings = _read_settings(live)
  125. assert settings == {"energy_cost_per_kwh": "0.12", "currency": "USD"}, (
  126. f"Restore lost or corrupted user data. Got {settings!r}. If "
  127. "energy_cost_per_kwh is back to 0.15 or currency is back to EUR "
  128. "the WAL leftover bug has regressed — see #1211."
  129. )
  130. def test_sqlite_backup_api_works_when_no_wal_frames(tmp_path):
  131. """Defensive: the fix must also work in the simple case where the
  132. live DB has no leftover WAL (e.g. fresh container, restore as the
  133. very first action). Failing here would indicate the production path
  134. has accidentally become specific to the WAL-leftover scenario.
  135. """
  136. live = tmp_path / "live.db"
  137. backup = tmp_path / "backup.db"
  138. # Set up a live DB but force a checkpoint so WAL is empty.
  139. conn = sqlite3.connect(str(live))
  140. conn.execute("PRAGMA journal_mode = WAL")
  141. conn.execute("CREATE TABLE settings (key TEXT PRIMARY KEY, value TEXT)")
  142. conn.execute("INSERT INTO settings VALUES ('energy_cost_per_kwh', '0.15')")
  143. conn.commit()
  144. conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")
  145. conn.close()
  146. _make_backup_db(backup, energy="0.12", currency="USD")
  147. src_conn = sqlite3.connect(str(backup))
  148. try:
  149. dst_conn = sqlite3.connect(str(live))
  150. try:
  151. src_conn.backup(dst_conn)
  152. finally:
  153. dst_conn.close()
  154. finally:
  155. src_conn.close()
  156. settings = _read_settings(live)
  157. assert settings == {"energy_cost_per_kwh": "0.12", "currency": "USD"}
  158. @pytest.mark.parametrize("backup_size_pages", [1, 100, 1000])
  159. def test_sqlite_backup_api_handles_various_db_sizes(tmp_path, backup_size_pages):
  160. """The backup API copies in 4 KB pages — make sure single-page,
  161. medium, and multi-page DBs all round-trip correctly. A regression in
  162. backup-API usage that only manifested at one size would otherwise
  163. slip through.
  164. """
  165. live = tmp_path / "live.db"
  166. backup = tmp_path / "backup.db"
  167. # Live with un-checkpointed WAL (the bug condition).
  168. reader = _seed_live_db_with_uncheckpointed_wal(live)
  169. # Build a backup DB sized to roughly the requested page count.
  170. # 4 KB pages ≈ 100 INTEGER rows per page; over-provision a bit.
  171. conn = sqlite3.connect(str(backup))
  172. conn.execute("CREATE TABLE settings (key TEXT PRIMARY KEY, value TEXT)")
  173. conn.execute("CREATE TABLE bulk (id INTEGER PRIMARY KEY, payload TEXT)")
  174. conn.execute("INSERT INTO settings VALUES ('energy_cost_per_kwh', '0.12')")
  175. rows_needed = backup_size_pages * 50
  176. conn.executemany(
  177. "INSERT INTO bulk (payload) VALUES (?)",
  178. [("x" * 80,) for _ in range(rows_needed)],
  179. )
  180. conn.commit()
  181. conn.close()
  182. src_conn = sqlite3.connect(str(backup))
  183. try:
  184. dst_conn = sqlite3.connect(str(live))
  185. try:
  186. src_conn.backup(dst_conn)
  187. finally:
  188. dst_conn.close()
  189. finally:
  190. src_conn.close()
  191. reader.close()
  192. # Verify both tables round-tripped intact.
  193. conn = sqlite3.connect(str(live))
  194. try:
  195. energy = conn.execute("SELECT value FROM settings WHERE key = 'energy_cost_per_kwh'").fetchone()
  196. bulk_count = conn.execute("SELECT count(*) FROM bulk").fetchone()
  197. finally:
  198. conn.close()
  199. assert energy == ("0.12",), f"Expected '0.12', got {energy!r}"
  200. assert bulk_count == (rows_needed,), f"Bulk table size mismatch: expected {rows_needed} rows, got {bulk_count!r}"