failure_analysis.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. from collections import defaultdict
  2. from datetime import date, datetime, time, timedelta, timezone
  3. from sqlalchemy import and_, func, select
  4. from sqlalchemy.ext.asyncio import AsyncSession
  5. from backend.app.models.print_log import PrintLogEntry
  6. from backend.app.models.printer import Printer
  7. class FailureAnalysisService:
  8. """Service for analyzing print failure patterns.
  9. Reads from print_log_entries (per-event data) rather than print_archives
  10. so reprints contribute each run and orphan events (archive deleted, log
  11. row survived via ON DELETE SET NULL) still count consistently with
  12. Quick Stats. The archive-based predecessor diverged from Quick Stats
  13. after #1378 moved the rest of the page to per-event aggregation.
  14. """
  15. def __init__(self, db: AsyncSession):
  16. self.db = db
  17. async def analyze_failures(
  18. self,
  19. days: int | None = None,
  20. date_from: date | None = None,
  21. date_to: date | None = None,
  22. printer_id: int | None = None,
  23. project_id: int | None = None,
  24. created_by_id: int | None = None,
  25. ) -> dict:
  26. """Analyze failure patterns across logged print events."""
  27. # Build base query — separate date vs non-date filters for trend reuse
  28. base_filter = []
  29. non_date_filter = []
  30. if date_from or date_to:
  31. if date_from:
  32. dt_from = datetime.combine(date_from, time.min, tzinfo=timezone.utc)
  33. base_filter.append(PrintLogEntry.created_at >= dt_from)
  34. if date_to:
  35. dt_to = datetime.combine(date_to, time.max, tzinfo=timezone.utc)
  36. base_filter.append(PrintLogEntry.created_at <= dt_to)
  37. range_start = dt_from if date_from else datetime.now(timezone.utc) - timedelta(days=365)
  38. range_end = dt_to if date_to else datetime.now(timezone.utc)
  39. effective_days = max((range_end - range_start).days, 1)
  40. else:
  41. effective_days = days if days is not None else 30
  42. cutoff_date = datetime.now(timezone.utc) - timedelta(days=effective_days)
  43. base_filter.append(PrintLogEntry.created_at >= cutoff_date)
  44. if printer_id:
  45. non_date_filter.append(PrintLogEntry.printer_id == printer_id)
  46. # project_id is an archive-level concept; PrintLogEntry has no project
  47. # link, so we resolve it by archive_id where present.
  48. if project_id:
  49. from backend.app.models.archive import PrintArchive
  50. project_archive_ids = await self.db.execute(
  51. select(PrintArchive.id).where(PrintArchive.project_id == project_id)
  52. )
  53. archive_ids = [row[0] for row in project_archive_ids.fetchall()]
  54. if archive_ids:
  55. non_date_filter.append(PrintLogEntry.archive_id.in_(archive_ids))
  56. else:
  57. # No archives in this project → nothing to count
  58. non_date_filter.append(PrintLogEntry.id.is_(None))
  59. if created_by_id is not None:
  60. if created_by_id == -1:
  61. non_date_filter.append(PrintLogEntry.created_by_id.is_(None))
  62. else:
  63. non_date_filter.append(PrintLogEntry.created_by_id == created_by_id)
  64. base_filter.extend(non_date_filter)
  65. # Total counts
  66. total_result = await self.db.execute(select(func.count(PrintLogEntry.id)).where(and_(*base_filter)))
  67. total_prints = total_result.scalar() or 0
  68. successful_result = await self.db.execute(
  69. select(func.count(PrintLogEntry.id)).where(and_(*base_filter, PrintLogEntry.status == "completed"))
  70. )
  71. successful_prints = successful_result.scalar() or 0
  72. failed_result = await self.db.execute(
  73. select(func.count(PrintLogEntry.id)).where(
  74. and_(*base_filter, PrintLogEntry.status.in_(["failed", "aborted"]))
  75. )
  76. )
  77. failed_prints = failed_result.scalar() or 0
  78. # Failure rate divides by quality-outcome prints only — a cancelled or
  79. # skipped print is neither a success nor a failure of the printer, so
  80. # including it in the denominator silently lowered the displayed rate
  81. # whenever the user stopped jobs (#1390). Total Prints (the absolute
  82. # count incl. cancelled) is still returned separately for the "X / Y
  83. # prints failed" caption.
  84. outcome_prints = successful_prints + failed_prints
  85. failure_rate = (failed_prints / outcome_prints * 100) if outcome_prints > 0 else 0
  86. # Failures by reason
  87. reason_result = await self.db.execute(
  88. select(
  89. PrintLogEntry.failure_reason,
  90. func.count(PrintLogEntry.id).label("count"),
  91. )
  92. .where(and_(*base_filter, PrintLogEntry.status.in_(["failed", "aborted"])))
  93. .group_by(PrintLogEntry.failure_reason)
  94. .order_by(func.count(PrintLogEntry.id).desc())
  95. )
  96. failures_by_reason = {(row[0] or "Unknown"): row[1] for row in reason_result.fetchall()}
  97. # Failures by filament type
  98. filament_result = await self.db.execute(
  99. select(
  100. PrintLogEntry.filament_type,
  101. func.count(PrintLogEntry.id).label("count"),
  102. )
  103. .where(and_(*base_filter, PrintLogEntry.status.in_(["failed", "aborted"])))
  104. .group_by(PrintLogEntry.filament_type)
  105. .order_by(func.count(PrintLogEntry.id).desc())
  106. )
  107. failures_by_filament = {(row[0] or "Unknown"): row[1] for row in filament_result.fetchall()}
  108. # Failures by printer
  109. printer_result = await self.db.execute(
  110. select(
  111. PrintLogEntry.printer_id,
  112. func.count(PrintLogEntry.id).label("count"),
  113. )
  114. .where(
  115. and_(
  116. *base_filter,
  117. PrintLogEntry.status.in_(["failed", "aborted"]),
  118. PrintLogEntry.printer_id.isnot(None),
  119. )
  120. )
  121. .group_by(PrintLogEntry.printer_id)
  122. .order_by(func.count(PrintLogEntry.id).desc())
  123. )
  124. failures_by_printer_id = {row[0]: row[1] for row in printer_result.fetchall()}
  125. # Get printer names
  126. if failures_by_printer_id:
  127. printers_result = await self.db.execute(
  128. select(Printer.id, Printer.name).where(Printer.id.in_(failures_by_printer_id.keys()))
  129. )
  130. printer_names = {row[0]: row[1] for row in printers_result.fetchall()}
  131. failures_by_printer = {
  132. printer_names.get(pid, f"Printer {pid}"): count for pid, count in failures_by_printer_id.items()
  133. }
  134. else:
  135. failures_by_printer = {}
  136. # Failures by hour of day
  137. failed_events_result = await self.db.execute(
  138. select(PrintLogEntry.started_at).where(
  139. and_(
  140. *base_filter,
  141. PrintLogEntry.status.in_(["failed", "aborted"]),
  142. PrintLogEntry.started_at.isnot(None),
  143. )
  144. )
  145. )
  146. failures_by_hour = defaultdict(int)
  147. for (started_at,) in failed_events_result.fetchall():
  148. if started_at:
  149. hour = started_at.hour
  150. failures_by_hour[hour] += 1
  151. failures_by_hour_complete = {h: failures_by_hour.get(h, 0) for h in range(24)}
  152. # Recent failures
  153. recent_result = await self.db.execute(
  154. select(PrintLogEntry)
  155. .where(and_(*base_filter, PrintLogEntry.status.in_(["failed", "aborted"])))
  156. .order_by(PrintLogEntry.created_at.desc())
  157. .limit(10)
  158. )
  159. recent_failures = [
  160. {
  161. "id": e.archive_id,
  162. "print_name": e.print_name,
  163. "failure_reason": e.failure_reason,
  164. "filament_type": e.filament_type,
  165. "printer_id": e.printer_id,
  166. "created_at": e.created_at.isoformat() if e.created_at else None,
  167. }
  168. for e in recent_result.scalars().all()
  169. ]
  170. # Failure rate trend (by week)
  171. trend_data = []
  172. num_weeks = max(effective_days // 7, 1)
  173. for i in range(num_weeks):
  174. week_end = datetime.now(timezone.utc) - timedelta(weeks=i)
  175. week_start = week_end - timedelta(weeks=1)
  176. week_filter = [
  177. PrintLogEntry.created_at >= week_start,
  178. PrintLogEntry.created_at < week_end,
  179. *non_date_filter,
  180. ]
  181. week_total = await self.db.execute(select(func.count(PrintLogEntry.id)).where(and_(*week_filter)))
  182. week_successful = await self.db.execute(
  183. select(func.count(PrintLogEntry.id)).where(and_(*week_filter, PrintLogEntry.status == "completed"))
  184. )
  185. week_failed = await self.db.execute(
  186. select(func.count(PrintLogEntry.id)).where(
  187. and_(*week_filter, PrintLogEntry.status.in_(["failed", "aborted"]))
  188. )
  189. )
  190. total = week_total.scalar() or 0
  191. successful = week_successful.scalar() or 0
  192. failed = week_failed.scalar() or 0
  193. week_outcome = successful + failed
  194. rate = (failed / week_outcome * 100) if week_outcome > 0 else 0
  195. trend_data.append(
  196. {
  197. "week_start": week_start.date().isoformat(),
  198. "total_prints": total,
  199. "failed_prints": failed,
  200. "failure_rate": round(rate, 1),
  201. }
  202. )
  203. trend_data.reverse() # Oldest first
  204. return {
  205. "period_days": effective_days,
  206. "total_prints": total_prints,
  207. "failed_prints": failed_prints,
  208. "failure_rate": round(failure_rate, 1),
  209. "failures_by_reason": failures_by_reason,
  210. "failures_by_filament": failures_by_filament,
  211. "failures_by_printer": failures_by_printer,
  212. "failures_by_hour": failures_by_hour_complete,
  213. "recent_failures": recent_failures,
  214. "trend": trend_data,
  215. }