failure_analysis.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. from collections import defaultdict
  2. from datetime import date, datetime, time, timedelta, timezone
  3. from sqlalchemy import and_, func, select
  4. from sqlalchemy.ext.asyncio import AsyncSession
  5. from backend.app.models.archive import PrintArchive
  6. from backend.app.models.printer import Printer
  7. class FailureAnalysisService:
  8. """Service for analyzing print failure patterns."""
  9. def __init__(self, db: AsyncSession):
  10. self.db = db
  11. async def analyze_failures(
  12. self,
  13. days: int | None = None,
  14. date_from: date | None = None,
  15. date_to: date | None = None,
  16. printer_id: int | None = None,
  17. project_id: int | None = None,
  18. ) -> dict:
  19. """Analyze failure patterns across archives.
  20. Args:
  21. days: Number of days to analyze (fallback when no date range)
  22. date_from: Start date filter (inclusive)
  23. date_to: End date filter (inclusive)
  24. printer_id: Optional filter by printer
  25. project_id: Optional filter by project
  26. Returns:
  27. Dictionary with failure analysis results
  28. """
  29. # Build base query — separate date vs non-date filters for trend reuse
  30. base_filter = []
  31. non_date_filter = []
  32. if date_from or date_to:
  33. if date_from:
  34. dt_from = datetime.combine(date_from, time.min, tzinfo=timezone.utc)
  35. base_filter.append(PrintArchive.created_at >= dt_from)
  36. if date_to:
  37. dt_to = datetime.combine(date_to, time.max, tzinfo=timezone.utc)
  38. base_filter.append(PrintArchive.created_at <= dt_to)
  39. # Compute effective span for trend
  40. range_start = dt_from if date_from else datetime.now(timezone.utc) - timedelta(days=365)
  41. range_end = dt_to if date_to else datetime.now(timezone.utc)
  42. effective_days = max((range_end - range_start).days, 1)
  43. else:
  44. effective_days = days if days is not None else 30
  45. cutoff_date = datetime.now(timezone.utc) - timedelta(days=effective_days)
  46. base_filter.append(PrintArchive.created_at >= cutoff_date)
  47. if printer_id:
  48. non_date_filter.append(PrintArchive.printer_id == printer_id)
  49. if project_id:
  50. non_date_filter.append(PrintArchive.project_id == project_id)
  51. base_filter.extend(non_date_filter)
  52. # Total counts
  53. total_result = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*base_filter)))
  54. total_prints = total_result.scalar() or 0
  55. failed_result = await self.db.execute(
  56. select(func.count(PrintArchive.id)).where(
  57. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]))
  58. )
  59. )
  60. failed_prints = failed_result.scalar() or 0
  61. failure_rate = (failed_prints / total_prints * 100) if total_prints > 0 else 0
  62. # Failures by reason
  63. reason_result = await self.db.execute(
  64. select(
  65. PrintArchive.failure_reason,
  66. func.count(PrintArchive.id).label("count"),
  67. )
  68. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  69. .group_by(PrintArchive.failure_reason)
  70. .order_by(func.count(PrintArchive.id).desc())
  71. )
  72. failures_by_reason = {(row[0] or "Unknown"): row[1] for row in reason_result.fetchall()}
  73. # Failures by filament type
  74. filament_result = await self.db.execute(
  75. select(
  76. PrintArchive.filament_type,
  77. func.count(PrintArchive.id).label("count"),
  78. )
  79. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  80. .group_by(PrintArchive.filament_type)
  81. .order_by(func.count(PrintArchive.id).desc())
  82. )
  83. failures_by_filament = {(row[0] or "Unknown"): row[1] for row in filament_result.fetchall()}
  84. # Failures by printer
  85. printer_result = await self.db.execute(
  86. select(
  87. PrintArchive.printer_id,
  88. func.count(PrintArchive.id).label("count"),
  89. )
  90. .where(
  91. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]), PrintArchive.printer_id.isnot(None))
  92. )
  93. .group_by(PrintArchive.printer_id)
  94. .order_by(func.count(PrintArchive.id).desc())
  95. )
  96. failures_by_printer_id = {row[0]: row[1] for row in printer_result.fetchall()}
  97. # Get printer names
  98. if failures_by_printer_id:
  99. printers_result = await self.db.execute(
  100. select(Printer.id, Printer.name).where(Printer.id.in_(failures_by_printer_id.keys()))
  101. )
  102. printer_names = {row[0]: row[1] for row in printers_result.fetchall()}
  103. failures_by_printer = {
  104. printer_names.get(pid, f"Printer {pid}"): count for pid, count in failures_by_printer_id.items()
  105. }
  106. else:
  107. failures_by_printer = {}
  108. # Failures by hour of day
  109. failed_archives_result = await self.db.execute(
  110. select(PrintArchive.started_at).where(
  111. and_(
  112. *base_filter,
  113. PrintArchive.status.in_(["failed", "aborted"]),
  114. PrintArchive.started_at.isnot(None),
  115. )
  116. )
  117. )
  118. failures_by_hour = defaultdict(int)
  119. for (started_at,) in failed_archives_result.fetchall():
  120. if started_at:
  121. hour = started_at.hour
  122. failures_by_hour[hour] += 1
  123. # Convert to dict with all 24 hours
  124. failures_by_hour_complete = {h: failures_by_hour.get(h, 0) for h in range(24)}
  125. # Recent failures
  126. recent_result = await self.db.execute(
  127. select(PrintArchive)
  128. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  129. .order_by(PrintArchive.created_at.desc())
  130. .limit(10)
  131. )
  132. recent_failures = [
  133. {
  134. "id": a.id,
  135. "print_name": a.print_name or a.filename,
  136. "failure_reason": a.failure_reason,
  137. "filament_type": a.filament_type,
  138. "printer_id": a.printer_id,
  139. "created_at": a.created_at.isoformat() if a.created_at else None,
  140. }
  141. for a in recent_result.scalars().all()
  142. ]
  143. # Failure rate trend (by week)
  144. trend_data = []
  145. num_weeks = max(effective_days // 7, 1)
  146. for i in range(num_weeks):
  147. week_end = datetime.now(timezone.utc) - timedelta(weeks=i)
  148. week_start = week_end - timedelta(weeks=1)
  149. week_filter = [
  150. PrintArchive.created_at >= week_start,
  151. PrintArchive.created_at < week_end,
  152. *non_date_filter,
  153. ]
  154. week_total = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*week_filter)))
  155. week_failed = await self.db.execute(
  156. select(func.count(PrintArchive.id)).where(
  157. and_(*week_filter, PrintArchive.status.in_(["failed", "aborted"]))
  158. )
  159. )
  160. total = week_total.scalar() or 0
  161. failed = week_failed.scalar() or 0
  162. rate = (failed / total * 100) if total > 0 else 0
  163. trend_data.append(
  164. {
  165. "week_start": week_start.date().isoformat(),
  166. "total_prints": total,
  167. "failed_prints": failed,
  168. "failure_rate": round(rate, 1),
  169. }
  170. )
  171. trend_data.reverse() # Oldest first
  172. return {
  173. "period_days": effective_days,
  174. "total_prints": total_prints,
  175. "failed_prints": failed_prints,
  176. "failure_rate": round(failure_rate, 1),
  177. "failures_by_reason": failures_by_reason,
  178. "failures_by_filament": failures_by_filament,
  179. "failures_by_printer": failures_by_printer,
  180. "failures_by_hour": failures_by_hour_complete,
  181. "recent_failures": recent_failures,
  182. "trend": trend_data,
  183. }