failure_analysis.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. from collections import defaultdict
  2. from datetime import date, datetime, time, timedelta, timezone
  3. from sqlalchemy import and_, func, select
  4. from sqlalchemy.ext.asyncio import AsyncSession
  5. from backend.app.models.archive import PrintArchive
  6. from backend.app.models.printer import Printer
  7. class FailureAnalysisService:
  8. """Service for analyzing print failure patterns."""
  9. def __init__(self, db: AsyncSession):
  10. self.db = db
  11. async def analyze_failures(
  12. self,
  13. days: int | None = None,
  14. date_from: date | None = None,
  15. date_to: date | None = None,
  16. printer_id: int | None = None,
  17. project_id: int | None = None,
  18. created_by_id: int | None = None,
  19. ) -> dict:
  20. """Analyze failure patterns across archives.
  21. Args:
  22. days: Number of days to analyze (fallback when no date range)
  23. date_from: Start date filter (inclusive)
  24. date_to: End date filter (inclusive)
  25. printer_id: Optional filter by printer
  26. project_id: Optional filter by project
  27. Returns:
  28. Dictionary with failure analysis results
  29. """
  30. # Build base query — separate date vs non-date filters for trend reuse
  31. base_filter = []
  32. non_date_filter = []
  33. if date_from or date_to:
  34. if date_from:
  35. dt_from = datetime.combine(date_from, time.min, tzinfo=timezone.utc)
  36. base_filter.append(PrintArchive.created_at >= dt_from)
  37. if date_to:
  38. dt_to = datetime.combine(date_to, time.max, tzinfo=timezone.utc)
  39. base_filter.append(PrintArchive.created_at <= dt_to)
  40. # Compute effective span for trend
  41. range_start = dt_from if date_from else datetime.now(timezone.utc) - timedelta(days=365)
  42. range_end = dt_to if date_to else datetime.now(timezone.utc)
  43. effective_days = max((range_end - range_start).days, 1)
  44. else:
  45. effective_days = days if days is not None else 30
  46. cutoff_date = datetime.now(timezone.utc) - timedelta(days=effective_days)
  47. base_filter.append(PrintArchive.created_at >= cutoff_date)
  48. if printer_id:
  49. non_date_filter.append(PrintArchive.printer_id == printer_id)
  50. if project_id:
  51. non_date_filter.append(PrintArchive.project_id == project_id)
  52. if created_by_id is not None:
  53. if created_by_id == -1:
  54. non_date_filter.append(PrintArchive.created_by_id.is_(None))
  55. else:
  56. non_date_filter.append(PrintArchive.created_by_id == created_by_id)
  57. base_filter.extend(non_date_filter)
  58. # Total counts
  59. total_result = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*base_filter)))
  60. total_prints = total_result.scalar() or 0
  61. failed_result = await self.db.execute(
  62. select(func.count(PrintArchive.id)).where(
  63. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]))
  64. )
  65. )
  66. failed_prints = failed_result.scalar() or 0
  67. failure_rate = (failed_prints / total_prints * 100) if total_prints > 0 else 0
  68. # Failures by reason
  69. reason_result = await self.db.execute(
  70. select(
  71. PrintArchive.failure_reason,
  72. func.count(PrintArchive.id).label("count"),
  73. )
  74. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  75. .group_by(PrintArchive.failure_reason)
  76. .order_by(func.count(PrintArchive.id).desc())
  77. )
  78. failures_by_reason = {(row[0] or "Unknown"): row[1] for row in reason_result.fetchall()}
  79. # Failures by filament type
  80. filament_result = await self.db.execute(
  81. select(
  82. PrintArchive.filament_type,
  83. func.count(PrintArchive.id).label("count"),
  84. )
  85. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  86. .group_by(PrintArchive.filament_type)
  87. .order_by(func.count(PrintArchive.id).desc())
  88. )
  89. failures_by_filament = {(row[0] or "Unknown"): row[1] for row in filament_result.fetchall()}
  90. # Failures by printer
  91. printer_result = await self.db.execute(
  92. select(
  93. PrintArchive.printer_id,
  94. func.count(PrintArchive.id).label("count"),
  95. )
  96. .where(
  97. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]), PrintArchive.printer_id.isnot(None))
  98. )
  99. .group_by(PrintArchive.printer_id)
  100. .order_by(func.count(PrintArchive.id).desc())
  101. )
  102. failures_by_printer_id = {row[0]: row[1] for row in printer_result.fetchall()}
  103. # Get printer names
  104. if failures_by_printer_id:
  105. printers_result = await self.db.execute(
  106. select(Printer.id, Printer.name).where(Printer.id.in_(failures_by_printer_id.keys()))
  107. )
  108. printer_names = {row[0]: row[1] for row in printers_result.fetchall()}
  109. failures_by_printer = {
  110. printer_names.get(pid, f"Printer {pid}"): count for pid, count in failures_by_printer_id.items()
  111. }
  112. else:
  113. failures_by_printer = {}
  114. # Failures by hour of day
  115. failed_archives_result = await self.db.execute(
  116. select(PrintArchive.started_at).where(
  117. and_(
  118. *base_filter,
  119. PrintArchive.status.in_(["failed", "aborted"]),
  120. PrintArchive.started_at.isnot(None),
  121. )
  122. )
  123. )
  124. failures_by_hour = defaultdict(int)
  125. for (started_at,) in failed_archives_result.fetchall():
  126. if started_at:
  127. hour = started_at.hour
  128. failures_by_hour[hour] += 1
  129. # Convert to dict with all 24 hours
  130. failures_by_hour_complete = {h: failures_by_hour.get(h, 0) for h in range(24)}
  131. # Recent failures
  132. recent_result = await self.db.execute(
  133. select(PrintArchive)
  134. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  135. .order_by(PrintArchive.created_at.desc())
  136. .limit(10)
  137. )
  138. recent_failures = [
  139. {
  140. "id": a.id,
  141. "print_name": a.print_name or a.filename,
  142. "failure_reason": a.failure_reason,
  143. "filament_type": a.filament_type,
  144. "printer_id": a.printer_id,
  145. "created_at": a.created_at.isoformat() if a.created_at else None,
  146. }
  147. for a in recent_result.scalars().all()
  148. ]
  149. # Failure rate trend (by week)
  150. trend_data = []
  151. num_weeks = max(effective_days // 7, 1)
  152. for i in range(num_weeks):
  153. week_end = datetime.now(timezone.utc) - timedelta(weeks=i)
  154. week_start = week_end - timedelta(weeks=1)
  155. week_filter = [
  156. PrintArchive.created_at >= week_start,
  157. PrintArchive.created_at < week_end,
  158. *non_date_filter,
  159. ]
  160. week_total = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*week_filter)))
  161. week_failed = await self.db.execute(
  162. select(func.count(PrintArchive.id)).where(
  163. and_(*week_filter, PrintArchive.status.in_(["failed", "aborted"]))
  164. )
  165. )
  166. total = week_total.scalar() or 0
  167. failed = week_failed.scalar() or 0
  168. rate = (failed / total * 100) if total > 0 else 0
  169. trend_data.append(
  170. {
  171. "week_start": week_start.date().isoformat(),
  172. "total_prints": total,
  173. "failed_prints": failed,
  174. "failure_rate": round(rate, 1),
  175. }
  176. )
  177. trend_data.reverse() # Oldest first
  178. return {
  179. "period_days": effective_days,
  180. "total_prints": total_prints,
  181. "failed_prints": failed_prints,
  182. "failure_rate": round(failure_rate, 1),
  183. "failures_by_reason": failures_by_reason,
  184. "failures_by_filament": failures_by_filament,
  185. "failures_by_printer": failures_by_printer,
  186. "failures_by_hour": failures_by_hour_complete,
  187. "recent_failures": recent_failures,
  188. "trend": trend_data,
  189. }