failure_analysis.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187
  1. from collections import defaultdict
  2. from datetime import datetime, timedelta
  3. from sqlalchemy import and_, func, select
  4. from sqlalchemy.ext.asyncio import AsyncSession
  5. from backend.app.models.archive import PrintArchive
  6. from backend.app.models.printer import Printer
  7. class FailureAnalysisService:
  8. """Service for analyzing print failure patterns."""
  9. def __init__(self, db: AsyncSession):
  10. self.db = db
  11. async def analyze_failures(
  12. self,
  13. days: int = 30,
  14. printer_id: int | None = None,
  15. project_id: int | None = None,
  16. ) -> dict:
  17. """Analyze failure patterns across archives.
  18. Args:
  19. days: Number of days to analyze
  20. printer_id: Optional filter by printer
  21. project_id: Optional filter by project
  22. Returns:
  23. Dictionary with failure analysis results
  24. """
  25. cutoff_date = datetime.utcnow() - timedelta(days=days)
  26. # Build base query
  27. base_filter = [PrintArchive.created_at >= cutoff_date]
  28. if printer_id:
  29. base_filter.append(PrintArchive.printer_id == printer_id)
  30. if project_id:
  31. base_filter.append(PrintArchive.project_id == project_id)
  32. # Total counts
  33. total_result = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*base_filter)))
  34. total_prints = total_result.scalar() or 0
  35. failed_result = await self.db.execute(
  36. select(func.count(PrintArchive.id)).where(
  37. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]))
  38. )
  39. )
  40. failed_prints = failed_result.scalar() or 0
  41. failure_rate = (failed_prints / total_prints * 100) if total_prints > 0 else 0
  42. # Failures by reason
  43. reason_result = await self.db.execute(
  44. select(
  45. PrintArchive.failure_reason,
  46. func.count(PrintArchive.id).label("count"),
  47. )
  48. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  49. .group_by(PrintArchive.failure_reason)
  50. .order_by(func.count(PrintArchive.id).desc())
  51. )
  52. failures_by_reason = {(row[0] or "Unknown"): row[1] for row in reason_result.fetchall()}
  53. # Failures by filament type
  54. filament_result = await self.db.execute(
  55. select(
  56. PrintArchive.filament_type,
  57. func.count(PrintArchive.id).label("count"),
  58. )
  59. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  60. .group_by(PrintArchive.filament_type)
  61. .order_by(func.count(PrintArchive.id).desc())
  62. )
  63. failures_by_filament = {(row[0] or "Unknown"): row[1] for row in filament_result.fetchall()}
  64. # Failures by printer
  65. printer_result = await self.db.execute(
  66. select(
  67. PrintArchive.printer_id,
  68. func.count(PrintArchive.id).label("count"),
  69. )
  70. .where(
  71. and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"]), PrintArchive.printer_id.isnot(None))
  72. )
  73. .group_by(PrintArchive.printer_id)
  74. .order_by(func.count(PrintArchive.id).desc())
  75. )
  76. failures_by_printer_id = {row[0]: row[1] for row in printer_result.fetchall()}
  77. # Get printer names
  78. if failures_by_printer_id:
  79. printers_result = await self.db.execute(
  80. select(Printer.id, Printer.name).where(Printer.id.in_(failures_by_printer_id.keys()))
  81. )
  82. printer_names = {row[0]: row[1] for row in printers_result.fetchall()}
  83. failures_by_printer = {
  84. printer_names.get(pid, f"Printer {pid}"): count for pid, count in failures_by_printer_id.items()
  85. }
  86. else:
  87. failures_by_printer = {}
  88. # Failures by hour of day
  89. failed_archives_result = await self.db.execute(
  90. select(PrintArchive.started_at).where(
  91. and_(
  92. *base_filter,
  93. PrintArchive.status.in_(["failed", "aborted"]),
  94. PrintArchive.started_at.isnot(None),
  95. )
  96. )
  97. )
  98. failures_by_hour = defaultdict(int)
  99. for (started_at,) in failed_archives_result.fetchall():
  100. if started_at:
  101. hour = started_at.hour
  102. failures_by_hour[hour] += 1
  103. # Convert to dict with all 24 hours
  104. failures_by_hour_complete = {h: failures_by_hour.get(h, 0) for h in range(24)}
  105. # Recent failures
  106. recent_result = await self.db.execute(
  107. select(PrintArchive)
  108. .where(and_(*base_filter, PrintArchive.status.in_(["failed", "aborted"])))
  109. .order_by(PrintArchive.created_at.desc())
  110. .limit(10)
  111. )
  112. recent_failures = [
  113. {
  114. "id": a.id,
  115. "print_name": a.print_name or a.filename,
  116. "failure_reason": a.failure_reason,
  117. "filament_type": a.filament_type,
  118. "printer_id": a.printer_id,
  119. "created_at": a.created_at.isoformat() if a.created_at else None,
  120. }
  121. for a in recent_result.scalars().all()
  122. ]
  123. # Failure rate trend (by week)
  124. trend_data = []
  125. for i in range(min(days // 7, 12)): # Up to 12 weeks
  126. week_end = datetime.utcnow() - timedelta(weeks=i)
  127. week_start = week_end - timedelta(weeks=1)
  128. week_filter = base_filter.copy()
  129. week_filter[0] = and_(
  130. PrintArchive.created_at >= week_start,
  131. PrintArchive.created_at < week_end,
  132. )
  133. week_total = await self.db.execute(select(func.count(PrintArchive.id)).where(and_(*week_filter)))
  134. week_failed = await self.db.execute(
  135. select(func.count(PrintArchive.id)).where(
  136. and_(*week_filter, PrintArchive.status.in_(["failed", "aborted"]))
  137. )
  138. )
  139. total = week_total.scalar() or 0
  140. failed = week_failed.scalar() or 0
  141. rate = (failed / total * 100) if total > 0 else 0
  142. trend_data.append(
  143. {
  144. "week_start": week_start.date().isoformat(),
  145. "total_prints": total,
  146. "failed_prints": failed,
  147. "failure_rate": round(rate, 1),
  148. }
  149. )
  150. trend_data.reverse() # Oldest first
  151. return {
  152. "period_days": days,
  153. "total_prints": total_prints,
  154. "failed_prints": failed_prints,
  155. "failure_rate": round(failure_rate, 1),
  156. "failures_by_reason": failures_by_reason,
  157. "failures_by_filament": failures_by_filament,
  158. "failures_by_printer": failures_by_printer,
  159. "failures_by_hour": failures_by_hour_complete,
  160. "recent_failures": recent_failures,
  161. "trend": trend_data,
  162. }