package service import ( "math" "time" ) // computeDashboardHealthScore computes a 0-100 health score from the metrics returned by the dashboard overview. // // Design goals: // - Backend-owned scoring (UI only displays). // - Layered scoring: Business Health (70%) + Infrastructure Health (30%) // - Avoids double-counting (e.g., DB failure affects both infra and business metrics) // - Conservative + stable: penalize clear degradations; avoid overreacting to missing/idle data. func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview) int { if overview == nil { return 0 } // Idle/no-data: avoid showing a "bad" score when there is no traffic. // UI can still render a gray/idle state based on QPS + error rate. if overview.RequestCountSLA <= 0 && overview.RequestCountTotal <= 0 && overview.ErrorCountTotal <= 0 { return 100 } businessHealth := computeBusinessHealth(overview) infraHealth := computeInfraHealth(now, overview) // Weighted combination: 70% business + 30% infrastructure score := businessHealth*0.7 + infraHealth*0.3 return int(math.Round(clampFloat64(score, 0, 100))) } // computeBusinessHealth calculates business health score (0-100) // Components: SLA (50%) + Error Rate (30%) + Latency (20%) func computeBusinessHealth(overview *OpsDashboardOverview) float64 { // SLA score: 99.5% → 100, 95% → 0 (linear) slaScore := 100.0 slaPct := clampFloat64(overview.SLA*100, 0, 100) if slaPct < 99.5 { if slaPct >= 95 { slaScore = (slaPct - 95) / 4.5 * 100 } else { slaScore = 0 } } // Error rate score: 0.5% → 100, 5% → 0 (linear) // Combines request errors and upstream errors errorScore := 100.0 errorPct := clampFloat64(overview.ErrorRate*100, 0, 100) upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100) combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case if combinedErrorPct > 0.5 { if combinedErrorPct <= 5 { errorScore = (5 - combinedErrorPct) / 4.5 * 100 } else { errorScore = 0 } } // Latency score: 1s → 100, 10s → 0 (linear) // Uses P99 of duration (TTFT is less critical for overall health) latencyScore := 100.0 if overview.Duration.P99 != nil { p99 := float64(*overview.Duration.P99) if p99 > 1000 { if p99 <= 10000 { latencyScore = (10000 - p99) / 9000 * 100 } else { latencyScore = 0 } } } // Weighted combination return slaScore*0.5 + errorScore*0.3 + latencyScore*0.2 } // computeInfraHealth calculates infrastructure health score (0-100) // Components: Storage (40%) + Compute Resources (30%) + Background Jobs (30%) func computeInfraHealth(now time.Time, overview *OpsDashboardOverview) float64 { // Storage score: DB critical, Redis less critical storageScore := 100.0 if overview.SystemMetrics != nil { if overview.SystemMetrics.DBOK != nil && !*overview.SystemMetrics.DBOK { storageScore = 0 // DB failure is critical } else if overview.SystemMetrics.RedisOK != nil && !*overview.SystemMetrics.RedisOK { storageScore = 50 // Redis failure is degraded but not critical } } // Compute resources score: CPU + Memory computeScore := 100.0 if overview.SystemMetrics != nil { cpuScore := 100.0 if overview.SystemMetrics.CPUUsagePercent != nil { cpuPct := clampFloat64(*overview.SystemMetrics.CPUUsagePercent, 0, 100) if cpuPct > 80 { if cpuPct <= 100 { cpuScore = (100 - cpuPct) / 20 * 100 } else { cpuScore = 0 } } } memScore := 100.0 if overview.SystemMetrics.MemoryUsagePercent != nil { memPct := clampFloat64(*overview.SystemMetrics.MemoryUsagePercent, 0, 100) if memPct > 85 { if memPct <= 100 { memScore = (100 - memPct) / 15 * 100 } else { memScore = 0 } } } computeScore = (cpuScore + memScore) / 2 } // Background jobs score jobScore := 100.0 failedJobs := 0 totalJobs := 0 for _, hb := range overview.JobHeartbeats { if hb == nil { continue } totalJobs++ if hb.LastErrorAt != nil && (hb.LastSuccessAt == nil || hb.LastErrorAt.After(*hb.LastSuccessAt)) { failedJobs++ } else if hb.LastSuccessAt != nil && now.Sub(*hb.LastSuccessAt) > 15*time.Minute { failedJobs++ } } if totalJobs > 0 && failedJobs > 0 { jobScore = (1 - float64(failedJobs)/float64(totalJobs)) * 100 } // Weighted combination return storageScore*0.4 + computeScore*0.3 + jobScore*0.3 } func clampFloat64(v float64, min float64, max float64) float64 { if v < min { return min } if v > max { return max } return v }