feat(ops): add account switch metrics and trend
This commit is contained in:
@@ -285,6 +285,11 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
|
||||
return fmt.Errorf("query error counts: %w", err)
|
||||
}
|
||||
|
||||
accountSwitchCount, err := c.queryAccountSwitchCount(ctx, windowStart, windowEnd)
|
||||
if err != nil {
|
||||
return fmt.Errorf("query account switch counts: %w", err)
|
||||
}
|
||||
|
||||
windowSeconds := windowEnd.Sub(windowStart).Seconds()
|
||||
if windowSeconds <= 0 {
|
||||
windowSeconds = 60
|
||||
@@ -310,6 +315,7 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
|
||||
Upstream529Count: upstream529,
|
||||
|
||||
TokenConsumed: tokenConsumed,
|
||||
AccountSwitchCount: accountSwitchCount,
|
||||
QPS: float64Ptr(roundTo1DP(qps)),
|
||||
TPS: float64Ptr(roundTo1DP(tps)),
|
||||
|
||||
@@ -551,6 +557,27 @@ WHERE created_at >= $1 AND created_at < $2`
|
||||
return errorTotal, businessLimited, errorSLA, upstreamExcl429529, upstream429, upstream529, nil
|
||||
}
|
||||
|
||||
func (c *OpsMetricsCollector) queryAccountSwitchCount(ctx context.Context, start, end time.Time) (int64, error) {
|
||||
q := `
|
||||
SELECT
|
||||
COALESCE(SUM(CASE
|
||||
WHEN ev->>'kind' IN ('failover', 'retry_exhausted_failover', 'failover_on_400') THEN 1
|
||||
ELSE 0
|
||||
END), 0) AS switch_count
|
||||
FROM ops_error_logs o
|
||||
CROSS JOIN LATERAL jsonb_array_elements(
|
||||
COALESCE(NULLIF(o.upstream_errors, 'null'::jsonb), '[]'::jsonb)
|
||||
) AS ev
|
||||
WHERE o.created_at >= $1 AND o.created_at < $2
|
||||
AND o.is_count_tokens = FALSE`
|
||||
|
||||
var count int64
|
||||
if err := c.db.QueryRowContext(ctx, q, start, end).Scan(&count); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
type opsCollectedSystemStats struct {
|
||||
cpuUsagePercent *float64
|
||||
memoryUsedMB *int64
|
||||
|
||||
@@ -162,6 +162,7 @@ type OpsInsertSystemMetricsInput struct {
|
||||
Upstream529Count int64
|
||||
|
||||
TokenConsumed int64
|
||||
AccountSwitchCount int64
|
||||
|
||||
QPS *float64
|
||||
TPS *float64
|
||||
@@ -225,6 +226,7 @@ type OpsSystemMetricsSnapshot struct {
|
||||
|
||||
GoroutineCount *int `json:"goroutine_count"`
|
||||
ConcurrencyQueueDepth *int `json:"concurrency_queue_depth"`
|
||||
AccountSwitchCount *int64 `json:"account_switch_count"`
|
||||
}
|
||||
|
||||
type OpsUpsertJobHeartbeatInput struct {
|
||||
|
||||
@@ -6,6 +6,7 @@ type OpsThroughputTrendPoint struct {
|
||||
BucketStart time.Time `json:"bucket_start"`
|
||||
RequestCount int64 `json:"request_count"`
|
||||
TokenConsumed int64 `json:"token_consumed"`
|
||||
SwitchCount int64 `json:"switch_count"`
|
||||
QPS float64 `json:"qps"`
|
||||
TPS float64 `json:"tps"`
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user