feat(ops): add account switch metrics and trend

This commit is contained in:
song
2026-01-23 19:39:48 +08:00
parent 3002c7a17f
commit 316f2fee21
12 changed files with 307 additions and 20 deletions

View File

@@ -285,6 +285,11 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
return fmt.Errorf("query error counts: %w", err)
}
accountSwitchCount, err := c.queryAccountSwitchCount(ctx, windowStart, windowEnd)
if err != nil {
return fmt.Errorf("query account switch counts: %w", err)
}
windowSeconds := windowEnd.Sub(windowStart).Seconds()
if windowSeconds <= 0 {
windowSeconds = 60
@@ -310,6 +315,7 @@ func (c *OpsMetricsCollector) collectAndPersist(ctx context.Context) error {
Upstream529Count: upstream529,
TokenConsumed: tokenConsumed,
AccountSwitchCount: accountSwitchCount,
QPS: float64Ptr(roundTo1DP(qps)),
TPS: float64Ptr(roundTo1DP(tps)),
@@ -551,6 +557,27 @@ WHERE created_at >= $1 AND created_at < $2`
return errorTotal, businessLimited, errorSLA, upstreamExcl429529, upstream429, upstream529, nil
}
func (c *OpsMetricsCollector) queryAccountSwitchCount(ctx context.Context, start, end time.Time) (int64, error) {
q := `
SELECT
COALESCE(SUM(CASE
WHEN ev->>'kind' IN ('failover', 'retry_exhausted_failover', 'failover_on_400') THEN 1
ELSE 0
END), 0) AS switch_count
FROM ops_error_logs o
CROSS JOIN LATERAL jsonb_array_elements(
COALESCE(NULLIF(o.upstream_errors, 'null'::jsonb), '[]'::jsonb)
) AS ev
WHERE o.created_at >= $1 AND o.created_at < $2
AND o.is_count_tokens = FALSE`
var count int64
if err := c.db.QueryRowContext(ctx, q, start, end).Scan(&count); err != nil {
return 0, err
}
return count, nil
}
type opsCollectedSystemStats struct {
cpuUsagePercent *float64
memoryUsedMB *int64

View File

@@ -162,6 +162,7 @@ type OpsInsertSystemMetricsInput struct {
Upstream529Count int64
TokenConsumed int64
AccountSwitchCount int64
QPS *float64
TPS *float64
@@ -225,6 +226,7 @@ type OpsSystemMetricsSnapshot struct {
GoroutineCount *int `json:"goroutine_count"`
ConcurrencyQueueDepth *int `json:"concurrency_queue_depth"`
AccountSwitchCount *int64 `json:"account_switch_count"`
}
type OpsUpsertJobHeartbeatInput struct {

View File

@@ -6,6 +6,7 @@ type OpsThroughputTrendPoint struct {
BucketStart time.Time `json:"bucket_start"`
RequestCount int64 `json:"request_count"`
TokenConsumed int64 `json:"token_consumed"`
SwitchCount int64 `json:"switch_count"`
QPS float64 `json:"qps"`
TPS float64 `json:"tps"`
}