feat(ops): 添加 count_tokens 错误过滤功能

功能特性:
- 自动识别并标记 count_tokens 请求的错误
- 支持配置是否在统计中忽略 count_tokens 错误
- 错误数据完整保留,仅在统计时动态过滤

技术实现:
- ops_error_logger.go: 自动标记 count_tokens 请求
- ops_repo.go: INSERT 语句添加 is_count_tokens 字段
- ops_repo_dashboard.go: buildErrorWhere 核心过滤函数
- ops_repo_preagg.go: 预聚合统计中添加过滤
- ops_repo_trends.go: 趋势统计查询添加过滤(2 处)
- ops_settings_models.go: 添加 ignore_count_tokens_errors 配置
- ops_settings.go: 配置验证和默认值设置
- ops_port.go: 错误日志模型添加 IsCountTokens 字段

业务价值:
- count_tokens 是探测性请求,其错误不影响真实业务 SLA
- 用户可根据需求灵活控制是否计入统计
- 提升错误率、告警等运维指标的准确性

影响范围:
- Dashboard 概览统计
- 错误趋势图表
- 告警规则评估
- 预聚合指标(hourly/daily)
- 健康分数计算
This commit is contained in:
IanShaw027
2026-01-12 16:50:41 +08:00
parent c02c120579
commit 345a965fa3
8 changed files with 37 additions and 5 deletions

View File

@@ -46,6 +46,7 @@ INSERT INTO ops_error_logs (
severity,
status_code,
is_business_limited,
is_count_tokens,
error_message,
error_body,
error_source,
@@ -64,7 +65,7 @@ INSERT INTO ops_error_logs (
retry_count,
created_at
) VALUES (
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34,$35
) RETURNING id`
var id int64
@@ -88,6 +89,7 @@ INSERT INTO ops_error_logs (
opsNullString(input.Severity),
opsNullInt(input.StatusCode),
input.IsBusinessLimited,
input.IsCountTokens,
opsNullString(input.ErrorMessage),
opsNullString(input.ErrorBody),
opsNullString(input.ErrorSource),

View File

@@ -964,8 +964,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
}
idx := startIndex
clauses := make([]string, 0, 4)
args = make([]any, 0, 4)
clauses := make([]string, 0, 5)
args = make([]any, 0, 5)
args = append(args, start)
clauses = append(clauses, fmt.Sprintf("created_at >= $%d", idx))
@@ -974,6 +974,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
clauses = append(clauses, fmt.Sprintf("created_at < $%d", idx))
idx++
clauses = append(clauses, "is_count_tokens = FALSE")
if groupID != nil && *groupID > 0 {
args = append(args, *groupID)
clauses = append(clauses, fmt.Sprintf("group_id = $%d", idx))

View File

@@ -78,7 +78,9 @@ error_base AS (
status_code AS client_status_code,
COALESCE(upstream_status_code, status_code, 0) AS effective_status_code
FROM ops_error_logs
-- Exclude count_tokens requests from error metrics as they are informational probes
WHERE created_at >= $1 AND created_at < $2
AND is_count_tokens = FALSE
),
error_agg AS (
SELECT

View File

@@ -170,6 +170,7 @@ error_totals AS (
FROM ops_error_logs
WHERE created_at >= $1 AND created_at < $2
AND COALESCE(status_code, 0) >= 400
AND is_count_tokens = FALSE -- 排除 count_tokens 请求的错误
GROUP BY 1
),
combined AS (
@@ -243,6 +244,7 @@ error_totals AS (
AND platform = $3
AND group_id IS NOT NULL
AND COALESCE(status_code, 0) >= 400
AND is_count_tokens = FALSE -- 排除 count_tokens 请求的错误
GROUP BY 1
),
combined AS (