Files
sub2api/backend/internal/service/ops_settings_models.go
IanShaw027 345a965fa3 feat(ops): 添加 count_tokens 错误过滤功能
功能特性:
- 自动识别并标记 count_tokens 请求的错误
- 支持配置是否在统计中忽略 count_tokens 错误
- 错误数据完整保留,仅在统计时动态过滤

技术实现:
- ops_error_logger.go: 自动标记 count_tokens 请求
- ops_repo.go: INSERT 语句添加 is_count_tokens 字段
- ops_repo_dashboard.go: buildErrorWhere 核心过滤函数
- ops_repo_preagg.go: 预聚合统计中添加过滤
- ops_repo_trends.go: 趋势统计查询添加过滤(2 处)
- ops_settings_models.go: 添加 ignore_count_tokens_errors 配置
- ops_settings.go: 配置验证和默认值设置
- ops_port.go: 错误日志模型添加 IsCountTokens 字段

业务价值:
- count_tokens 是探测性请求,其错误不影响真实业务 SLA
- 用户可根据需求灵活控制是否计入统计
- 提升错误率、告警等运维指标的准确性

影响范围:
- Dashboard 概览统计
- 错误趋势图表
- 告警规则评估
- 预聚合指标(hourly/daily)
- 健康分数计算
2026-01-12 17:06:12 +08:00

100 lines
4.3 KiB
Go

package service
// Ops settings models stored in DB `settings` table (JSON blobs).
type OpsEmailNotificationConfig struct {
Alert OpsEmailAlertConfig `json:"alert"`
Report OpsEmailReportConfig `json:"report"`
}
type OpsEmailAlertConfig struct {
Enabled bool `json:"enabled"`
Recipients []string `json:"recipients"`
MinSeverity string `json:"min_severity"`
RateLimitPerHour int `json:"rate_limit_per_hour"`
BatchingWindowSeconds int `json:"batching_window_seconds"`
IncludeResolvedAlerts bool `json:"include_resolved_alerts"`
}
type OpsEmailReportConfig struct {
Enabled bool `json:"enabled"`
Recipients []string `json:"recipients"`
DailySummaryEnabled bool `json:"daily_summary_enabled"`
DailySummarySchedule string `json:"daily_summary_schedule"`
WeeklySummaryEnabled bool `json:"weekly_summary_enabled"`
WeeklySummarySchedule string `json:"weekly_summary_schedule"`
ErrorDigestEnabled bool `json:"error_digest_enabled"`
ErrorDigestSchedule string `json:"error_digest_schedule"`
ErrorDigestMinCount int `json:"error_digest_min_count"`
AccountHealthEnabled bool `json:"account_health_enabled"`
AccountHealthSchedule string `json:"account_health_schedule"`
AccountHealthErrorRateThreshold float64 `json:"account_health_error_rate_threshold"`
}
// OpsEmailNotificationConfigUpdateRequest allows partial updates, while the
// frontend can still send the full config shape.
type OpsEmailNotificationConfigUpdateRequest struct {
Alert *OpsEmailAlertConfig `json:"alert"`
Report *OpsEmailReportConfig `json:"report"`
}
type OpsDistributedLockSettings struct {
Enabled bool `json:"enabled"`
Key string `json:"key"`
TTLSeconds int `json:"ttl_seconds"`
}
type OpsAlertSilenceEntry struct {
RuleID *int64 `json:"rule_id,omitempty"`
Severities []string `json:"severities,omitempty"`
UntilRFC3339 string `json:"until_rfc3339"`
Reason string `json:"reason"`
}
type OpsAlertSilencingSettings struct {
Enabled bool `json:"enabled"`
GlobalUntilRFC3339 string `json:"global_until_rfc3339"`
GlobalReason string `json:"global_reason"`
Entries []OpsAlertSilenceEntry `json:"entries,omitempty"`
}
type OpsMetricThresholds struct {
SLAPercentMin *float64 `json:"sla_percent_min,omitempty"` // SLA低于此值变红
LatencyP99MsMax *float64 `json:"latency_p99_ms_max,omitempty"` // 延迟P99高于此值变红
TTFTp99MsMax *float64 `json:"ttft_p99_ms_max,omitempty"` // TTFT P99高于此值变红
RequestErrorRatePercentMax *float64 `json:"request_error_rate_percent_max,omitempty"` // 请求错误率高于此值变红
UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // 上游错误率高于此值变红
}
type OpsAlertRuntimeSettings struct {
EvaluationIntervalSeconds int `json:"evaluation_interval_seconds"`
DistributedLock OpsDistributedLockSettings `json:"distributed_lock"`
Silencing OpsAlertSilencingSettings `json:"silencing"`
Thresholds OpsMetricThresholds `json:"thresholds"` // 指标阈值配置
}
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).
type OpsAdvancedSettings struct {
DataRetention OpsDataRetentionSettings `json:"data_retention"`
Aggregation OpsAggregationSettings `json:"aggregation"`
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
}
type OpsDataRetentionSettings struct {
CleanupEnabled bool `json:"cleanup_enabled"`
CleanupSchedule string `json:"cleanup_schedule"`
ErrorLogRetentionDays int `json:"error_log_retention_days"`
MinuteMetricsRetentionDays int `json:"minute_metrics_retention_days"`
HourlyMetricsRetentionDays int `json:"hourly_metrics_retention_days"`
}
type OpsAggregationSettings struct {
AggregationEnabled bool `json:"aggregation_enabled"`
}