Merge pull request #254 from IanShaw027/feat/ops-count-tokens-filter-and-auto-refresh
feat(ops): count_tokens 错误过滤和自动刷新功能
This commit is contained in:
@@ -489,6 +489,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
|
||||
Severity: classifyOpsSeverity("upstream_error", effectiveUpstreamStatus),
|
||||
StatusCode: status,
|
||||
IsBusinessLimited: false,
|
||||
IsCountTokens: isCountTokensRequest(c),
|
||||
|
||||
ErrorMessage: recoveredMsg,
|
||||
ErrorBody: "",
|
||||
@@ -598,6 +599,7 @@ func OpsErrorLoggerMiddleware(ops *service.OpsService) gin.HandlerFunc {
|
||||
Severity: classifyOpsSeverity(parsed.ErrorType, status),
|
||||
StatusCode: status,
|
||||
IsBusinessLimited: isBusinessLimited,
|
||||
IsCountTokens: isCountTokensRequest(c),
|
||||
|
||||
ErrorMessage: parsed.Message,
|
||||
// Keep the full captured error body (capture is already capped at 64KB) so the
|
||||
@@ -704,6 +706,14 @@ var opsRetryRequestHeaderAllowlist = []string{
|
||||
"anthropic-version",
|
||||
}
|
||||
|
||||
// isCountTokensRequest checks if the request is a count_tokens request
|
||||
func isCountTokensRequest(c *gin.Context) bool {
|
||||
if c == nil || c.Request == nil || c.Request.URL == nil {
|
||||
return false
|
||||
}
|
||||
return strings.Contains(c.Request.URL.Path, "/count_tokens")
|
||||
}
|
||||
|
||||
func extractOpsRetryRequestHeaders(c *gin.Context) *string {
|
||||
if c == nil || c.Request == nil {
|
||||
return nil
|
||||
|
||||
@@ -46,6 +46,7 @@ INSERT INTO ops_error_logs (
|
||||
severity,
|
||||
status_code,
|
||||
is_business_limited,
|
||||
is_count_tokens,
|
||||
error_message,
|
||||
error_body,
|
||||
error_source,
|
||||
@@ -64,7 +65,7 @@ INSERT INTO ops_error_logs (
|
||||
retry_count,
|
||||
created_at
|
||||
) VALUES (
|
||||
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34
|
||||
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34,$35
|
||||
) RETURNING id`
|
||||
|
||||
var id int64
|
||||
@@ -88,6 +89,7 @@ INSERT INTO ops_error_logs (
|
||||
opsNullString(input.Severity),
|
||||
opsNullInt(input.StatusCode),
|
||||
input.IsBusinessLimited,
|
||||
input.IsCountTokens,
|
||||
opsNullString(input.ErrorMessage),
|
||||
opsNullString(input.ErrorBody),
|
||||
opsNullString(input.ErrorSource),
|
||||
|
||||
@@ -964,8 +964,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
|
||||
}
|
||||
|
||||
idx := startIndex
|
||||
clauses := make([]string, 0, 4)
|
||||
args = make([]any, 0, 4)
|
||||
clauses := make([]string, 0, 5)
|
||||
args = make([]any, 0, 5)
|
||||
|
||||
args = append(args, start)
|
||||
clauses = append(clauses, fmt.Sprintf("created_at >= $%d", idx))
|
||||
@@ -974,6 +974,8 @@ func buildErrorWhere(filter *service.OpsDashboardFilter, start, end time.Time, s
|
||||
clauses = append(clauses, fmt.Sprintf("created_at < $%d", idx))
|
||||
idx++
|
||||
|
||||
clauses = append(clauses, "is_count_tokens = FALSE")
|
||||
|
||||
if groupID != nil && *groupID > 0 {
|
||||
args = append(args, *groupID)
|
||||
clauses = append(clauses, fmt.Sprintf("group_id = $%d", idx))
|
||||
|
||||
@@ -78,7 +78,9 @@ error_base AS (
|
||||
status_code AS client_status_code,
|
||||
COALESCE(upstream_status_code, status_code, 0) AS effective_status_code
|
||||
FROM ops_error_logs
|
||||
-- Exclude count_tokens requests from error metrics as they are informational probes
|
||||
WHERE created_at >= $1 AND created_at < $2
|
||||
AND is_count_tokens = FALSE
|
||||
),
|
||||
error_agg AS (
|
||||
SELECT
|
||||
|
||||
@@ -170,6 +170,7 @@ error_totals AS (
|
||||
FROM ops_error_logs
|
||||
WHERE created_at >= $1 AND created_at < $2
|
||||
AND COALESCE(status_code, 0) >= 400
|
||||
AND is_count_tokens = FALSE -- 排除 count_tokens 请求的错误
|
||||
GROUP BY 1
|
||||
),
|
||||
combined AS (
|
||||
@@ -243,6 +244,7 @@ error_totals AS (
|
||||
AND platform = $3
|
||||
AND group_id IS NOT NULL
|
||||
AND COALESCE(status_code, 0) >= 400
|
||||
AND is_count_tokens = FALSE -- 排除 count_tokens 请求的错误
|
||||
GROUP BY 1
|
||||
),
|
||||
combined AS (
|
||||
|
||||
@@ -73,6 +73,7 @@ type OpsInsertErrorLogInput struct {
|
||||
Severity string
|
||||
StatusCode int
|
||||
IsBusinessLimited bool
|
||||
IsCountTokens bool // 是否为 count_tokens 请求
|
||||
|
||||
ErrorMessage string
|
||||
ErrorBody string
|
||||
|
||||
@@ -368,6 +368,9 @@ func defaultOpsAdvancedSettings() *OpsAdvancedSettings {
|
||||
Aggregation: OpsAggregationSettings{
|
||||
AggregationEnabled: false,
|
||||
},
|
||||
IgnoreCountTokensErrors: false,
|
||||
AutoRefreshEnabled: false,
|
||||
AutoRefreshIntervalSec: 30,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -388,6 +391,10 @@ func normalizeOpsAdvancedSettings(cfg *OpsAdvancedSettings) {
|
||||
if cfg.DataRetention.HourlyMetricsRetentionDays <= 0 {
|
||||
cfg.DataRetention.HourlyMetricsRetentionDays = 30
|
||||
}
|
||||
// Normalize auto refresh interval (default 30 seconds)
|
||||
if cfg.AutoRefreshIntervalSec <= 0 {
|
||||
cfg.AutoRefreshIntervalSec = 30
|
||||
}
|
||||
}
|
||||
|
||||
func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
||||
@@ -403,6 +410,9 @@ func validateOpsAdvancedSettings(cfg *OpsAdvancedSettings) error {
|
||||
if cfg.DataRetention.HourlyMetricsRetentionDays < 1 || cfg.DataRetention.HourlyMetricsRetentionDays > 365 {
|
||||
return errors.New("hourly_metrics_retention_days must be between 1 and 365")
|
||||
}
|
||||
if cfg.AutoRefreshIntervalSec < 15 || cfg.AutoRefreshIntervalSec > 300 {
|
||||
return errors.New("auto_refresh_interval_seconds must be between 15 and 300")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -79,8 +79,11 @@ type OpsAlertRuntimeSettings struct {
|
||||
|
||||
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation, count_tokens error filtering, dashboard auto refresh).
|
||||
type OpsAdvancedSettings struct {
|
||||
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
||||
Aggregation OpsAggregationSettings `json:"aggregation"`
|
||||
DataRetention OpsDataRetentionSettings `json:"data_retention"`
|
||||
Aggregation OpsAggregationSettings `json:"aggregation"`
|
||||
IgnoreCountTokensErrors bool `json:"ignore_count_tokens_errors"`
|
||||
AutoRefreshEnabled bool `json:"auto_refresh_enabled"`
|
||||
AutoRefreshIntervalSec int `json:"auto_refresh_interval_seconds"`
|
||||
}
|
||||
|
||||
type OpsDataRetentionSettings struct {
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
-- Migration: add the is_count_tokens column to the ops_error_logs table.
-- Purpose: flag errors produced by count_tokens requests so that statistics and
--          alerting can filter them dynamically based on configuration.
-- Author: System
-- Date: 2026-01-12

-- Add the is_count_tokens column to ops_error_logs.
-- IF NOT EXISTS keeps this script re-runnable, consistent with the
-- CREATE INDEX IF NOT EXISTS statement below.
ALTER TABLE ops_error_logs
    ADD COLUMN IF NOT EXISTS is_count_tokens BOOLEAN NOT NULL DEFAULT FALSE;

-- Attach a description to the column.
COMMENT ON COLUMN ops_error_logs.is_count_tokens IS '是否为 count_tokens 请求的错误(用于统计过滤)';

-- Create a partial index for filtering (optional, improves query performance).
-- NOTE(review): the index only contains rows where is_count_tokens = TRUE, so it
-- cannot serve predicates of the form "is_count_tokens = FALSE" - confirm which
-- queries are expected to use it.
CREATE INDEX IF NOT EXISTS idx_ops_error_logs_is_count_tokens
    ON ops_error_logs(is_count_tokens)
    WHERE is_count_tokens = TRUE;
|
||||
@@ -734,6 +734,9 @@ export interface OpsAlertRuntimeSettings {
|
||||
// Advanced ops settings payload (data retention, aggregation, count_tokens
// error filtering and dashboard auto refresh). Field names are snake_case to
// match the JSON keys used by the backend.
export interface OpsAdvancedSettings {
|
||||
data_retention: OpsDataRetentionSettings
|
||||
aggregation: OpsAggregationSettings
|
||||
// When true, errors from count_tokens requests are meant to be left out of
// ops statistics and alerting (they are still stored).
ignore_count_tokens_errors: boolean
|
||||
// Whether the dashboard refreshes its data periodically.
auto_refresh_enabled: boolean
|
||||
// Refresh period in seconds; the backend validation accepts 15 to 300.
auto_refresh_interval_seconds: number
|
||||
}
|
||||
|
||||
export interface OpsDataRetentionSettings {
|
||||
|
||||
@@ -20,6 +20,8 @@
|
||||
:loading="loading"
|
||||
:last-updated="lastUpdated"
|
||||
:thresholds="metricThresholds"
|
||||
:auto-refresh-enabled="autoRefreshEnabled"
|
||||
:auto-refresh-countdown="autoRefreshCountdown"
|
||||
@update:time-range="onTimeRangeChange"
|
||||
@update:platform="onPlatformChange"
|
||||
@update:group="onGroupChange"
|
||||
@@ -104,7 +106,7 @@
|
||||
|
||||
<script setup lang="ts">
|
||||
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
|
||||
import { useDebounceFn } from '@vueuse/core'
|
||||
import { useDebounceFn, useIntervalFn } from '@vueuse/core'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import { useRoute, useRouter } from 'vue-router'
|
||||
import AppLayout from '@/components/layout/AppLayout.vue'
|
||||
@@ -287,6 +289,45 @@ const requestDetailsPreset = ref<OpsRequestDetailsPreset>({
|
||||
const showSettingsDialog = ref(false)
|
||||
const showAlertRulesCard = ref(false)
|
||||
|
||||
// Auto refresh state. These are initial values only; loadAutoRefreshSettings()
// overwrites them with the values returned by the advanced settings API.
|
||||
// Whether periodic dashboard refresh is turned on.
const autoRefreshEnabled = ref(false)
|
||||
// Refresh period in milliseconds (the settings API reports seconds).
const autoRefreshIntervalMs = ref(30000) // default 30 seconds
|
||||
// Seconds left until the next refresh; passed to the header for display.
const autoRefreshCountdown = ref(0)
|
||||
|
||||
// Periodic refresh timer. Every autoRefreshIntervalMs it re-fetches the
// dashboard data, but only while auto refresh is on, ops is enabled and
// `loading` is not set. Created paused ({ immediate: false }); it is started
// and stopped through resumeAutoRefresh / pauseAutoRefresh.
const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
  () => {
    if (!autoRefreshEnabled.value) return
    if (!opsEnabled.value) return
    if (loading.value) return
    fetchData()
  },
  autoRefreshIntervalMs,
  { immediate: false }
)
|
||||
|
||||
// One-second ticker that drives the "next refresh in N s" display. It counts
// autoRefreshCountdown down by one per tick and stops at zero; nothing happens
// while auto refresh is off. Created paused ({ immediate: false }).
const { pause: pauseCountdown, resume: resumeCountdown } = useIntervalFn(
  () => {
    const ticking = autoRefreshEnabled.value && autoRefreshCountdown.value > 0
    if (!ticking) return
    autoRefreshCountdown.value -= 1
  },
  1000,
  { immediate: false }
)
|
||||
|
||||
// Fetch the advanced settings and copy the auto refresh options into local
// state: the enabled flag, the interval (seconds converted to milliseconds)
// and a countdown reset to one full interval. Failures are logged and the
// current state is left as it was.
async function loadAutoRefreshSettings() {
  try {
    const {
      auto_refresh_enabled: enabled,
      auto_refresh_interval_seconds: intervalSec
    } = await opsAPI.getAdvancedSettings()
    autoRefreshEnabled.value = enabled
    autoRefreshIntervalMs.value = intervalSec * 1000
    autoRefreshCountdown.value = intervalSec
  } catch (err) {
    console.error('[OpsDashboard] Failed to load auto refresh settings', err)
  }
}
|
||||
|
||||
function handleThroughputSelectPlatform(nextPlatform: string) {
|
||||
platform.value = nextPlatform || ''
|
||||
groupId.value = null
|
||||
@@ -510,6 +551,10 @@ async function fetchData() {
|
||||
])
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
lastUpdated.value = new Date()
|
||||
// Reset auto refresh countdown after successful fetch
|
||||
if (autoRefreshEnabled.value) {
|
||||
autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
|
||||
}
|
||||
} catch (err) {
|
||||
if (!isOpsDisabledError(err)) {
|
||||
console.error('[ops] failed to fetch dashboard data', err)
|
||||
@@ -567,9 +612,18 @@ onMounted(async () => {
|
||||
// Load thresholds configuration
|
||||
loadThresholds()
|
||||
|
||||
// Load auto refresh settings
|
||||
await loadAutoRefreshSettings()
|
||||
|
||||
if (opsEnabled.value) {
|
||||
await fetchData()
|
||||
}
|
||||
|
||||
// Start auto refresh if enabled
|
||||
if (autoRefreshEnabled.value) {
|
||||
resumeAutoRefresh()
|
||||
resumeCountdown()
|
||||
}
|
||||
})
|
||||
|
||||
async function loadThresholds() {
|
||||
@@ -584,5 +638,27 @@ async function loadThresholds() {
|
||||
|
||||
// Cleanup when the dashboard view is unmounted: call abortDashboardFetch()
// (presumably cancels an in-flight fetch - confirm) and pause both the
// refresh timer and the countdown ticker so they stop firing.
onUnmounted(() => {
|
||||
abortDashboardFetch()
|
||||
pauseAutoRefresh()
|
||||
pauseCountdown()
|
||||
})
|
||||
|
||||
// React to the auto refresh flag being toggled.
// Off: stop both timers and clear the countdown.
// On: reset the countdown to one full interval (whole seconds) and start both timers.
watch(autoRefreshEnabled, (enabled) => {
  if (!enabled) {
    pauseAutoRefresh()
    pauseCountdown()
    autoRefreshCountdown.value = 0
    return
  }

  autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
  resumeAutoRefresh()
  resumeCountdown()
})
|
||||
|
||||
// When the settings dialog goes from open to closed, re-read the auto refresh
// settings so changes saved in the dialog take effect on the dashboard.
watch(showSettingsDialog, async (show) => {
  if (show) return
  await loadAutoRefreshSettings()
})
|
||||
</script>
|
||||
|
||||
@@ -23,6 +23,8 @@ interface Props {
|
||||
loading: boolean
|
||||
lastUpdated: Date | null
|
||||
thresholds?: OpsMetricThresholds | null // 阈值配置
|
||||
autoRefreshEnabled?: boolean
|
||||
autoRefreshCountdown?: number
|
||||
}
|
||||
|
||||
interface Emits {
|
||||
@@ -839,6 +841,17 @@ function handleToolbarRefresh() {
|
||||
<span>·</span>
|
||||
<span>{{ t('common.refresh') }}: {{ updatedAtLabel }}</span>
|
||||
|
||||
<template v-if="props.autoRefreshEnabled && props.autoRefreshCountdown !== undefined">
|
||||
<span>·</span>
|
||||
<span class="flex items-center gap-1">
|
||||
<svg class="h-3 w-3 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
|
||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
||||
</svg>
|
||||
<span>自动刷新: {{ props.autoRefreshCountdown }}s</span>
|
||||
</span>
|
||||
</template>
|
||||
|
||||
<template v-if="systemMetrics">
|
||||
<span>·</span>
|
||||
<span>
|
||||
|
||||
@@ -487,6 +487,48 @@ async function saveAllSettings() {
|
||||
<Toggle v-model="advancedSettings.aggregation.aggregation_enabled" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 错误过滤 -->
|
||||
<div class="space-y-3">
|
||||
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">错误过滤</h5>
|
||||
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-gray-700 dark:text-gray-300">忽略 count_tokens 错误</label>
|
||||
<p class="mt-1 text-xs text-gray-500">
|
||||
启用后,count_tokens 请求的错误将不计入运维监控的统计和告警中(但仍会存储在数据库中)
|
||||
</p>
|
||||
</div>
|
||||
<Toggle v-model="advancedSettings.ignore_count_tokens_errors" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- 自动刷新 -->
|
||||
<div class="space-y-3">
|
||||
<h5 class="text-xs font-semibold text-gray-700 dark:text-gray-300">自动刷新</h5>
|
||||
|
||||
<div class="flex items-center justify-between">
|
||||
<div>
|
||||
<label class="text-sm font-medium text-gray-700 dark:text-gray-300">启用自动刷新</label>
|
||||
<p class="mt-1 text-xs text-gray-500">
|
||||
自动刷新仪表板数据,启用后会定期拉取最新数据
|
||||
</p>
|
||||
</div>
|
||||
<Toggle v-model="advancedSettings.auto_refresh_enabled" />
|
||||
</div>
|
||||
|
||||
<div v-if="advancedSettings.auto_refresh_enabled">
|
||||
<label class="input-label">刷新间隔</label>
|
||||
<Select
|
||||
v-model="advancedSettings.auto_refresh_interval_seconds"
|
||||
:options="[
|
||||
{ value: 15, label: '15 秒' },
|
||||
{ value: 30, label: '30 秒' },
|
||||
{ value: 60, label: '60 秒' }
|
||||
]"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user