From dd59e872ffae5cf65c0aa37164f3ea099f4ad194 Mon Sep 17 00:00:00 2001 From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com> Date: Sun, 11 Jan 2026 20:33:52 +0800 Subject: [PATCH] =?UTF-8?q?feat(ops):=20=E6=B7=BB=E5=8A=A0=E5=88=86?= =?UTF-8?q?=E7=BB=84=E5=92=8C=E8=B4=A6=E5=8F=B7=E7=BA=A7=E5=88=AB=E7=9B=91?= =?UTF-8?q?=E6=8E=A7=E6=8C=87=E6=A0=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 后端新增 GetAccountAvailability 方法获取账号可用性数据 - 添加分组可用率和限流率计算辅助函数 - 前端支持分组和账号级别的监控指标类型 - 优化警报规则指标选择器,按类别分组显示 --- .../service/ops_account_availability.go | 37 +++++++++++++ .../service/ops_alert_evaluator_service.go | 35 ++++++++++++ backend/internal/service/ops_service.go | 3 + frontend/src/api/admin/ops.ts | 7 +++ .../ops/components/OpsAlertRulesCard.vue | 55 +++++++++++++++---- 5 files changed, 126 insertions(+), 11 deletions(-) diff --git a/backend/internal/service/ops_account_availability.go b/backend/internal/service/ops_account_availability.go index d0cbbe5c..da66ec4d 100644 --- a/backend/internal/service/ops_account_availability.go +++ b/backend/internal/service/ops_account_availability.go @@ -2,6 +2,7 @@ package service import ( "context" + "errors" "time" ) @@ -155,3 +156,39 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi return platform, group, account, &collectedAt, nil } + +type OpsAccountAvailability struct { + Group *GroupAvailability + Accounts map[int64]*AccountAvailability + CollectedAt *time.Time +} + +func (s *OpsService) GetAccountAvailability(ctx context.Context, platformFilter string, groupIDFilter *int64) (*OpsAccountAvailability, error) { + if s == nil { + return nil, errors.New("ops service is nil") + } + + if s.getAccountAvailability != nil { + return s.getAccountAvailability(ctx, platformFilter, groupIDFilter) + } + + _, groupStats, accountStats, collectedAt, err := s.GetAccountAvailabilityStats(ctx, platformFilter, groupIDFilter) + if err != nil { + return nil, err + } + + var group *GroupAvailability + if groupIDFilter != nil && *groupIDFilter > 0 { + group = groupStats[*groupIDFilter] + } + + if accountStats == nil { + accountStats = map[int64]*AccountAvailability{} + } + + return &OpsAccountAvailability{ + Group: group, + Accounts: accountStats, + CollectedAt: collectedAt, + }, nil +} diff --git a/backend/internal/service/ops_alert_evaluator_service.go b/backend/internal/service/ops_alert_evaluator_service.go index 81712136..ceec5d87 100644 --- a/backend/internal/service/ops_alert_evaluator_service.go +++ b/backend/internal/service/ops_alert_evaluator_service.go @@ -838,3 +838,38 @@ func (l *slidingWindowLimiter) Allow(now time.Time) bool { l.sent = append(l.sent, now) return true } + +// computeGroupAvailableRatio returns the available percentage for a group. +// Formula: (AvailableCount / TotalAccounts) * 100. +// Returns 0 when TotalAccounts is 0. +func computeGroupAvailableRatio(group *GroupAvailability) float64 { + if group == nil || group.TotalAccounts <= 0 { + return 0 + } + return (float64(group.AvailableCount) / float64(group.TotalAccounts)) * 100 +} + +// computeGroupRateLimitRatio returns the rate-limited percentage for a group. +// Formula: (RateLimitCount / TotalAccounts) * 100. +// Returns 0 when TotalAccounts is 0. +func computeGroupRateLimitRatio(group *GroupAvailability) float64 { + if group == nil || group.TotalAccounts <= 0 { + return 0 + } + return (float64(group.RateLimitCount) / float64(group.TotalAccounts)) * 100 +} + +// countAccountsByCondition counts accounts that satisfy the given condition. +// It iterates over accounts and applies the predicate to each entry. +func countAccountsByCondition(accounts map[int64]*AccountAvailability, condition func(*AccountAvailability) bool) int64 { + if len(accounts) == 0 || condition == nil { + return 0 + } + var count int64 + for _, account := range accounts { + if account != nil && condition(account) { + count++ + } + } + return count +} diff --git a/backend/internal/service/ops_service.go b/backend/internal/service/ops_service.go index e3ad5589..426d46f1 100644 --- a/backend/internal/service/ops_service.go +++ b/backend/internal/service/ops_service.go @@ -28,6 +28,9 @@ type OpsService struct { accountRepo AccountRepository + // getAccountAvailability is a unit-test hook for overriding account availability lookup. + getAccountAvailability func(ctx context.Context, platformFilter string, groupIDFilter *int64) (*OpsAccountAvailability, error) + concurrencyService *ConcurrencyService gatewayService *GatewayService openAIGatewayService *OpenAIGatewayService diff --git a/frontend/src/api/admin/ops.ts b/frontend/src/api/admin/ops.ts index c0df4605..1d1453f5 100644 --- a/frontend/src/api/admin/ops.ts +++ b/frontend/src/api/admin/ops.ts @@ -592,6 +592,13 @@ export type MetricType = | 'cpu_usage_percent' | 'memory_usage_percent' | 'concurrency_queue_depth' + | 'group_available_accounts' + | 'group_available_ratio' + | 'group_rate_limit_ratio' + | 'account_rate_limited_count' + | 'account_error_count' + | 'account_error_ratio' + | 'overload_account_count' export type Operator = '>' | '>=' | '<' | '<=' | '==' | '!=' export interface AlertRule { diff --git a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue index edf8c40c..bc6d4948 100644 --- a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue +++ b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue @@ -4,7 +4,7 @@ import { useI18n } from 'vue-i18n' import { useAppStore } from '@/stores/app' import BaseDialog from '@/components/common/BaseDialog.vue' import ConfirmDialog from '@/components/common/ConfirmDialog.vue' -import Select from '@/components/common/Select.vue' +import Select, { type SelectOption } from '@/components/common/Select.vue' import { opsAPI } from '@/api/admin/ops' import type { AlertRule, MetricType, Operator } from '../types' import type { OpsSeverity } from '@/api/admin/ops' @@ -42,17 +42,50 @@ const saving = ref(false) const editingId = ref(null) const draft = ref(null) +type MetricGroup = 'system' | 'group' | 'account' + +const metricDefinitions = computed(() => { + return [ + // System-level metrics + { type: 'success_rate' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.successRate') }, + { type: 'error_rate' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.errorRate') }, + { type: 'upstream_error_rate' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.upstreamErrorRate') }, + { type: 'p95_latency_ms' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.p95') }, + { type: 'p99_latency_ms' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.p99') }, + { type: 'cpu_usage_percent' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.cpu') }, + { type: 'memory_usage_percent' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.memory') }, + { type: 'concurrency_queue_depth' as MetricType, group: 'system' as const, label: t('admin.ops.alertRules.metrics.queueDepth') }, + + // Group-level metrics (requires group_id filter) + { type: 'group_available_accounts' as MetricType, group: 'group' as const, label: t('admin.ops.alertRules.metrics.groupAvailableAccounts') }, + { type: 'group_available_ratio' as MetricType, group: 'group' as const, label: t('admin.ops.alertRules.metrics.groupAvailableRatio') }, + { type: 'group_rate_limit_ratio' as MetricType, group: 'group' as const, label: t('admin.ops.alertRules.metrics.groupRateLimitRatio') }, + + // Account-level metrics + { type: 'account_rate_limited_count' as MetricType, group: 'account' as const, label: t('admin.ops.alertRules.metrics.accountRateLimitedCount') }, + { type: 'account_error_count' as MetricType, group: 'account' as const, label: t('admin.ops.alertRules.metrics.accountErrorCount') }, + { type: 'account_error_ratio' as MetricType, group: 'account' as const, label: t('admin.ops.alertRules.metrics.accountErrorRatio') }, + { type: 'overload_account_count' as MetricType, group: 'account' as const, label: t('admin.ops.alertRules.metrics.overloadAccountCount') } + ] satisfies Array<{ type: MetricType; group: MetricGroup; label: string }> +}) + const metricOptions = computed(() => { - const items: Array<{ value: MetricType; label: string }> = [ - { value: 'success_rate', label: t('admin.ops.alertRules.metrics.successRate') }, - { value: 'error_rate', label: t('admin.ops.alertRules.metrics.errorRate') }, - { value: 'p95_latency_ms', label: t('admin.ops.alertRules.metrics.p95') }, - { value: 'p99_latency_ms', label: t('admin.ops.alertRules.metrics.p99') }, - { value: 'cpu_usage_percent', label: t('admin.ops.alertRules.metrics.cpu') }, - { value: 'memory_usage_percent', label: t('admin.ops.alertRules.metrics.memory') }, - { value: 'concurrency_queue_depth', label: t('admin.ops.alertRules.metrics.queueDepth') } - ] - return items + const buildGroup = (group: MetricGroup): SelectOption[] => { + const items = metricDefinitions.value.filter((m) => m.group === group) + if (items.length === 0) return [] + const headerValue = `__group__${group}` + return [ + { + value: headerValue, + label: t(`admin.ops.alertRules.metricGroups.${group}`), + disabled: true, + kind: 'group' + }, + ...items.map((m) => ({ value: m.type, label: m.label })) + ] + } + + return [...buildGroup('system'), ...buildGroup('group'), ...buildGroup('account')] }) const operatorOptions = computed(() => {