feat(ops): 添加分组和账号级别监控指标
- 后端新增 GetAccountAvailability 方法获取账号可用性数据
- 添加分组可用率和限流率计算辅助函数
- 前端支持分组和账号级别的监控指标类型
- 优化警报规则指标选择器,按类别分组显示
This commit is contained in:
@@ -2,6 +2,7 @@ package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
)
|
||||
|
||||
@@ -155,3 +156,39 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi
|
||||
|
||||
return platform, group, account, &collectedAt, nil
|
||||
}
|
||||
|
||||
type OpsAccountAvailability struct {
|
||||
Group *GroupAvailability
|
||||
Accounts map[int64]*AccountAvailability
|
||||
CollectedAt *time.Time
|
||||
}
|
||||
|
||||
func (s *OpsService) GetAccountAvailability(ctx context.Context, platformFilter string, groupIDFilter *int64) (*OpsAccountAvailability, error) {
|
||||
if s == nil {
|
||||
return nil, errors.New("ops service is nil")
|
||||
}
|
||||
|
||||
if s.getAccountAvailability != nil {
|
||||
return s.getAccountAvailability(ctx, platformFilter, groupIDFilter)
|
||||
}
|
||||
|
||||
_, groupStats, accountStats, collectedAt, err := s.GetAccountAvailabilityStats(ctx, platformFilter, groupIDFilter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var group *GroupAvailability
|
||||
if groupIDFilter != nil && *groupIDFilter > 0 {
|
||||
group = groupStats[*groupIDFilter]
|
||||
}
|
||||
|
||||
if accountStats == nil {
|
||||
accountStats = map[int64]*AccountAvailability{}
|
||||
}
|
||||
|
||||
return &OpsAccountAvailability{
|
||||
Group: group,
|
||||
Accounts: accountStats,
|
||||
CollectedAt: collectedAt,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -838,3 +838,38 @@ func (l *slidingWindowLimiter) Allow(now time.Time) bool {
|
||||
l.sent = append(l.sent, now)
|
||||
return true
|
||||
}
|
||||
|
||||
// computeGroupAvailableRatio returns the available percentage for a group.
|
||||
// Formula: (AvailableCount / TotalAccounts) * 100.
|
||||
// Returns 0 when TotalAccounts is 0.
|
||||
func computeGroupAvailableRatio(group *GroupAvailability) float64 {
|
||||
if group == nil || group.TotalAccounts <= 0 {
|
||||
return 0
|
||||
}
|
||||
return (float64(group.AvailableCount) / float64(group.TotalAccounts)) * 100
|
||||
}
|
||||
|
||||
// computeGroupRateLimitRatio returns the rate-limited percentage for a group.
|
||||
// Formula: (RateLimitCount / TotalAccounts) * 100.
|
||||
// Returns 0 when TotalAccounts is 0.
|
||||
func computeGroupRateLimitRatio(group *GroupAvailability) float64 {
|
||||
if group == nil || group.TotalAccounts <= 0 {
|
||||
return 0
|
||||
}
|
||||
return (float64(group.RateLimitCount) / float64(group.TotalAccounts)) * 100
|
||||
}
|
||||
|
||||
// countAccountsByCondition counts accounts that satisfy the given condition.
|
||||
// It iterates over accounts and applies the predicate to each entry.
|
||||
func countAccountsByCondition(accounts map[int64]*AccountAvailability, condition func(*AccountAvailability) bool) int64 {
|
||||
if len(accounts) == 0 || condition == nil {
|
||||
return 0
|
||||
}
|
||||
var count int64
|
||||
for _, account := range accounts {
|
||||
if account != nil && condition(account) {
|
||||
count++
|
||||
}
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
@@ -28,6 +28,9 @@ type OpsService struct {
|
||||
|
||||
accountRepo AccountRepository
|
||||
|
||||
// getAccountAvailability is a unit-test hook for overriding account availability lookup.
|
||||
getAccountAvailability func(ctx context.Context, platformFilter string, groupIDFilter *int64) (*OpsAccountAvailability, error)
|
||||
|
||||
concurrencyService *ConcurrencyService
|
||||
gatewayService *GatewayService
|
||||
openAIGatewayService *OpenAIGatewayService
|
||||
|
||||
@@ -592,6 +592,13 @@ export type MetricType =
|
||||
| 'cpu_usage_percent'
|
||||
| 'memory_usage_percent'
|
||||
| 'concurrency_queue_depth'
|
||||
| 'group_available_accounts'
|
||||
| 'group_available_ratio'
|
||||
| 'group_rate_limit_ratio'
|
||||
| 'account_rate_limited_count'
|
||||
| 'account_error_count'
|
||||
| 'account_error_ratio'
|
||||
| 'overload_account_count'
|
||||
export type Operator = '>' | '>=' | '<' | '<=' | '==' | '!='
|
||||
|
||||
export interface AlertRule {
|
||||
|
||||
@@ -4,7 +4,7 @@ import { useI18n } from 'vue-i18n'
|
||||
import { useAppStore } from '@/stores/app'
|
||||
import BaseDialog from '@/components/common/BaseDialog.vue'
|
||||
import ConfirmDialog from '@/components/common/ConfirmDialog.vue'
|
||||
import Select from '@/components/common/Select.vue'
|
||||
import Select, { type SelectOption } from '@/components/common/Select.vue'
|
||||
import { opsAPI } from '@/api/admin/ops'
|
||||
import type { AlertRule, MetricType, Operator } from '../types'
|
||||
import type { OpsSeverity } from '@/api/admin/ops'
|
||||
@@ -42,17 +42,50 @@ const saving = ref(false)
|
||||
const editingId = ref<number | null>(null)
|
||||
const draft = ref<AlertRule | null>(null)
|
||||
|
||||
type MetricGroup = 'system' | 'group' | 'account'
|
||||
|
||||
/**
 * Catalogue of every metric an alert rule can target, tagged with the category
 * ('system' | 'group' | 'account') it belongs to. Labels are resolved through
 * `t()` inside the computed getter.
 */
const metricDefinitions = computed(() => {
  // Builds one catalogue entry; the label is looked up from its i18n key.
  const define = (type: MetricType, group: MetricGroup, labelKey: string) => ({
    type,
    group,
    label: t(labelKey)
  })

  // System-level metrics
  const systemMetrics = [
    define('success_rate', 'system', 'admin.ops.alertRules.metrics.successRate'),
    define('error_rate', 'system', 'admin.ops.alertRules.metrics.errorRate'),
    define('upstream_error_rate', 'system', 'admin.ops.alertRules.metrics.upstreamErrorRate'),
    define('p95_latency_ms', 'system', 'admin.ops.alertRules.metrics.p95'),
    define('p99_latency_ms', 'system', 'admin.ops.alertRules.metrics.p99'),
    define('cpu_usage_percent', 'system', 'admin.ops.alertRules.metrics.cpu'),
    define('memory_usage_percent', 'system', 'admin.ops.alertRules.metrics.memory'),
    define('concurrency_queue_depth', 'system', 'admin.ops.alertRules.metrics.queueDepth')
  ]

  // Group-level metrics (requires group_id filter)
  const groupMetrics = [
    define('group_available_accounts', 'group', 'admin.ops.alertRules.metrics.groupAvailableAccounts'),
    define('group_available_ratio', 'group', 'admin.ops.alertRules.metrics.groupAvailableRatio'),
    define('group_rate_limit_ratio', 'group', 'admin.ops.alertRules.metrics.groupRateLimitRatio')
  ]

  // Account-level metrics
  const accountMetrics = [
    define('account_rate_limited_count', 'account', 'admin.ops.alertRules.metrics.accountRateLimitedCount'),
    define('account_error_count', 'account', 'admin.ops.alertRules.metrics.accountErrorCount'),
    define('account_error_ratio', 'account', 'admin.ops.alertRules.metrics.accountErrorRatio'),
    define('overload_account_count', 'account', 'admin.ops.alertRules.metrics.overloadAccountCount')
  ]

  return [...systemMetrics, ...groupMetrics, ...accountMetrics] satisfies Array<{
    type: MetricType
    group: MetricGroup
    label: string
  }>
})
|
||||
|
||||
/**
 * Options for the alert-rule metric selector, grouped by category.
 *
 * Each non-empty category contributes a disabled header entry
 * (`kind: 'group'`, value `__group__<category>`) followed by its metrics from
 * `metricDefinitions`. Categories appear in the order system, group, account.
 *
 * The previous flat list returned early, which left the grouped builder
 * unreachable and hid every group- and account-level metric from the selector.
 * Only the grouped implementation is kept.
 */
const metricOptions = computed(() => {
  const buildGroup = (group: MetricGroup): SelectOption[] => {
    const items = metricDefinitions.value.filter((m) => m.group === group)
    // An empty category gets no header either.
    if (items.length === 0) return []

    // Synthetic value for the header row; it is disabled so it cannot be
    // chosen as a metric (presumably rendered as a section title by Select; confirm).
    const headerValue = `__group__${group}`
    return [
      {
        value: headerValue,
        label: t(`admin.ops.alertRules.metricGroups.${group}`),
        disabled: true,
        kind: 'group'
      },
      ...items.map((m) => ({ value: m.type, label: m.label }))
    ]
  }

  return [...buildGroup('system'), ...buildGroup('group'), ...buildGroup('account')]
})
|
||||
|
||||
const operatorOptions = computed(() => {
|
||||
|
||||
Reference in New Issue
Block a user