feat(monitor): admin channel monitor MVP with SSRF protection and batch aggregation
新增 admin「渠道监控」模块(参考 BingZi-233/check-cx),独立于现有 Channel 体系。
admin 配置 + 后台定时调用上游 LLM chat completions 健康检查 + 所有登录用户只读可见。
后端:
- ent: channel_monitor + channel_monitor_history(AES-256-GCM 加密 api_key)
- service 按职责拆分:service/aggregator/validate/checker/runner/ssrf
- provider strategy map 替代 switch(openai/anthropic/gemini)
- repository batch 聚合(ListLatestForMonitorIDs + ComputeAvailabilityForMonitors)消除 N+1
- runner: ticker(5s) + pond worker pool(5) + inFlight 防并发 + TrySubmit 防雪崩
+ 凌晨 3 点 cron 清理 30 天历史
- SSRF 防护:强制 https + 私网/loopback/云元数据 IP 拒绝(127/8、10/8、172.16/12、
192.168/16、169.254/16、100.64/10、::1、fc00::/7、fe80::/10)+ DialContext
在 socket 层防 DNS rebinding
- API key sanitize:擦除 url.Error 与上游响应 body 中的 sk-/sk-ant-/AIza/JWT 模式
- APIKeyDecryptFailed 标志位 + 单 monitor 路径检测,避免空 key 调用上游
handler:
- admin: CRUD + 手动触发 + 历史接口(api_key 脱敏)
- user: 只读列表 + 状态详情(去除 api_key/endpoint)
- ParseChannelMonitorID 共用 + dto.ChannelMonitorExtraModelStatus 共用
前端:
- 路由 /admin/channels/{pricing,monitor} + /monitor(用户只读)
- AppSidebar 父项 expandOnly 支持
- ChannelMonitorView 拆为 8 个子组件 + ChannelStatusView 拆出 detail dialog
- composables/useChannelMonitorFormat + constants/channelMonitor 共享
- i18n monitorCommon namespace 消除 admin/user 两 view 重复
合规:所有文件符合 CLAUDE.md(Go ≤ 500 行 / Vue ≤ 300 行 / 函数 ≤ 30 行)
CI: go build / gofmt / golangci-lint(0 issues) / make test-unit / pnpm build 全绿
This commit is contained in:
217
backend/internal/service/channel_monitor_aggregator.go
Normal file
217
backend/internal/service/channel_monitor_aggregator.go
Normal file
@@ -0,0 +1,217 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
// 渠道监控聚合层:把 latest + availability 拼成 admin/user 视图所需的 summary / detail。
|
||||
// 所有方法都遵守"失败仅日志,返回零值"的原则,避免 N+1 查询失败拖垮列表渲染。
|
||||
|
||||
// BatchMonitorStatusSummary 批量聚合多个监控的 latest + 7d 可用率(admin/user list 用,消除 N+1)。
|
||||
// 失败时返回空 map,错误仅日志,不影响列表渲染。
|
||||
//
|
||||
// 参数:
|
||||
// - ids: 要聚合的 monitor ID 列表
|
||||
// - primaryByID: monitor ID -> primary model(用于读 7d 可用率与 latest 状态)
|
||||
// - extrasByID: monitor ID -> extra models 列表(用于读 latest 状态填充 ExtraModels)
|
||||
func (s *ChannelMonitorService) BatchMonitorStatusSummary(
|
||||
ctx context.Context,
|
||||
ids []int64,
|
||||
primaryByID map[int64]string,
|
||||
extrasByID map[int64][]string,
|
||||
) map[int64]MonitorStatusSummary {
|
||||
out := make(map[int64]MonitorStatusSummary, len(ids))
|
||||
if len(ids) == 0 {
|
||||
return out
|
||||
}
|
||||
latestMap, err := s.repo.ListLatestForMonitorIDs(ctx, ids)
|
||||
if err != nil {
|
||||
slog.Warn("channel_monitor: batch load latest failed", "error", err)
|
||||
latestMap = map[int64][]*ChannelMonitorLatest{}
|
||||
}
|
||||
availMap, err := s.repo.ComputeAvailabilityForMonitors(ctx, ids, monitorAvailability7Days)
|
||||
if err != nil {
|
||||
slog.Warn("channel_monitor: batch compute availability failed", "error", err)
|
||||
availMap = map[int64][]*ChannelMonitorAvailability{}
|
||||
}
|
||||
|
||||
for _, id := range ids {
|
||||
out[id] = buildStatusSummary(
|
||||
indexLatestByModel(latestMap[id]),
|
||||
indexAvailabilityByModel(availMap[id]),
|
||||
primaryByID[id],
|
||||
extrasByID[id],
|
||||
)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// ListUserView 用户只读视图:列出所有 enabled 监控的概览。
|
||||
// 使用批量聚合接口避免 N+1:1 次查 monitors,1 次查 latest(所有 monitor),1 次查 availability。
|
||||
func (s *ChannelMonitorService) ListUserView(ctx context.Context) ([]*UserMonitorView, error) {
|
||||
monitors, err := s.repo.ListEnabled(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list enabled monitors: %w", err)
|
||||
}
|
||||
if len(monitors) == 0 {
|
||||
return []*UserMonitorView{}, nil
|
||||
}
|
||||
|
||||
ids := make([]int64, 0, len(monitors))
|
||||
primaryByID := make(map[int64]string, len(monitors))
|
||||
extrasByID := make(map[int64][]string, len(monitors))
|
||||
for _, m := range monitors {
|
||||
ids = append(ids, m.ID)
|
||||
primaryByID[m.ID] = m.PrimaryModel
|
||||
extrasByID[m.ID] = m.ExtraModels
|
||||
}
|
||||
summaries := s.BatchMonitorStatusSummary(ctx, ids, primaryByID, extrasByID)
|
||||
|
||||
views := make([]*UserMonitorView, 0, len(monitors))
|
||||
for _, m := range monitors {
|
||||
summary := summaries[m.ID]
|
||||
views = append(views, buildUserViewFromSummary(m, summary))
|
||||
}
|
||||
return views, nil
|
||||
}
|
||||
|
||||
// GetUserDetail 用户只读视图:单个监控详情(每个模型 7d/15d/30d 可用率与平均延迟)。
|
||||
// 不暴露 api_key。
|
||||
func (s *ChannelMonitorService) GetUserDetail(ctx context.Context, id int64) (*UserMonitorDetail, error) {
|
||||
m, err := s.repo.GetByID(ctx, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if !m.Enabled {
|
||||
return nil, ErrChannelMonitorNotFound
|
||||
}
|
||||
|
||||
latest, err := s.repo.ListLatestPerModel(ctx, id)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("list latest per model: %w", err)
|
||||
}
|
||||
availMap, err := s.collectAvailabilityWindows(ctx, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
models := mergeModelDetails(m, latest, availMap)
|
||||
return &UserMonitorDetail{
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Provider: m.Provider,
|
||||
GroupName: m.GroupName,
|
||||
Models: models,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectAvailabilityWindows 一次性查询 7/15/30 天三个窗口,按模型组织。
|
||||
func (s *ChannelMonitorService) collectAvailabilityWindows(ctx context.Context, monitorID int64) (map[int]map[string]*ChannelMonitorAvailability, error) {
|
||||
out := make(map[int]map[string]*ChannelMonitorAvailability, 3)
|
||||
windows := []int{monitorAvailability7Days, monitorAvailability15Days, monitorAvailability30Days}
|
||||
for _, w := range windows {
|
||||
rows, err := s.repo.ComputeAvailability(ctx, monitorID, w)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("compute availability %dd: %w", w, err)
|
||||
}
|
||||
out[w] = indexAvailabilityByModel(rows)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ---------- 纯函数 helper(无 IO,可在 batch / 单 monitor / detail 路径复用)----------
|
||||
|
||||
// indexLatestByModel 把 latest 切片按 model 索引(小工具,避免在 hot path 重复写)。
|
||||
func indexLatestByModel(rows []*ChannelMonitorLatest) map[string]*ChannelMonitorLatest {
|
||||
m := make(map[string]*ChannelMonitorLatest, len(rows))
|
||||
for _, r := range rows {
|
||||
m[r.Model] = r
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// indexAvailabilityByModel 把 availability 切片按 model 索引。
|
||||
func indexAvailabilityByModel(rows []*ChannelMonitorAvailability) map[string]*ChannelMonitorAvailability {
|
||||
m := make(map[string]*ChannelMonitorAvailability, len(rows))
|
||||
for _, r := range rows {
|
||||
m[r.Model] = r
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
// buildStatusSummary 由 latest + availability 字典构造 MonitorStatusSummary。
|
||||
// 不做任何 IO,纯组装,便于在 batch 与单 monitor 路径复用。
|
||||
func buildStatusSummary(
|
||||
latestByModel map[string]*ChannelMonitorLatest,
|
||||
availByModel map[string]*ChannelMonitorAvailability,
|
||||
primary string,
|
||||
extras []string,
|
||||
) MonitorStatusSummary {
|
||||
summary := MonitorStatusSummary{ExtraModels: make([]ExtraModelStatus, 0, len(extras))}
|
||||
if primary != "" {
|
||||
if l, ok := latestByModel[primary]; ok {
|
||||
summary.PrimaryStatus = l.Status
|
||||
summary.PrimaryLatencyMs = l.LatencyMs
|
||||
}
|
||||
if a, ok := availByModel[primary]; ok {
|
||||
summary.Availability7d = a.AvailabilityPct
|
||||
}
|
||||
}
|
||||
for _, model := range extras {
|
||||
entry := ExtraModelStatus{Model: model}
|
||||
if l, ok := latestByModel[model]; ok {
|
||||
entry.Status = l.Status
|
||||
entry.LatencyMs = l.LatencyMs
|
||||
}
|
||||
summary.ExtraModels = append(summary.ExtraModels, entry)
|
||||
}
|
||||
return summary
|
||||
}
|
||||
|
||||
// buildUserViewFromSummary 用预聚合好的 MonitorStatusSummary 装填 UserMonitorView(无 IO)。
|
||||
func buildUserViewFromSummary(m *ChannelMonitor, summary MonitorStatusSummary) *UserMonitorView {
|
||||
return &UserMonitorView{
|
||||
ID: m.ID,
|
||||
Name: m.Name,
|
||||
Provider: m.Provider,
|
||||
GroupName: m.GroupName,
|
||||
PrimaryModel: m.PrimaryModel,
|
||||
PrimaryStatus: summary.PrimaryStatus,
|
||||
PrimaryLatencyMs: summary.PrimaryLatencyMs,
|
||||
Availability7d: summary.Availability7d,
|
||||
ExtraModels: summary.ExtraModels,
|
||||
}
|
||||
}
|
||||
|
||||
// mergeModelDetails 合并 latest + availability 三个窗口为 ModelDetail 列表。
|
||||
// 复用 indexLatestByModel,避免在多处重复写 build map 逻辑。
|
||||
func mergeModelDetails(
|
||||
m *ChannelMonitor,
|
||||
latest []*ChannelMonitorLatest,
|
||||
availMap map[int]map[string]*ChannelMonitorAvailability,
|
||||
) []ModelDetail {
|
||||
all := append([]string{m.PrimaryModel}, m.ExtraModels...)
|
||||
latestByModel := indexLatestByModel(latest)
|
||||
out := make([]ModelDetail, 0, len(all))
|
||||
for _, model := range all {
|
||||
d := ModelDetail{Model: model}
|
||||
if l, ok := latestByModel[model]; ok {
|
||||
d.LatestStatus = l.Status
|
||||
d.LatestLatencyMs = l.LatencyMs
|
||||
}
|
||||
if a, ok := availMap[monitorAvailability7Days][model]; ok {
|
||||
d.Availability7d = a.AvailabilityPct
|
||||
d.AvgLatency7dMs = a.AvgLatencyMs
|
||||
}
|
||||
if a, ok := availMap[monitorAvailability15Days][model]; ok {
|
||||
d.Availability15d = a.AvailabilityPct
|
||||
}
|
||||
if a, ok := availMap[monitorAvailability30Days][model]; ok {
|
||||
d.Availability30d = a.AvailabilityPct
|
||||
}
|
||||
out = append(out, d)
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user