Merge pull request #251 from IanShaw027/fix/ops-bugs

feat(ops): 运维看板功能增强 - 实时流量监控与指标阈值配置
This commit is contained in:
Wesley Liddick
2026-01-12 15:26:26 +08:00
committed by GitHub
22 changed files with 1291 additions and 413 deletions

View File

@@ -118,3 +118,96 @@ func (h *OpsHandler) GetAccountAvailability(c *gin.Context) {
}
response.Success(c, payload)
}
// parseOpsRealtimeWindow maps the "window" query value to a duration and its
// canonical label. Matching ignores case and surrounding whitespace, and an
// empty value selects the 1-minute default. The boolean result is false when
// the value is not one of the supported windows.
func parseOpsRealtimeWindow(v string) (time.Duration, string, bool) {
	type windowSpec struct {
		span  time.Duration
		label string
	}

	var spec windowSpec
	switch normalized := strings.ToLower(strings.TrimSpace(v)); normalized {
	case "", "1m", "1min":
		spec = windowSpec{span: time.Minute, label: "1min"}
	case "5m", "5min":
		spec = windowSpec{span: 5 * time.Minute, label: "5min"}
	case "30m", "30min":
		spec = windowSpec{span: 30 * time.Minute, label: "30min"}
	case "60m", "60min", "1h":
		spec = windowSpec{span: time.Hour, label: "1h"}
	default:
		return 0, "", false
	}
	return spec.span, spec.label, true
}
// GetRealtimeTrafficSummary reports current/peak/average QPS and TPS for a recent window.
// GET /api/v1/admin/ops/realtime-traffic
//
// Query params:
//   - window: 1min|5min|30min|1h (default: 1min)
//   - platform: optional
//   - group_id: optional; must parse as a positive integer when present
//
// The success payload always carries "enabled", "summary" and "timestamp".
// When realtime monitoring is switched off, the summary is returned with
// zero-valued rates and the traffic summary is not requested from the service.
func (h *OpsHandler) GetRealtimeTrafficSummary(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}
	ctx := c.Request.Context()
	if enabledErr := h.opsService.RequireMonitoringEnabled(ctx); enabledErr != nil {
		response.ErrorFrom(c, enabledErr)
		return
	}

	span, label, valid := parseOpsRealtimeWindow(c.Query("window"))
	if !valid {
		response.BadRequest(c, "Invalid window")
		return
	}

	platform := strings.TrimSpace(c.Query("platform"))

	// group_id stays nil unless the caller supplied a usable value.
	var groupID *int64
	if raw := strings.TrimSpace(c.Query("group_id")); raw != "" {
		parsed, parseErr := strconv.ParseInt(raw, 10, 64)
		if parseErr != nil || parsed <= 0 {
			response.BadRequest(c, "Invalid group_id")
			return
		}
		groupID = &parsed
	}

	// The window always ends at the moment of the request, expressed in UTC.
	endTime := time.Now().UTC()
	startTime := endTime.Add(-span)

	if !h.opsService.IsRealtimeMonitoringEnabled(ctx) {
		response.Success(c, gin.H{
			"enabled": false,
			"summary": &service.OpsRealtimeTrafficSummary{
				Window:    label,
				StartTime: startTime,
				EndTime:   endTime,
				Platform:  platform,
				GroupID:   groupID,
				QPS:       service.OpsRateSummary{},
				TPS:       service.OpsRateSummary{},
			},
			"timestamp": endTime,
		})
		return
	}

	summary, err := h.opsService.GetRealtimeTrafficSummary(ctx, &service.OpsDashboardFilter{
		StartTime: startTime,
		EndTime:   endTime,
		Platform:  platform,
		GroupID:   groupID,
		QueryMode: service.OpsQueryModeRaw,
	})
	if err != nil {
		response.ErrorFrom(c, err)
		return
	}
	// The window label is filled in here from the parsed query value.
	if summary != nil {
		summary.Window = label
	}

	response.Success(c, gin.H{
		"enabled":   true,
		"summary":   summary,
		"timestamp": endTime,
	})
}

View File

@@ -146,3 +146,49 @@ func (h *OpsHandler) UpdateAdvancedSettings(c *gin.Context) {
}
response.Success(c, updated)
}
// GetMetricThresholds serves the stored Ops metric thresholds (DB-backed).
// GET /api/v1/admin/ops/settings/metric-thresholds
//
// Responds 503 when the ops service is missing, forwards the error from
// RequireMonitoringEnabled, and answers 500 with a fixed message when the
// thresholds cannot be loaded.
func (h *OpsHandler) GetMetricThresholds(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}

	ctx := c.Request.Context()
	if enabledErr := h.opsService.RequireMonitoringEnabled(ctx); enabledErr != nil {
		response.ErrorFrom(c, enabledErr)
		return
	}

	thresholds, loadErr := h.opsService.GetMetricThresholds(ctx)
	if loadErr != nil {
		response.Error(c, http.StatusInternalServerError, "Failed to get metric thresholds")
		return
	}
	response.Success(c, thresholds)
}
// UpdateMetricThresholds replaces the stored Ops metric thresholds (DB-backed).
// PUT /api/v1/admin/ops/settings/metric-thresholds
//
// The JSON body is decoded into service.OpsMetricThresholds. Any error returned
// by the service is reported as 400 with the error text as the message.
func (h *OpsHandler) UpdateMetricThresholds(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}

	ctx := c.Request.Context()
	if enabledErr := h.opsService.RequireMonitoringEnabled(ctx); enabledErr != nil {
		response.ErrorFrom(c, enabledErr)
		return
	}

	payload := service.OpsMetricThresholds{}
	if bindErr := c.ShouldBindJSON(&payload); bindErr != nil {
		response.BadRequest(c, "Invalid request body")
		return
	}

	saved, saveErr := h.opsService.UpdateMetricThresholds(ctx, &payload)
	if saveErr != nil {
		response.Error(c, http.StatusBadRequest, saveErr.Error())
		return
	}
	response.Success(c, saved)
}

View File

@@ -0,0 +1,129 @@
package repository
import (
"context"
"fmt"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
)
// GetRealtimeTrafficSummary computes average, per-minute peak and current
// QPS/TPS for the time range described by filter.StartTime and filter.EndTime.
//
// The range must be strictly positive and at most one hour; otherwise an error
// is returned. Successful requests and token counts are read from usage_logs,
// failed requests from ops_error_logs rows whose status_code is >= 400. Both
// sources are grouped into minute buckets, so the peak is the busiest minute
// divided by 60, while the average divides the totals by the window length in
// seconds. All rates pass through roundTo1DP.
//
// The Window label of the returned summary is not populated here.
func (r *opsRepository) GetRealtimeTrafficSummary(ctx context.Context, filter *service.OpsDashboardFilter) (*service.OpsRealtimeTrafficSummary, error) {
	if r == nil || r.db == nil {
		return nil, fmt.Errorf("nil ops repository")
	}
	if filter == nil {
		return nil, fmt.Errorf("nil filter")
	}
	if filter.StartTime.IsZero() || filter.EndTime.IsZero() {
		return nil, fmt.Errorf("start_time/end_time required")
	}

	start := filter.StartTime.UTC()
	end := filter.EndTime.UTC()
	if start.After(end) {
		return nil, fmt.Errorf("start_time must be <= end_time")
	}
	// After the check above only start == end can reach this branch.
	window := end.Sub(start)
	if window <= 0 {
		return nil, fmt.Errorf("invalid time window")
	}
	if window > time.Hour {
		return nil, fmt.Errorf("window too large")
	}

	// The error-side placeholders continue numbering where the usage-side ones stop.
	usageJoin, usageWhere, usageArgs, next := buildUsageWhere(filter, start, end, 1)
	errorWhere, errorArgs, _ := buildErrorWhere(filter, start, end, next)

	q := `
WITH usage_buckets AS (
SELECT
date_trunc('minute', ul.created_at) AS bucket,
COALESCE(COUNT(*), 0) AS success_count,
COALESCE(SUM(input_tokens + output_tokens + cache_creation_tokens + cache_read_tokens), 0) AS token_sum
FROM usage_logs ul
` + usageJoin + `
` + usageWhere + `
GROUP BY 1
),
error_buckets AS (
SELECT
date_trunc('minute', created_at) AS bucket,
COALESCE(COUNT(*), 0) AS error_count
FROM ops_error_logs
` + errorWhere + `
AND COALESCE(status_code, 0) >= 400
GROUP BY 1
),
combined AS (
SELECT
COALESCE(u.bucket, e.bucket) AS bucket,
COALESCE(u.success_count, 0) AS success_count,
COALESCE(u.token_sum, 0) AS token_sum,
COALESCE(e.error_count, 0) AS error_count,
COALESCE(u.success_count, 0) + COALESCE(e.error_count, 0) AS request_total
FROM usage_buckets u
FULL OUTER JOIN error_buckets e ON u.bucket = e.bucket
)
SELECT
COALESCE(SUM(success_count), 0) AS success_total,
COALESCE(SUM(error_count), 0) AS error_total,
COALESCE(SUM(token_sum), 0) AS token_total,
COALESCE(MAX(request_total), 0) AS peak_requests_per_min,
COALESCE(MAX(token_sum), 0) AS peak_tokens_per_min
FROM combined`

	args := append(usageArgs, errorArgs...)

	var successCount int64
	var errorTotal int64
	var tokenConsumed int64
	var peakRequestsPerMin int64
	var peakTokensPerMin int64
	if err := r.db.QueryRowContext(ctx, q, args...).Scan(
		&successCount,
		&errorTotal,
		&tokenConsumed,
		&peakRequestsPerMin,
		&peakTokensPerMin,
	); err != nil {
		return nil, err
	}

	// window > 0 was validated above, so the divisor is always positive and
	// needs no fallback.
	windowSeconds := window.Seconds()
	requestCountTotal := successCount + errorTotal
	qpsAvg := roundTo1DP(float64(requestCountTotal) / windowSeconds)
	tpsAvg := roundTo1DP(float64(tokenConsumed) / windowSeconds)

	// Keep "current" consistent with the dashboard overview semantics by
	// delegating to queryCurrentRates, anchored at the end of the window.
	qpsCurrent, tpsCurrent, err := r.queryCurrentRates(ctx, filter, end)
	if err != nil {
		return nil, err
	}

	// Peaks are per-minute counts; convert them to per-second rates.
	qpsPeak := roundTo1DP(float64(peakRequestsPerMin) / 60.0)
	tpsPeak := roundTo1DP(float64(peakTokensPerMin) / 60.0)

	return &service.OpsRealtimeTrafficSummary{
		StartTime: start,
		EndTime:   end,
		Platform:  strings.TrimSpace(filter.Platform),
		GroupID:   filter.GroupID,
		QPS: service.OpsRateSummary{
			Current: qpsCurrent,
			Peak:    qpsPeak,
			Avg:     qpsAvg,
		},
		TPS: service.OpsRateSummary{
			Current: tpsCurrent,
			Peak:    tpsPeak,
			Avg:     tpsAvg,
		},
	}, nil
}

View File

@@ -73,6 +73,7 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
// Realtime ops signals
ops.GET("/concurrency", h.Admin.Ops.GetConcurrencyStats)
ops.GET("/account-availability", h.Admin.Ops.GetAccountAvailability)
ops.GET("/realtime-traffic", h.Admin.Ops.GetRealtimeTrafficSummary)
// Alerts (rules + events)
ops.GET("/alert-rules", h.Admin.Ops.ListAlertRules)
@@ -96,6 +97,13 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
ops.GET("/advanced-settings", h.Admin.Ops.GetAdvancedSettings)
ops.PUT("/advanced-settings", h.Admin.Ops.UpdateAdvancedSettings)
// Settings group (DB-backed)
settings := ops.Group("/settings")
{
settings.GET("/metric-thresholds", h.Admin.Ops.GetMetricThresholds)
settings.PUT("/metric-thresholds", h.Admin.Ops.UpdateMetricThresholds)
}
// WebSocket realtime (QPS/TPS)
ws := ops.Group("/ws")
{

View File

@@ -17,6 +17,8 @@ type OpsRepository interface {
// Lightweight window stats (for realtime WS / quick sampling).
GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error)
// Lightweight realtime traffic summary (for the Ops dashboard header card).
GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error)
GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error)
GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error)

View File

@@ -0,0 +1,36 @@
package service
import (
"context"
"time"
infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
)
// GetRealtimeTrafficSummary returns QPS/TPS current/peak/avg for the provided window.
// This is used by the Ops dashboard "Realtime Traffic" card and is intentionally lightweight.
//
// An error is returned when monitoring is disabled, the repository is missing,
// or the filter is unusable: nil, a zero StartTime/EndTime, a StartTime that is
// not strictly before EndTime, or a window longer than one hour.
//
// Note: filter.QueryMode is overwritten with OpsQueryModeRaw on the caller's
// filter before it is handed to the repository.
func (s *OpsService) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) {
	if err := s.RequireMonitoringEnabled(ctx); err != nil {
		return nil, err
	}
	if s.opsRepo == nil {
		return nil, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
	}
	if filter == nil {
		return nil, infraerrors.BadRequest("OPS_FILTER_REQUIRED", "filter is required")
	}
	if filter.StartTime.IsZero() || filter.EndTime.IsZero() {
		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_REQUIRED", "start_time/end_time are required")
	}
	// Reject inverted AND zero-length windows here. A zero-length window cannot
	// yield a rate, and validating it at this layer reports it as a BadRequest
	// instead of letting it fail further down as a plain error.
	if !filter.StartTime.Before(filter.EndTime) {
		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_INVALID", "start_time must be < end_time")
	}
	if filter.EndTime.Sub(filter.StartTime) > time.Hour {
		return nil, infraerrors.BadRequest("OPS_TIME_RANGE_TOO_LARGE", "invalid time range: max window is 1 hour")
	}

	// Realtime traffic summary always uses raw logs (minute granularity peaks).
	filter.QueryMode = OpsQueryModeRaw

	return s.opsRepo.GetRealtimeTrafficSummary(ctx, filter)
}

View File

@@ -0,0 +1,19 @@
package service
import "time"
// OpsRealtimeTrafficSummary is a lightweight summary used by the Ops dashboard "Realtime Traffic" card.
// It reports QPS/TPS current/peak/avg for the requested time window.
type OpsRealtimeTrafficSummary struct {
// Window is a normalized label (e.g. "1min", "5min", "30min", "1h").
Window string `json:"window"`
// StartTime and EndTime bound the window the rates were computed over.
StartTime time.Time `json:"start_time"`
EndTime time.Time `json:"end_time"`
// Platform echoes the platform filter of the request; empty when none was given.
Platform string `json:"platform"`
// GroupID echoes the group filter of the request; nil (JSON null) when none was given.
GroupID *int64 `json:"group_id"`
// QPS holds the request-rate figures (current/peak/avg).
QPS OpsRateSummary `json:"qps"`
// TPS holds the token-rate figures (current/peak/avg).
TPS OpsRateSummary `json:"tps"`
}

View File

@@ -463,3 +463,93 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
_ = json.Unmarshal(raw, updated)
return updated, nil
}
// =========================
// Metric thresholds
// =========================
const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
// defaultOpsMetricThresholds builds a fresh threshold set with the built-in
// defaults: SLA 99.5%, latency P99 2000 ms, TTFT P99 500 ms and 5% for both
// error rates. Every call returns newly allocated values.
func defaultOpsMetricThresholds() *OpsMetricThresholds {
	// ptr gives each field its own addressable copy of the literal.
	ptr := func(v float64) *float64 { return &v }

	return &OpsMetricThresholds{
		SLAPercentMin:               ptr(99.5),
		LatencyP99MsMax:             ptr(2000.0),
		TTFTp99MsMax:                ptr(500.0),
		RequestErrorRatePercentMax:  ptr(5.0),
		UpstreamErrorRatePercentMax: ptr(5.0),
	}
}
// GetMetricThresholds loads the metric thresholds stored under
// SettingKeyOpsMetricThresholds.
//
// Defaults are returned when the service or its setting repository is nil,
// when the setting does not exist yet (in which case the defaults are also
// written back on a best-effort basis), and when the stored value cannot be
// decoded. Any other repository error is returned to the caller.
func (s *OpsService) GetMetricThresholds(ctx context.Context) (*OpsMetricThresholds, error) {
	fallback := defaultOpsMetricThresholds()
	if s == nil || s.settingRepo == nil {
		return fallback, nil
	}
	if ctx == nil {
		ctx = context.Background()
	}

	stored, err := s.settingRepo.GetValue(ctx, SettingKeyOpsMetricThresholds)
	switch {
	case err == nil:
		// Value found; decode it below.
	case errors.Is(err, ErrSettingNotFound):
		// Seed the setting with the defaults; a failed write is deliberately ignored.
		if encoded, encErr := json.Marshal(fallback); encErr == nil {
			_ = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(encoded))
		}
		return fallback, nil
	default:
		return nil, err
	}

	var parsed OpsMetricThresholds
	if decodeErr := json.Unmarshal([]byte(stored), &parsed); decodeErr != nil {
		// An undecodable stored value falls back to the defaults instead of failing.
		return fallback, nil
	}
	return &parsed, nil
}
// UpdateMetricThresholds validates cfg and stores it as JSON under
// SettingKeyOpsMetricThresholds, returning a copy decoded from the stored bytes.
//
// Unset (nil) fields are skipped by validation. Percentage fields must lie in
// [0, 100]; millisecond fields must be >= 0. The first violated rule is
// reported, checked in the order the fields are listed below.
func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricThresholds) (*OpsMetricThresholds, error) {
	if s == nil || s.settingRepo == nil {
		return nil, errors.New("setting repository not initialized")
	}
	if ctx == nil {
		ctx = context.Background()
	}
	if cfg == nil {
		return nil, errors.New("invalid config")
	}

	// Validate thresholds
	rules := []struct {
		value   *float64
		percent bool // true: value is additionally capped at 100
		message string
	}{
		{cfg.SLAPercentMin, true, "sla_percent_min must be between 0 and 100"},
		{cfg.LatencyP99MsMax, false, "latency_p99_ms_max must be >= 0"},
		{cfg.TTFTp99MsMax, false, "ttft_p99_ms_max must be >= 0"},
		{cfg.RequestErrorRatePercentMax, true, "request_error_rate_percent_max must be between 0 and 100"},
		{cfg.UpstreamErrorRatePercentMax, true, "upstream_error_rate_percent_max must be between 0 and 100"},
	}
	for _, rule := range rules {
		if rule.value == nil {
			continue
		}
		if v := *rule.value; v < 0 || (rule.percent && v > 100) {
			return nil, errors.New(rule.message)
		}
	}

	encoded, err := json.Marshal(cfg)
	if err != nil {
		return nil, err
	}
	if err = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(encoded)); err != nil {
		return nil, err
	}

	// Decode the bytes that were just produced so the caller gets an
	// independent copy; the decode error is ignored as in the other setters.
	var saved OpsMetricThresholds
	_ = json.Unmarshal(encoded, &saved)
	return &saved, nil
}

View File

@@ -61,11 +61,20 @@ type OpsAlertSilencingSettings struct {
Entries []OpsAlertSilenceEntry `json:"entries,omitempty"`
}
// OpsMetricThresholds holds the optional limits used to highlight dashboard metrics.
// Every field is a pointer and is omitted from JSON when nil.
type OpsMetricThresholds struct {
SLAPercentMin *float64 `json:"sla_percent_min,omitempty"` // SLA below this value is shown in red
LatencyP99MsMax *float64 `json:"latency_p99_ms_max,omitempty"` // latency P99 above this value is shown in red
TTFTp99MsMax *float64 `json:"ttft_p99_ms_max,omitempty"` // TTFT P99 above this value is shown in red
RequestErrorRatePercentMax *float64 `json:"request_error_rate_percent_max,omitempty"` // request error rate above this value is shown in red
UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // upstream error rate above this value is shown in red
}
// OpsAlertRuntimeSettings groups the alert runtime configuration: evaluation
// interval, distributed lock, silencing and metric thresholds.
type OpsAlertRuntimeSettings struct {
EvaluationIntervalSeconds int `json:"evaluation_interval_seconds"`
DistributedLock OpsDistributedLockSettings `json:"distributed_lock"`
Silencing OpsAlertSilencingSettings `json:"silencing"`
Thresholds OpsMetricThresholds `json:"thresholds"` // metric threshold configuration
}
// OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).

14
frontend/.eslintignore Normal file
View File

@@ -0,0 +1,14 @@
# 忽略编译后的文件
vite.config.js
vite.config.d.ts
# 忽略依赖
node_modules/
# 忽略构建输出
dist/
../backend/internal/web/dist/
# 忽略缓存
.cache/
.vite/

View File

@@ -362,6 +362,45 @@ export async function getAccountAvailabilityStats(platform?: string, groupId?: n
return data
}
/** Rate figures for one metric (used for both QPS and TPS). */
export interface OpsRateSummary {
current: number
peak: number
avg: number
}
/**
 * Realtime traffic summary as returned under `summary` by
 * GET /admin/ops/realtime-traffic.
 */
export interface OpsRealtimeTrafficSummary {
// Window label for the summary (presumably the value sent as `window` — confirm against backend)
window: string
start_time: string
end_time: string
platform: string
group_id?: number | null
// Request-rate figures
qps: OpsRateSummary
// Token-rate figures
tps: OpsRateSummary
}
/** Response shape expected from GET /admin/ops/realtime-traffic. */
export interface OpsRealtimeTrafficSummaryResponse {
// Flag reported by the server alongside the summary
enabled: boolean
summary: OpsRealtimeTrafficSummary | null
timestamp?: string
}
/**
 * Fetches the realtime traffic summary for the given window.
 *
 * `platform` is sent only when non-empty, and `group_id` only when `groupId`
 * is a positive number.
 */
export async function getRealtimeTrafficSummary(
  window: string,
  platform?: string,
  groupId?: number | null
): Promise<OpsRealtimeTrafficSummaryResponse> {
  const hasGroup = typeof groupId === 'number' && groupId > 0
  const params: Record<string, any> = {
    window,
    ...(platform ? { platform } : {}),
    ...(hasGroup ? { group_id: groupId } : {})
  }

  const response = await apiClient.get<OpsRealtimeTrafficSummaryResponse>('/admin/ops/realtime-traffic', { params })
  return response.data
}
/**
* Subscribe to realtime QPS updates via WebSocket.
*
@@ -661,6 +700,14 @@ export interface EmailNotificationConfig {
}
}
/** Optional metric limits; a missing or null entry means no threshold is set for that metric. */
export interface OpsMetricThresholds {
sla_percent_min?: number | null // SLA below this value is shown in red
latency_p99_ms_max?: number | null // latency P99 above this value is shown in red
ttft_p99_ms_max?: number | null // TTFT P99 above this value is shown in red
request_error_rate_percent_max?: number | null // request error rate above this value is shown in red
upstream_error_rate_percent_max?: number | null // upstream error rate above this value is shown in red
}
export interface OpsDistributedLockSettings {
enabled: boolean
key: string
@@ -681,6 +728,7 @@ export interface OpsAlertRuntimeSettings {
reason: string
}>
}
thresholds: OpsMetricThresholds // 指标阈值配置
}
export interface OpsAdvancedSettings {
@@ -929,6 +977,17 @@ export async function updateAdvancedSettings(config: OpsAdvancedSettings): Promi
return data
}
// ==================== Metric Thresholds ====================
/** Loads the metric thresholds from GET /admin/ops/settings/metric-thresholds. */
async function getMetricThresholds(): Promise<OpsMetricThresholds> {
  const response = await apiClient.get<OpsMetricThresholds>('/admin/ops/settings/metric-thresholds')
  return response.data
}
/**
 * Sends the thresholds via PUT /admin/ops/settings/metric-thresholds.
 * The response body is not returned to the caller.
 */
async function updateMetricThresholds(thresholds: OpsMetricThresholds): Promise<void> {
  const endpoint = '/admin/ops/settings/metric-thresholds'
  await apiClient.put(endpoint, thresholds)
}
export const opsAPI = {
getDashboardOverview,
getThroughputTrend,
@@ -937,6 +996,7 @@ export const opsAPI = {
getErrorDistribution,
getConcurrencyStats,
getAccountAvailabilityStats,
getRealtimeTrafficSummary,
subscribeQPS,
listErrorLogs,
getErrorLogDetail,
@@ -952,7 +1012,9 @@ export const opsAPI = {
getAlertRuntimeSettings,
updateAlertRuntimeSettings,
getAdvancedSettings,
updateAdvancedSettings
updateAdvancedSettings,
getMetricThresholds,
updateMetricThresholds
}
export default opsAPI

View File

@@ -124,7 +124,8 @@ const icons = {
chatBubble: 'M8 10h.01M12 10h.01M16 10h.01M9 16H5a2 2 0 01-2-2V6a2 2 0 012-2h14a2 2 0 012 2v8a2 2 0 01-2 2h-5l-5 5v-5z',
calculator: 'M9 7h6m0 10v-3m-3 3h.01M9 17h.01M9 14h.01M12 14h.01M15 11h.01M12 11h.01M9 11h.01M7 21h10a2 2 0 002-2V5a2 2 0 00-2-2H7a2 2 0 00-2 2v14a2 2 0 002 2z',
fire: 'M17.657 18.657A8 8 0 016.343 7.343S7 9 9 10c0-2 .5-5 2.986-7C14 5 16.09 5.777 17.656 7.343A7.975 7.975 0 0120 13a7.975 7.975 0 01-2.343 5.657z',
badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z'
badge: 'M9 12.75L11.25 15 15 9.75M21 12c0 1.268-.63 2.39-1.593 3.068a3.745 3.745 0 01-1.043 3.296 3.745 3.745 0 01-3.296 1.043A3.745 3.745 0 0112 21c-1.268 0-2.39-.63-3.068-1.593a3.746 3.746 0 01-3.296-1.043 3.745 3.745 0 01-1.043-3.296A3.745 3.745 0 013 12c0-1.268.63-2.39 1.593-3.068a3.745 3.745 0 011.043-3.296 3.746 3.746 0 013.296-1.043A3.746 3.746 0 0112 3c1.268 0 2.39.63 3.068 1.593a3.746 3.746 0 013.296 1.043 3.746 3.746 0 011.043 3.296A3.745 3.745 0 0121 12z',
brain: 'M9.75 3.104v5.714a2.25 2.25 0 01-.659 1.591L5 14.5M9.75 3.104c-.251.023-.501.05-.75.082m.75-.082a24.301 24.301 0 014.5 0m0 0v5.714c0 .597.237 1.17.659 1.591L19.8 15.3M14.25 3.104c.251.023.501.05.75.082M19.8 15.3l-1.57.393A9.065 9.065 0 0112 15a9.065 9.065 0 00-6.23.693L5 14.5m0 0l-2.69 2.689c-1.232 1.232-.65 3.318 1.067 3.611A48.309 48.309 0 0012 21c2.773 0 5.491-.235 8.135-.687 1.718-.293 2.3-2.379 1.067-3.61L19.8 15.3M12 8.25a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0v3m-3-1.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3zm0 0h6m-3 4.5a1.5 1.5 0 100-3 1.5 1.5 0 000 3z'
} as const
const iconPath = computed(() => icons[props.name])

View File

@@ -156,6 +156,7 @@ export default {
unknownError: 'Unknown error occurred',
saving: 'Saving...',
selectedCount: '({count} selected)', refresh: 'Refresh',
settings: 'Settings',
notAvailable: 'N/A',
now: 'Now',
unknown: 'Unknown',
@@ -1906,6 +1907,7 @@ export default {
max: 'max:',
qps: 'QPS',
requests: 'Requests',
requestsTitle: 'Requests',
upstream: 'Upstream',
client: 'Client',
system: 'System',
@@ -2118,7 +2120,10 @@ export default {
empty: 'No alert rules',
loadFailed: 'Failed to load alert rules',
saveFailed: 'Failed to save alert rule',
saveSuccess: 'Alert rule saved successfully',
deleteFailed: 'Failed to delete alert rule',
deleteSuccess: 'Alert rule deleted successfully',
manage: 'Manage Alert Rules',
create: 'Create Rule',
createTitle: 'Create Alert Rule',
editTitle: 'Edit Alert Rule',
@@ -2301,6 +2306,54 @@ export default {
accountHealthThresholdRange: 'Account health threshold must be between 0 and 100'
}
},
settings: {
title: 'Ops Monitoring Settings',
loadFailed: 'Failed to load settings',
saveSuccess: 'Ops monitoring settings saved successfully',
saveFailed: 'Failed to save settings',
dataCollection: 'Data Collection',
evaluationInterval: 'Evaluation Interval (seconds)',
evaluationIntervalHint: 'Frequency of detection tasks, recommended to keep default',
alertConfig: 'Alert Configuration',
enableAlert: 'Enable Alerts',
alertRecipients: 'Alert Recipient Emails',
emailPlaceholder: 'Enter email address',
recipientsHint: 'If empty, the system will use the first admin email as default recipient',
minSeverity: 'Minimum Severity',
reportConfig: 'Report Configuration',
enableReport: 'Enable Reports',
reportRecipients: 'Report Recipient Emails',
dailySummary: 'Daily Summary',
weeklySummary: 'Weekly Summary',
metricThresholds: 'Metric Thresholds',
metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
slaMinPercent: 'SLA Minimum Percentage',
slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
latencyP99MaxMs: 'Latency P99 Maximum (ms)',
latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
requestErrorRateMaxPercentHint: 'Request error rate above this value will be displayed in red (default: 5%)',
upstreamErrorRateMaxPercent: 'Upstream Error Rate Maximum (%)',
upstreamErrorRateMaxPercentHint: 'Upstream error rate above this value will be displayed in red (default: 5%)',
advancedSettings: 'Advanced Settings',
dataRetention: 'Data Retention Policy',
enableCleanup: 'Enable Data Cleanup',
cleanupSchedule: 'Cleanup Schedule (Cron)',
cleanupScheduleHint: 'Example: 0 2 * * * means 2 AM daily',
errorLogRetentionDays: 'Error Log Retention Days',
minuteMetricsRetentionDays: 'Minute Metrics Retention Days',
hourlyMetricsRetentionDays: 'Hourly Metrics Retention Days',
retentionDaysHint: 'Recommended 7-90 days, longer periods will consume more storage',
aggregation: 'Pre-aggregation Tasks',
enableAggregation: 'Enable Pre-aggregation',
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
validation: {
title: 'Please fix the following issues',
retentionDaysRange: 'Retention days must be between 1-365 days'
}
},
concurrency: {
title: 'Concurrency / Queue',
byPlatform: 'By Platform',
@@ -2334,12 +2387,13 @@ export default {
accountError: 'Error'
},
tooltips: {
totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
errorDistribution: 'Error distribution by status code.',
goroutines:
'Number of Go runtime goroutines (lightweight threads). There is no absolute safe number—use your historical baseline. Heuristic: <2k is common; 2k8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
'Number of Go runtime goroutines (lightweight threads). There is no absolute "safe" number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
cpu: 'CPU usage percentage, showing system processor load.',
memory: 'Memory usage, including used and total available memory.',
db: 'Database connection pool status, including active, idle, and waiting connections.',
@@ -2349,6 +2403,7 @@ export default {
tokens: 'Total number of tokens processed in the current time window.',
sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'

View File

@@ -2022,7 +2022,7 @@ export default {
ready: '就绪',
requestsTotal: '请求(总计)',
slaScope: 'SLA 范围:',
tokens: 'Token',
tokens: 'Token',
tps: 'TPS',
current: '当前',
peak: '峰值',
@@ -2051,7 +2051,8 @@ export default {
avg: 'avg',
max: 'max',
qps: 'QPS',
requests: '请求',
requests: '请求',
requestsTitle: '请求',
upstream: '上游',
client: '客户端',
system: '系统',
@@ -2469,6 +2470,18 @@ export default {
reportRecipients: '评估报告接收邮箱',
dailySummary: '每日摘要',
weeklySummary: '每周摘要',
metricThresholds: '指标阈值配置',
metricThresholdsHint: '配置各项指标的告警阈值,超出阈值时将以红色显示',
slaMinPercent: 'SLA最低百分比',
slaMinPercentHint: 'SLA低于此值时显示为红色默认99.5%',
latencyP99MaxMs: '延迟P99最大值毫秒',
latencyP99MaxMsHint: '延迟P99高于此值时显示为红色默认2000ms',
ttftP99MaxMs: 'TTFT P99最大值毫秒',
ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色默认500ms',
requestErrorRateMaxPercent: '请求错误率最大值(%)',
requestErrorRateMaxPercentHint: '请求错误率高于此值时显示为红色默认5%',
upstreamErrorRateMaxPercent: '上游错误率最大值(%)',
upstreamErrorRateMaxPercentHint: '上游错误率高于此值时显示为红色默认5%',
advancedSettings: '高级设置',
dataRetention: '数据保留策略',
enableCleanup: '启用数据清理',

View File

@@ -13,17 +13,13 @@
<OpsDashboardHeader
v-else-if="opsEnabled"
:overview="overview"
:ws-status="wsStatus"
:ws-reconnect-in-ms="wsReconnectInMs"
:ws-has-data="wsHasData"
:real-time-qps="realTimeQPS"
:real-time-tps="realTimeTPS"
:platform="platform"
:group-id="groupId"
:time-range="timeRange"
:query-mode="queryMode"
:loading="loading"
:last-updated="lastUpdated"
:thresholds="metricThresholds"
@update:time-range="onTimeRangeChange"
@update:platform="onPlatformChange"
@update:group="onGroupChange"
@@ -75,7 +71,7 @@
<OpsAlertEventsCard v-if="opsEnabled && !(loading && !hasLoadedOnce)" />
<!-- Settings Dialog -->
<OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="fetchData" />
<OpsSettingsDialog :show="showSettingsDialog" @close="showSettingsDialog = false" @saved="onSettingsSaved" />
<!-- Alert Rules Dialog -->
<BaseDialog :show="showAlertRulesCard" :title="t('admin.ops.alertRules.title')" width="extra-wide" @close="showAlertRulesCard = false">
@@ -115,13 +111,12 @@ import AppLayout from '@/components/layout/AppLayout.vue'
import BaseDialog from '@/components/common/BaseDialog.vue'
import {
opsAPI,
OPS_WS_CLOSE_CODES,
type OpsWSStatus,
type OpsDashboardOverview,
type OpsErrorDistributionResponse,
type OpsErrorTrendResponse,
type OpsLatencyHistogramResponse,
type OpsThroughputTrendResponse
type OpsThroughputTrendResponse,
type OpsMetricThresholds
} from '@/api/admin/ops'
import { useAdminSettingsStore, useAppStore } from '@/stores'
import OpsDashboardHeader from './components/OpsDashboardHeader.vue'
@@ -172,14 +167,6 @@ const QUERY_KEYS = {
const isApplyingRouteQuery = ref(false)
const isSyncingRouteQuery = ref(false)
// WebSocket for realtime QPS/TPS
const realTimeQPS = ref(0)
const realTimeTPS = ref(0)
const wsStatus = ref<OpsWSStatus>('closed')
const wsReconnectInMs = ref<number | null>(null)
const wsHasData = ref(false)
let unsubscribeQPS: (() => void) | null = null
let dashboardFetchController: AbortController | null = null
let dashboardFetchSeq = 0
@@ -199,50 +186,6 @@ function abortDashboardFetch() {
}
}
// Tears down the realtime QPS subscription and marks the socket state as closed.
// With `resetMetrics` set, the displayed QPS/TPS values and the "has data" flag
// are cleared as well.
function stopQPSSubscription(options?: { resetMetrics?: boolean }) {
  wsStatus.value = 'closed'
  wsReconnectInMs.value = null

  // Unsubscribe first, then drop the handle.
  unsubscribeQPS?.()
  unsubscribeQPS = null

  if (!options?.resetMetrics) return
  realTimeQPS.value = 0
  realTimeTPS.value = 0
  wsHasData.value = false
}
// (Re)starts the realtime QPS/TPS subscription. Any existing subscription is
// stopped first, and the unsubscribe handle returned by opsAPI.subscribeQPS is kept.
function startQPSSubscription() {
stopQPSSubscription()
unsubscribeQPS = opsAPI.subscribeQPS(
(payload) => {
// Only well-formed 'qps_update' messages carrying data update the displayed rates.
if (payload && typeof payload === 'object' && payload.type === 'qps_update' && payload.data) {
realTimeQPS.value = payload.data.qps || 0
realTimeTPS.value = payload.data.tps || 0
wsHasData.value = true
}
},
{
onStatusChange: (status) => {
wsStatus.value = status
// Once connected there is no pending reconnect countdown to show.
if (status === 'connected') wsReconnectInMs.value = null
},
onReconnectScheduled: ({ delayMs }) => {
wsReconnectInMs.value = delayMs
},
onFatalClose: (event) => {
// Server-side feature flag says realtime is disabled; keep UI consistent and avoid reconnect loops.
if (event && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false)
stopQPSSubscription({ resetMetrics: true })
}
},
// QPS updates may be sparse in idle periods; keep the timeout conservative.
staleTimeoutMs: 180_000
}
)
}
const readQueryString = (key: string): string => {
const value = route.query[key]
if (typeof value === 'string') return value
@@ -314,6 +257,7 @@ const syncQueryToRoute = useDebounceFn(async () => {
}, 250)
const overview = ref<OpsDashboardOverview | null>(null)
const metricThresholds = ref<OpsMetricThresholds | null>(null)
const throughputTrend = ref<OpsThroughputTrendResponse | null>(null)
const loadingTrend = ref(false)
@@ -376,6 +320,11 @@ function onTimeRangeChange(v: string | number | boolean | null) {
timeRange.value = v as TimeRange
}
// Called when the settings dialog reports a save: reloads the threshold
// configuration and refreshes the dashboard data. Neither call is awaited here.
function onSettingsSaved() {
loadThresholds()
fetchData()
}
// Stores the selected platform; any non-string value resets the filter to ''.
function onPlatformChange(v: string | number | boolean | null) {
  if (typeof v === 'string') {
    platform.value = v
    return
  }
  platform.value = ''
}
@@ -615,31 +564,25 @@ onMounted(async () => {
return
}
if (adminSettingsStore.opsRealtimeMonitoringEnabled) {
startQPSSubscription()
} else {
stopQPSSubscription({ resetMetrics: true })
}
// Load thresholds configuration
loadThresholds()
if (opsEnabled.value) {
await fetchData()
}
})
// Loads the metric thresholds from the alert runtime settings. On any failure
// a warning is logged and the thresholds are cleared to null.
async function loadThresholds() {
  let loaded: OpsMetricThresholds | null = null
  try {
    const { thresholds } = await opsAPI.getAlertRuntimeSettings()
    loaded = thresholds || null
  } catch (err) {
    console.warn('[OpsDashboard] Failed to load thresholds', err)
  }
  metricThresholds.value = loaded
}
onUnmounted(() => {
stopQPSSubscription()
abortDashboardFetch()
})
watch(
() => adminSettingsStore.opsRealtimeMonitoringEnabled,
(enabled) => {
if (!opsEnabled.value) return
if (enabled) {
startQPSSubscription()
} else {
stopQPSSubscription({ resetMetrics: true })
}
}
)
</script>

View File

@@ -1,29 +1,28 @@
<script setup lang="ts">
import { computed, onMounted, ref, watch } from 'vue'
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
import { useIntervalFn } from '@vueuse/core'
import { useI18n } from 'vue-i18n'
import Select from '@/components/common/Select.vue'
import HelpTooltip from '@/components/common/HelpTooltip.vue'
import BaseDialog from '@/components/common/BaseDialog.vue'
import Icon from '@/components/icons/Icon.vue'
import { adminAPI } from '@/api'
import type { OpsDashboardOverview, OpsWSStatus } from '@/api/admin/ops'
import { opsAPI, type OpsDashboardOverview, type OpsMetricThresholds, type OpsRealtimeTrafficSummary } from '@/api/admin/ops'
import type { OpsRequestDetailsPreset } from './OpsRequestDetailsModal.vue'
import { useAdminSettingsStore } from '@/stores'
import { formatNumber } from '@/utils/format'
type RealtimeWindow = '1min' | '5min' | '30min' | '1h'
// Props accepted by the dashboard header component.
interface Props {
overview?: OpsDashboardOverview | null
wsStatus: OpsWSStatus
wsReconnectInMs?: number | null
wsHasData?: boolean
realTimeQps: number
realTimeTps: number
platform: string
groupId: number | null
timeRange: string
queryMode: string
loading: boolean
lastUpdated: Date | null
thresholds?: OpsMetricThresholds | null // threshold configuration
}
interface Emits {
@@ -42,12 +41,43 @@ const props = defineProps<Props>()
const emit = defineEmits<Emits>()
const { t } = useI18n()
const adminSettingsStore = useAdminSettingsStore()
const realtimeWindow = ref<RealtimeWindow>('1min')
const overview = computed(() => props.overview ?? null)
const systemMetrics = computed(() => overview.value?.system_metrics ?? null)
// Length of each realtime window option, in minutes.
const REALTIME_WINDOW_MINUTES: Record<RealtimeWindow, number> = {
'1min': 1,
'5min': 5,
'30min': 30,
'1h': 60
}
// Length in minutes for the toolbar time-range keys listed here.
// Keys missing from this table yield `undefined` on lookup.
const TOOLBAR_RANGE_MINUTES: Record<string, number> = {
'5m': 5,
'30m': 30,
'1h': 60,
'6h': 6 * 60,
'24h': 24 * 60
}
// Realtime window options whose length does not exceed the toolbar time range.
// A toolbar range missing from TOOLBAR_RANGE_MINUTES is treated as 60 minutes.
const availableRealtimeWindows = computed(() => {
  const maxMinutes = TOOLBAR_RANGE_MINUTES[props.timeRange] ?? 60
  const candidates = ['1min', '5min', '30min', '1h'] as const
  return candidates.filter((option) => REALTIME_WINDOW_MINUTES[option] <= maxMinutes)
})
// When the toolbar time range changes, reset the realtime window to '1min'
// and reload the realtime traffic summary.
watch(
() => props.timeRange,
() => {
// The realtime window must be inside the toolbar window; reset to keep UX predictable.
realtimeWindow.value = '1min'
// Keep realtime traffic consistent with toolbar changes even when the window is already 1min.
loadRealtimeTrafficSummary()
}
)
// --- Filters ---
const groups = ref<Array<{ id: number; name: string; platform: string }>>([])
@@ -143,56 +173,143 @@ function getLatencyColor(ms: number | null | undefined): string {
return 'text-red-600 dark:text-red-400'
}
// --- Threshold checking helpers ---
/**
 * Reports whether the SLA percentage is strictly below the configured
 * `sla_percent_min` threshold. Returns false when either the value or the
 * threshold is missing.
 */
function isSLABelowThreshold(slaPercent: number | null): boolean {
  if (slaPercent === null || slaPercent === undefined) {
    return false
  }
  const minimum = props.thresholds?.sla_percent_min ?? null
  return minimum !== null && slaPercent < minimum
}
/**
 * Reports whether the P99 latency (ms) is strictly above the configured
 * `latency_p99_ms_max` threshold. Returns false when either the value or the
 * threshold is missing.
 */
function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean {
  if (latencyP99Ms === null || latencyP99Ms === undefined) {
    return false
  }
  const maximum = props.thresholds?.latency_p99_ms_max ?? null
  return maximum !== null && latencyP99Ms > maximum
}
/**
 * Reports whether the P99 TTFT (ms) is strictly above the configured
 * `ttft_p99_ms_max` threshold. Returns false when either the value or the
 * threshold is missing.
 */
function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean {
  if (ttftP99Ms === null || ttftP99Ms === undefined) {
    return false
  }
  const maximum = props.thresholds?.ttft_p99_ms_max ?? null
  return maximum !== null && ttftP99Ms > maximum
}
/**
 * Reports whether the request error rate (percent) is strictly above the
 * configured `request_error_rate_percent_max` threshold. Returns false when
 * either the value or the threshold is missing.
 */
function isRequestErrorRateAboveThreshold(errorRatePercent: number | null): boolean {
  if (errorRatePercent === null || errorRatePercent === undefined) {
    return false
  }
  const maximum = props.thresholds?.request_error_rate_percent_max ?? null
  return maximum !== null && errorRatePercent > maximum
}
/**
 * Reports whether the upstream error rate (percent) is strictly above the
 * configured `upstream_error_rate_percent_max` threshold. Returns false when
 * either the value or the threshold is missing.
 */
function isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent: number | null): boolean {
  if (upstreamErrorRatePercent === null || upstreamErrorRatePercent === undefined) {
    return false
  }
  const maximum = props.thresholds?.upstream_error_rate_percent_max ?? null
  return maximum !== null && upstreamErrorRatePercent > maximum
}
// --- Realtime / Overview labels ---
// Formatted totals taken from the overview; a missing value is formatted as 0.
const totalRequestsLabel = computed(() => formatNumber(overview.value?.request_count_total ?? 0))
const totalTokensLabel = computed(() => formatNumber(overview.value?.token_consumed ?? 0))
// Latest realtime traffic summary (null until one is stored) and the flag
// that marks a summary request as in flight.
const realtimeTrafficSummary = ref<OpsRealtimeTrafficSummary | null>(null)
const realtimeTrafficLoading = ref(false)
/**
 * Builds a summary with all QPS/TPS figures set to 0 for the currently
 * selected window, platform and group. Start and end time share the same
 * ISO timestamp taken at call time.
 */
function makeZeroRealtimeTrafficSummary(): OpsRealtimeTrafficSummary {
  const timestamp = new Date().toISOString()
  const zeroRate = () => ({ current: 0, peak: 0, avg: 0 })
  const summary: OpsRealtimeTrafficSummary = {
    window: realtimeWindow.value,
    start_time: timestamp,
    end_time: timestamp,
    platform: props.platform,
    group_id: props.groupId,
    qps: zeroRate(),
    tps: zeroRate()
  }
  return summary
}
/**
 * Fetches the realtime traffic summary for the selected window, platform and
 * group and stores it in `realtimeTrafficSummary`.
 *
 * - Only one request runs at a time; calls made while one is in flight return
 *   immediately.
 * - When realtime monitoring is disabled, no request is sent and a zeroed
 *   summary is stored instead.
 * - If the window/platform/group changed, or monitoring was switched off,
 *   while the request was in flight, its result (or failure) is discarded and
 *   the load is run once more against the current state. Without this, a
 *   response for the previous selection would be displayed under the new one
 *   until the next refresh.
 * - A failed request that is still current logs the error and clears the
 *   summary to null.
 */
async function loadRealtimeTrafficSummary() {
  if (realtimeTrafficLoading.value) return
  if (!adminSettingsStore.opsRealtimeMonitoringEnabled) {
    realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
    return
  }

  // Snapshot the parameters this request is issued for.
  const requestedWindow = realtimeWindow.value
  const requestedPlatform = props.platform
  const requestedGroupId = props.groupId

  // True once the live state no longer matches what was requested.
  const isOutdated = () =>
    !adminSettingsStore.opsRealtimeMonitoringEnabled ||
    requestedWindow !== realtimeWindow.value ||
    requestedPlatform !== props.platform ||
    requestedGroupId !== props.groupId

  realtimeTrafficLoading.value = true
  try {
    const res = await opsAPI.getRealtimeTrafficSummary(requestedWindow, requestedPlatform, requestedGroupId)
    if (res && res.enabled === false) {
      adminSettingsStore.setOpsRealtimeMonitoringEnabledLocal(false)
    }
    if (!isOutdated()) {
      realtimeTrafficSummary.value = res?.summary ?? null
    }
  } catch (err) {
    console.error('[OpsDashboardHeader] Failed to load realtime traffic summary', err)
    if (!isOutdated()) {
      realtimeTrafficSummary.value = null
    }
  } finally {
    realtimeTrafficLoading.value = false
  }

  // The calls that would have refreshed for the new state were dropped by the
  // in-flight guard above, so catch up here. This re-runs only when the state
  // actually changed during the request, so it settles once the state is stable.
  if (isOutdated()) {
    await loadRealtimeTrafficSummary()
  }
}
// Reload the realtime summary whenever the selected window, platform or group
// changes; `immediate: true` also triggers one load during setup.
watch(
() => [realtimeWindow.value, props.platform, props.groupId] as const,
() => {
loadRealtimeTrafficSummary()
},
{ immediate: true }
)
// Periodic refresh every 5000 ms. Created with `immediate: false`; it is
// resumed and paused by the watcher below.
const { pause: pauseRealtimeTrafficRefresh, resume: resumeRealtimeTrafficRefresh } = useIntervalFn(
() => {
loadRealtimeTrafficSummary()
},
5000,
{ immediate: false }
)
// Run the periodic refresh only while realtime monitoring is enabled. When it
// is disabled, pause the refresh and show a zeroed summary.
watch(
() => adminSettingsStore.opsRealtimeMonitoringEnabled,
(enabled) => {
if (enabled) {
resumeRealtimeTrafficRefresh()
} else {
pauseRealtimeTrafficRefresh()
// Keep UI stable when realtime monitoring is turned off.
realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
}
},
{ immediate: true }
)
// Pause the periodic refresh when the component unmounts.
onUnmounted(() => {
pauseRealtimeTrafficRefresh()
})
const displayRealTimeQps = computed(() => {
const ov = overview.value
if (!ov) return 0
const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
const v = useRealtime ? props.realTimeQps : ov.qps?.current
const v = realtimeTrafficSummary.value?.qps?.current
return typeof v === 'number' && Number.isFinite(v) ? v : 0
})
const displayRealTimeTps = computed(() => {
const ov = overview.value
if (!ov) return 0
const useRealtime = props.wsStatus === 'connected' && !!props.wsHasData
const v = useRealtime ? props.realTimeTps : ov.tps?.current
const v = realtimeTrafficSummary.value?.tps?.current
return typeof v === 'number' && Number.isFinite(v) ? v : 0
})
// Sparkline history (keep last 60 data points)
const qpsHistory = ref<number[]>([])
const tpsHistory = ref<number[]>([])
const MAX_HISTORY_POINTS = 60
watch([displayRealTimeQps, displayRealTimeTps], ([newQps, newTps]) => {
// Add new data points
qpsHistory.value.push(newQps)
tpsHistory.value.push(newTps)
// Keep only last N points
if (qpsHistory.value.length > MAX_HISTORY_POINTS) {
qpsHistory.value.shift()
}
if (tpsHistory.value.length > MAX_HISTORY_POINTS) {
tpsHistory.value.shift()
}
const realtimeQpsPeakLabel = computed(() => {
const v = realtimeTrafficSummary.value?.qps?.peak
return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
})
const qpsPeakLabel = computed(() => {
const v = overview.value?.qps?.peak
if (typeof v !== 'number') return '-'
return v.toFixed(1)
const realtimeTpsPeakLabel = computed(() => {
const v = realtimeTrafficSummary.value?.tps?.peak
return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
})
const tpsPeakLabel = computed(() => {
const v = overview.value?.tps?.peak
if (typeof v !== 'number') return '-'
return v.toFixed(1)
const realtimeQpsAvgLabel = computed(() => {
const v = realtimeTrafficSummary.value?.qps?.avg
return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
})
const realtimeTpsAvgLabel = computed(() => {
const v = realtimeTrafficSummary.value?.tps?.avg
return typeof v === 'number' && Number.isFinite(v) ? v.toFixed(1) : '-'
})
const qpsAvgLabel = computed(() => {
@@ -244,7 +361,7 @@ const ttftMaxMs = computed(() => overview.value?.ttft?.max_ms ?? null)
const isSystemIdle = computed(() => {
const ov = overview.value
if (!ov) return true
const qps = props.wsStatus === 'connected' && props.wsHasData ? props.realTimeQps : ov.qps?.current
const qps = ov.qps?.current
const errorRate = ov.error_rate ?? 0
return (qps ?? 0) === 0 && errorRate === 0
})
@@ -687,6 +804,11 @@ const showJobsDetails = ref(false)
function openJobsDetails() {
showJobsDetails.value = true
}
/**
 * Handler for the toolbar refresh button: starts a reload of the realtime
 * traffic summary (not awaited), then emits `refresh`.
 */
function handleToolbarRefresh(): void {
  void loadRealtimeTrafficSummary()
  emit('refresh')
}
</script>
<template>
@@ -764,7 +886,7 @@ function openJobsDetails() {
class="flex h-8 w-8 items-center justify-center rounded-lg bg-gray-100 text-gray-500 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-400 dark:hover:bg-dark-600"
:disabled="loading"
:title="t('common.refresh')"
@click="emit('refresh')"
@click="handleToolbarRefresh"
>
<svg class="h-4 w-4" :class="{ 'animate-spin': loading }" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path
@@ -818,8 +940,9 @@ function openJobsDetails() {
class="pointer-events-none absolute left-1/2 top-full z-50 mt-2 w-72 -translate-x-1/2 opacity-0 transition-opacity duration-200 group-hover:pointer-events-auto group-hover:opacity-100 md:left-full md:top-0 md:ml-2 md:mt-0 md:translate-x-0"
>
<div class="rounded-xl bg-white p-4 shadow-xl ring-1 ring-black/5 dark:bg-gray-800 dark:ring-white/10">
<h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white">
🧠 {{ t('admin.ops.diagnosis.title') }}
<h4 class="mb-3 border-b border-gray-100 pb-2 text-sm font-bold text-gray-900 dark:border-gray-700 dark:text-white flex items-center gap-2">
<Icon name="brain" size="sm" class="text-blue-500" />
{{ t('admin.ops.diagnosis.title') }}
</h4>
<div class="space-y-3">
@@ -850,8 +973,9 @@ function openJobsDetails() {
<div class="flex-1">
<div class="text-xs font-semibold text-gray-900 dark:text-white">{{ item.message }}</div>
<div class="mt-0.5 text-[11px] text-gray-500 dark:text-gray-400">{{ item.impact }}</div>
<div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400">
💡 {{ item.action }}
<div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400 flex items-center gap-1">
<Icon name="lightbulb" size="xs" />
{{ item.action }}
</div>
</div>
</div>
@@ -928,7 +1052,7 @@ function openJobsDetails() {
<!-- Time Window Selector -->
<div class="flex flex-wrap gap-1">
<button
v-for="window in (['1min', '5min', '30min', '1h'] as RealtimeWindow[])"
v-for="window in availableRealtimeWindows"
:key="window"
type="button"
class="rounded px-1.5 py-0.5 text-[9px] font-bold transition-colors sm:px-2 sm:text-[10px]"
@@ -965,11 +1089,11 @@ function openJobsDetails() {
<div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.peak') }}</div>
<div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
<div class="flex items-baseline gap-1.5">
<span class="font-black text-gray-900 dark:text-white">{{ qpsPeakLabel }}</span>
<span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsPeakLabel }}</span>
<span class="text-xs">QPS</span>
</div>
<div class="flex items-baseline gap-1.5">
<span class="font-black text-gray-900 dark:text-white">{{ tpsPeakLabel }}</span>
<span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsPeakLabel }}</span>
<span class="text-xs">TPS</span>
</div>
</div>
@@ -980,11 +1104,11 @@ function openJobsDetails() {
<div class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.average') }}</div>
<div class="mt-1 space-y-0.5 text-sm font-medium text-gray-600 dark:text-gray-400">
<div class="flex items-baseline gap-1.5">
<span class="font-black text-gray-900 dark:text-white">{{ qpsAvgLabel }}</span>
<span class="font-black text-gray-900 dark:text-white">{{ realtimeQpsAvgLabel }}</span>
<span class="text-xs">QPS</span>
</div>
<div class="flex items-baseline gap-1.5">
<span class="font-black text-gray-900 dark:text-white">{{ tpsAvgLabel }}</span>
<span class="font-black text-gray-900 dark:text-white">{{ realtimeTpsAvgLabel }}</span>
<span class="text-xs">TPS</span>
</div>
</div>
@@ -1024,7 +1148,7 @@ function openJobsDetails() {
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requests') }}</span>
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestsTitle') }}</span>
<HelpTooltip :content="t('admin.ops.tooltips.totalRequests')" />
</div>
<button
@@ -1061,21 +1185,21 @@ function openJobsDetails() {
<div class="flex items-center gap-2">
<span class="text-[10px] font-bold uppercase text-gray-400">SLA</span>
<HelpTooltip :content="t('admin.ops.tooltips.sla')" />
<span class="h-1.5 w-1.5 rounded-full" :class="(slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
<span class="h-1.5 w-1.5 rounded-full" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : (slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
</div>
<button
class="text-[10px] font-bold text-blue-500 hover:underline"
type="button"
@click="openDetails({ title: t('admin.ops.requestDetails.title') })"
@click="openDetails({ title: t('admin.ops.requestDetails.title'), kind: 'error' })"
>
{{ t('admin.ops.requestDetails.details') }}
</button>
</div>
<div class="mt-2 text-3xl font-black text-gray-900 dark:text-white">
<div class="mt-2 text-3xl font-black" :class="isSLABelowThreshold(slaPercent) ? 'text-red-600 dark:text-red-400' : 'text-gray-900 dark:text-white'">
{{ slaPercent == null ? '-' : `${slaPercent.toFixed(3)}%` }}
</div>
<div class="mt-3 h-2 w-full overflow-hidden rounded-full bg-gray-200 dark:bg-dark-700">
<div class="h-full bg-green-500 transition-all" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
<div class="h-full transition-all" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : 'bg-green-500'" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
</div>
<div class="mt-3 text-xs">
<div class="flex justify-between">
@@ -1101,7 +1225,7 @@ function openJobsDetails() {
</button>
</div>
<div class="mt-2 flex items-baseline gap-2">
<div class="text-3xl font-black" :class="getLatencyColor(durationP99Ms)">
<div class="text-3xl font-black" :class="isLatencyAboveThreshold(durationP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(durationP99Ms)">
{{ durationP99Ms ?? '-' }}
</div>
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1145,13 +1269,13 @@ function openJobsDetails() {
<button
class="text-[10px] font-bold text-blue-500 hover:underline"
type="button"
@click="openDetails({ title: 'TTFT' })"
@click="openDetails({ title: 'TTFT', sort: 'duration_desc' })"
>
{{ t('admin.ops.requestDetails.details') }}
</button>
</div>
<div class="mt-2 flex items-baseline gap-2">
<div class="text-3xl font-black" :class="getLatencyColor(ttftP99Ms)">
<div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(ttftP99Ms)">
{{ ttftP99Ms ?? '-' }}
</div>
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1196,7 +1320,7 @@ function openJobsDetails() {
{{ t('admin.ops.requestDetails.details') }}
</button>
</div>
<div class="mt-2 text-3xl font-black" :class="(errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
<div class="mt-2 text-3xl font-black" :class="isRequestErrorRateAboveThreshold(errorRatePercent) ? 'text-red-600 dark:text-red-400' : (errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
{{ errorRatePercent == null ? '-' : `${errorRatePercent.toFixed(2)}%` }}
</div>
<div class="mt-3 space-y-1 text-xs">
@@ -1222,7 +1346,7 @@ function openJobsDetails() {
{{ t('admin.ops.requestDetails.details') }}
</button>
</div>
<div class="mt-2 text-3xl font-black" :class="(upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
<div class="mt-2 text-3xl font-black" :class="isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent) ? 'text-red-600 dark:text-red-400' : (upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
{{ upstreamErrorRatePercent == null ? '-' : `${upstreamErrorRatePercent.toFixed(2)}%` }}
</div>
<div class="mt-3 space-y-1 text-xs">

View File

@@ -174,69 +174,75 @@ watch(
<template>
<BaseDialog :show="show" :title="modalTitle" width="full" @close="close">
<!-- Filters -->
<div class="border-b border-gray-200 pb-4 mb-4 dark:border-dark-700">
<div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
<div class="lg:col-span-5">
<div class="relative group">
<div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
<svg
class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
<div class="flex h-full min-h-0 flex-col">
<!-- Filters -->
<div class="mb-4 flex-shrink-0 border-b border-gray-200 pb-4 dark:border-dark-700">
<div class="grid grid-cols-1 gap-4 lg:grid-cols-12">
<div class="lg:col-span-5">
<div class="relative group">
<div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
<svg
class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2.5" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z" />
</svg>
</div>
<input
v-model="q"
type="text"
class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
:placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
/>
</div>
</div>
<div class="lg:col-span-2">
<Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
</div>
<div class="lg:col-span-2">
<Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
</div>
<div class="lg:col-span-2">
<input
v-model="q"
v-model="accountIdInput"
type="text"
class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
:placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
inputmode="numeric"
class="input w-full text-sm"
:placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
/>
</div>
</div>
<div class="lg:col-span-2">
<Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
</div>
<div class="lg:col-span-2">
<Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
</div>
<div class="lg:col-span-2">
<input
v-model="accountIdInput"
type="text"
inputmode="numeric"
class="input w-full text-sm"
:placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
/>
</div>
<div class="lg:col-span-1 flex items-center justify-end">
<button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
{{ t('common.reset') }}
</button>
<div class="lg:col-span-1 flex items-center justify-end">
<button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
{{ t('common.reset') }}
</button>
</div>
</div>
</div>
</div>
<!-- Body -->
<div class="text-xs text-gray-500 dark:text-gray-400 mb-2">
{{ t('admin.ops.errorDetails.total') }} {{ total }}
<!-- Body -->
<div class="flex min-h-0 flex-1 flex-col">
<div class="mb-2 flex-shrink-0 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.errorDetails.total') }} {{ total }}
</div>
<OpsErrorLogTable
class="min-h-0 flex-1"
:rows="rows"
:total="total"
:loading="loading"
:page="page"
:page-size="pageSize"
@openErrorDetail="emit('openErrorDetail', $event)"
@update:page="page = $event"
@update:pageSize="pageSize = $event"
/>
</div>
</div>
<OpsErrorLogTable
:rows="rows"
:total="total"
:loading="loading"
:page="page"
:page-size="pageSize"
@openErrorDetail="emit('openErrorDetail', $event)"
@update:page="page = $event"
@update:pageSize="pageSize = $event"
/>
</BaseDialog>
</template>

View File

@@ -1,176 +1,178 @@
<template>
<div>
<div v-if="loading" class="flex items-center justify-center py-10">
<div class="flex h-full min-h-0 flex-col">
<div v-if="loading" class="flex flex-1 items-center justify-center py-10">
<div class="h-8 w-8 animate-spin rounded-full border-b-2 border-primary-600"></div>
</div>
<div v-else class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
<thead class="sticky top-0 z-10 bg-gray-50/50 dark:bg-dark-800/50">
<tr>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.timeId') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.context') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.status') }}
</th>
<th
scope="col"
class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.message') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.latency') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.action') }}
</th>
</tr>
</thead>
<tbody class="divide-y divide-gray-100 dark:divide-dark-700">
<tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
<td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
{{ t('admin.ops.errorLog.noErrors') }}
</td>
</tr>
<div v-else class="flex min-h-0 flex-1 flex-col">
<div class="min-h-0 flex-1 overflow-auto">
<table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
<thead class="sticky top-0 z-10 bg-gray-50/50 dark:bg-dark-800/50">
<tr>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.timeId') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.context') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.status') }}
</th>
<th
scope="col"
class="px-6 py-4 text-left text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.message') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.latency') }}
</th>
<th
scope="col"
class="whitespace-nowrap px-6 py-4 text-right text-xs font-bold uppercase tracking-wider text-gray-500 dark:text-dark-400"
>
{{ t('admin.ops.errorLog.action') }}
</th>
</tr>
</thead>
<tbody class="divide-y divide-gray-100 dark:divide-dark-700">
<tr v-if="rows.length === 0" class="bg-white dark:bg-dark-900">
<td colspan="6" class="py-16 text-center text-sm text-gray-400 dark:text-dark-500">
{{ t('admin.ops.errorLog.noErrors') }}
</td>
</tr>
<tr
v-for="log in rows"
:key="log.id"
class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
tabindex="0"
role="button"
@click="emit('openErrorDetail', log.id)"
@keydown.enter.prevent="emit('openErrorDetail', log.id)"
@keydown.space.prevent="emit('openErrorDetail', log.id)"
>
<!-- Time & ID -->
<td class="px-6 py-4">
<div class="flex flex-col gap-0.5">
<span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
{{ formatDateTime(log.created_at).split(' ')[1] }}
</span>
<span
class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
:title="log.request_id || log.client_request_id"
>
{{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
</span>
</div>
</td>
<tr
v-for="log in rows"
:key="log.id"
class="group cursor-pointer transition-all duration-200 hover:bg-gray-50/80 focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2 dark:hover:bg-dark-800/50 dark:focus:ring-offset-dark-900"
tabindex="0"
role="button"
@click="emit('openErrorDetail', log.id)"
@keydown.enter.prevent="emit('openErrorDetail', log.id)"
@keydown.space.prevent="emit('openErrorDetail', log.id)"
>
<!-- Time & ID -->
<td class="px-6 py-4">
<div class="flex flex-col gap-0.5">
<span class="font-mono text-xs font-bold text-gray-900 dark:text-gray-200">
{{ formatDateTime(log.created_at).split(' ')[1] }}
</span>
<span
class="font-mono text-[10px] text-gray-400 transition-colors group-hover:text-primary-600 dark:group-hover:text-primary-400"
:title="log.request_id || log.client_request_id"
>
{{ (log.request_id || log.client_request_id || '').substring(0, 12) }}
</span>
</div>
</td>
<!-- Context (Platform/Model) -->
<td class="px-6 py-4">
<div class="flex flex-col items-start gap-1.5">
<span
class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
>
{{ log.platform || '-' }}
</span>
<span
v-if="log.model"
class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
:title="log.model"
>
{{ log.model }}
</span>
<div
v-if="log.group_id || log.account_id"
class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
>
<span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
<span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
</div>
</div>
</td>
<!-- Status & Severity -->
<td class="px-6 py-4">
<div class="flex flex-wrap items-center gap-2">
<span
:class="[
'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
getStatusClass(log.status_code)
]"
>
{{ log.status_code }}
</span>
<span
v-if="log.severity"
:class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
>
{{ log.severity }}
</span>
</div>
</td>
<!-- Message -->
<td class="px-6 py-4">
<div class="max-w-md lg:max-w-2xl">
<p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
{{ formatSmartMessage(log.message) || '-' }}
</p>
<div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
<div v-if="log.phase" class="flex items-center gap-1">
<span class="h-1 w-1 rounded-full bg-gray-300"></span>
<span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
</div>
<div v-if="log.client_ip" class="flex items-center gap-1">
<span class="h-1 w-1 rounded-full bg-gray-300"></span>
<span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
<!-- Context (Platform/Model) -->
<td class="px-6 py-4">
<div class="flex flex-col items-start gap-1.5">
<span
class="inline-flex items-center rounded-md bg-gray-100 px-2 py-0.5 text-[10px] font-bold uppercase tracking-tight text-gray-600 dark:bg-dark-700 dark:text-gray-300"
>
{{ log.platform || '-' }}
</span>
<span
v-if="log.model"
class="max-w-[160px] truncate font-mono text-[10px] text-gray-500 dark:text-dark-400"
:title="log.model"
>
{{ log.model }}
</span>
<div
v-if="log.group_id || log.account_id"
class="flex flex-wrap items-center gap-2 font-mono text-[10px] font-semibold text-gray-400 dark:text-dark-500"
>
<span v-if="log.group_id">{{ t('admin.ops.errorLog.grp') }} {{ log.group_id }}</span>
<span v-if="log.account_id">{{ t('admin.ops.errorLog.acc') }} {{ log.account_id }}</span>
</div>
</div>
</div>
</td>
</td>
<!-- Latency -->
<td class="px-6 py-4 text-right">
<div class="flex flex-col items-end">
<span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
{{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
</span>
</div>
</td>
<!-- Status & Severity -->
<td class="px-6 py-4">
<div class="flex flex-wrap items-center gap-2">
<span
:class="[
'inline-flex items-center rounded-lg px-2 py-1 text-xs font-black ring-1 ring-inset shadow-sm',
getStatusClass(log.status_code)
]"
>
{{ log.status_code }}
</span>
<span
v-if="log.severity"
:class="['rounded-md px-2 py-0.5 text-[10px] font-black shadow-sm', getSeverityClass(log.severity)]"
>
{{ log.severity }}
</span>
</div>
</td>
<!-- Actions -->
<td class="px-6 py-4 text-right" @click.stop>
<button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
{{ t('admin.ops.errorLog.details') }}
</button>
</td>
</tr>
</tbody>
</table>
<!-- Message -->
<td class="px-6 py-4">
<div class="max-w-md lg:max-w-2xl">
<p class="truncate text-xs font-semibold text-gray-700 dark:text-gray-300" :title="log.message">
{{ formatSmartMessage(log.message) || '-' }}
</p>
<div class="mt-1.5 flex flex-wrap gap-x-3 gap-y-1">
<div v-if="log.phase" class="flex items-center gap-1">
<span class="h-1 w-1 rounded-full bg-gray-300"></span>
<span class="text-[9px] font-black uppercase tracking-tighter text-gray-400">{{ log.phase }}</span>
</div>
<div v-if="log.client_ip" class="flex items-center gap-1">
<span class="h-1 w-1 rounded-full bg-gray-300"></span>
<span class="text-[9px] font-mono font-bold text-gray-400">{{ log.client_ip }}</span>
</div>
</div>
</div>
</td>
<!-- Latency -->
<td class="px-6 py-4 text-right">
<div class="flex flex-col items-end">
<span class="font-mono text-xs font-black" :class="getLatencyClass(log.latency_ms ?? null)">
{{ log.latency_ms != null ? Math.round(log.latency_ms) + 'ms' : '--' }}
</span>
</div>
</td>
<!-- Actions -->
<td class="px-6 py-4 text-right" @click.stop>
<button type="button" class="btn btn-secondary btn-sm" @click="emit('openErrorDetail', log.id)">
{{ t('admin.ops.errorLog.details') }}
</button>
</td>
</tr>
</tbody>
</table>
</div>
<Pagination
v-if="total > 0"
:total="total"
:page="page"
:page-size="pageSize"
:page-size-options="[10, 20, 50, 100, 200, 500]"
@update:page="emit('update:page', $event)"
@update:pageSize="emit('update:pageSize', $event)"
/>
</div>
<Pagination
v-if="total > 0"
:total="total"
:page="page"
:page-size="pageSize"
:page-size-options="[10, 20, 50, 100, 200, 500]"
@update:page="emit('update:page', $event)"
@update:pageSize="emit('update:pageSize', $event)"
/>
</div>
</template>

View File

@@ -95,6 +95,7 @@ watch(
(open) => {
if (open) {
page.value = 1
pageSize.value = 20
fetchData()
}
}
@@ -150,45 +151,46 @@ const kindBadgeClass = (kind: string) => {
<template>
<BaseDialog :show="modelValue" :title="props.preset.title || t('admin.ops.requestDetails.title')" width="full" @close="close">
<template #default>
<div class="flex items-center justify-between mb-4">
<div class="text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
</div>
<button
type="button"
class="btn btn-secondary btn-sm"
@click="fetchData"
>
{{ t('common.refresh') }}
</button>
</div>
<!-- Loading -->
<div v-if="loading" class="flex items-center justify-center py-16">
<div class="flex flex-col items-center gap-3">
<svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path
class="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
></path>
</svg>
<span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
</div>
</div>
<!-- Table -->
<div v-else>
<div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
<div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
<div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
<div class="flex h-full min-h-0 flex-col">
<div class="mb-4 flex flex-shrink-0 items-center justify-between">
<div class="text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.requestDetails.rangeLabel', { range: rangeLabel }) }}
</div>
<button
type="button"
class="btn btn-secondary btn-sm"
@click="fetchData"
>
{{ t('common.refresh') }}
</button>
</div>
<div v-else class="overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
<div class="overflow-x-auto">
<table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
<thead class="bg-gray-50 dark:bg-dark-900">
<!-- Loading -->
<div v-if="loading" class="flex flex-1 items-center justify-center py-16">
<div class="flex flex-col items-center gap-3">
<svg class="h-8 w-8 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
<path
class="opacity-75"
fill="currentColor"
d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"
></path>
</svg>
<span class="text-sm font-medium text-gray-500 dark:text-gray-400">{{ t('common.loading') }}</span>
</div>
</div>
<!-- Table -->
<div v-else class="flex min-h-0 flex-1 flex-col">
<div v-if="items.length === 0" class="rounded-xl border border-dashed border-gray-200 p-10 text-center dark:border-dark-700">
<div class="text-sm font-medium text-gray-600 dark:text-gray-300">{{ t('admin.ops.requestDetails.empty') }}</div>
<div class="mt-1 text-xs text-gray-400">{{ t('admin.ops.requestDetails.emptyHint') }}</div>
</div>
<div v-else class="flex min-h-0 flex-1 flex-col overflow-hidden rounded-xl border border-gray-200 dark:border-dark-700">
<div class="min-h-0 flex-1 overflow-auto">
<table class="min-w-full divide-y divide-gray-200 dark:divide-dark-700">
<thead class="sticky top-0 z-10 bg-gray-50 dark:bg-dark-900">
<tr>
<th class="px-4 py-3 text-left text-[11px] font-bold uppercase tracking-wider text-gray-500 dark:text-gray-400">
{{ t('admin.ops.requestDetails.table.time') }}
@@ -265,15 +267,16 @@ const kindBadgeClass = (kind: string) => {
</tr>
</tbody>
</table>
</div>
</div>
<Pagination
:total="total"
:page="page"
:page-size="pageSize"
@update:page="handlePageChange"
@update:pageSize="handlePageSizeChange"
/>
<Pagination
:total="total"
:page="page"
:page-size="pageSize"
@update:page="handlePageChange"
@update:pageSize="handlePageSizeChange"
/>
</div>
</div>
</div>
</template>

View File

@@ -45,6 +45,36 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR
errors.push(t('admin.ops.runtime.validation.evalIntervalRange'))
}
// Thresholds validation: every configured (non-null) threshold has to be a
// finite, non-negative number; percentage thresholds are also capped at 100.
const thresholds = settings.thresholds
if (thresholds) {
  // True when `value` is set but not a finite number in [0, upperBound]
  // (no upper limit is applied when `upperBound` is omitted).
  const isOutOfRange = (value: number | null | undefined, upperBound?: number): boolean => {
    if (value == null) return false
    if (!Number.isFinite(value) || value < 0) return true
    return upperBound !== undefined && value > upperBound
  }

  if (isOutOfRange(thresholds.sla_percent_min, 100)) {
    errors.push('SLA 最低值必须在 0-100 之间')
  }
  if (isOutOfRange(thresholds.latency_p99_ms_max)) {
    errors.push('延迟 P99 最大值必须大于或等于 0')
  }
  if (isOutOfRange(thresholds.ttft_p99_ms_max)) {
    errors.push('TTFT P99 最大值必须大于或等于 0')
  }
  if (isOutOfRange(thresholds.request_error_rate_percent_max, 100)) {
    errors.push('请求错误率最大值必须在 0-100 之间')
  }
  if (isOutOfRange(thresholds.upstream_error_rate_percent_max, 100)) {
    errors.push('上游错误率最大值必须在 0-100 之间')
  }
}
const lock = settings.distributed_lock
if (lock?.enabled) {
if (!lock.key || lock.key.trim().length < 3) {
@@ -130,6 +160,15 @@ function openAlertEditor() {
if (!Array.isArray(draftAlert.value.silencing.entries)) {
draftAlert.value.silencing.entries = []
}
// Seed default thresholds when the draft carries none; the editor inputs bind
// directly to draftAlert.thresholds.* via v-model.
if (!draftAlert.value.thresholds) {
draftAlert.value.thresholds = {
sla_percent_min: 99.5,
latency_p99_ms_max: 2000,
ttft_p99_ms_max: 500,
request_error_rate_percent_max: 5,
upstream_error_rate_percent_max: 5
}
}
}
showAlertEditor.value = true
@@ -295,6 +334,81 @@ onMounted(() => {
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.runtime.evalIntervalHint') }}</p>
</div>
<!--
  Metric threshold section: one numeric input per field of draftAlert.thresholds
  (SLA minimum, latency P99 max, TTFT P99 max, request / upstream error-rate max),
  each bound with v-model.number.
  NOTE(review): the title, labels and hints below are hard-coded Chinese strings,
  whereas the neighbouring sections go through t(); the hint sentence under the
  title also has no separator between its two clauses. Consider moving to i18n keys.
-->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
<div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">指标阈值配置</div>
<p class="mb-4 text-xs text-gray-500 dark:text-gray-400">配置各项指标的告警阈值超出阈值的指标将在看板上以红色显示</p>
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">SLA 最低值 (%)</div>
<input
v-model.number="draftAlert.thresholds.sla_percent_min"
type="number"
min="0"
max="100"
step="0.1"
class="input"
placeholder="99.5"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">SLA 低于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">延迟 P99 最大值 (ms)</div>
<input
v-model.number="draftAlert.thresholds.latency_p99_ms_max"
type="number"
min="0"
step="100"
class="input"
placeholder="2000"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">延迟 P99 高于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">TTFT P99 最大值 (ms)</div>
<input
v-model.number="draftAlert.thresholds.ttft_p99_ms_max"
type="number"
min="0"
step="100"
class="input"
placeholder="500"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">TTFT P99 高于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">请求错误率最大值 (%)</div>
<input
v-model.number="draftAlert.thresholds.request_error_rate_percent_max"
type="number"
min="0"
max="100"
step="0.1"
class="input"
placeholder="5"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">请求错误率高于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">上游错误率最大值 (%)</div>
<input
v-model.number="draftAlert.thresholds.upstream_error_rate_percent_max"
type="number"
min="0"
max="100"
step="0.1"
class="input"
placeholder="5"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">上游错误率高于此值时将显示为红色</p>
</div>
</div>
</div>
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
<div class="mb-2 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.runtime.silencing.title') }}</div>

View File

@@ -6,7 +6,7 @@ import { opsAPI } from '@/api/admin/ops'
import BaseDialog from '@/components/common/BaseDialog.vue'
import Select from '@/components/common/Select.vue'
import Toggle from '@/components/common/Toggle.vue'
import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings } from '../types'
import type { OpsAlertRuntimeSettings, EmailNotificationConfig, AlertSeverity, OpsAdvancedSettings, OpsMetricThresholds } from '../types'
const { t } = useI18n()
const appStore = useAppStore()
@@ -29,19 +29,38 @@ const runtimeSettings = ref<OpsAlertRuntimeSettings | null>(null)
const emailConfig = ref<EmailNotificationConfig | null>(null)
// 高级设置
const advancedSettings = ref<OpsAdvancedSettings | null>(null)
// Metric threshold configuration, initialised with default values.
// loadAllSettings overwrites it when the API returns a non-empty thresholds object.
const metricThresholds = ref<OpsMetricThresholds>({
sla_percent_min: 99.5,
latency_p99_ms_max: 2000,
ttft_p99_ms_max: 500,
request_error_rate_percent_max: 5,
upstream_error_rate_percent_max: 5
})
// 加载所有配置
async function loadAllSettings() {
loading.value = true
try {
const [runtime, email, advanced] = await Promise.all([
const [runtime, email, advanced, thresholds] = await Promise.all([
opsAPI.getAlertRuntimeSettings(),
opsAPI.getEmailNotificationConfig(),
opsAPI.getAdvancedSettings()
opsAPI.getAdvancedSettings(),
opsAPI.getMetricThresholds()
])
runtimeSettings.value = runtime
emailConfig.value = email
advancedSettings.value = advanced
// If the backend returned thresholds, use them; otherwise keep the defaults.
// Any individual field that comes back null/undefined falls back to its default via ??.
if (thresholds && Object.keys(thresholds).length > 0) {
metricThresholds.value = {
sla_percent_min: thresholds.sla_percent_min ?? 99.5,
latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000,
ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
}
}
} catch (err: any) {
console.error('[OpsSettingsDialog] Failed to load settings', err)
appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.loadFailed'))
@@ -138,6 +157,23 @@ const validation = computed(() => {
}
}
// Validate metric thresholds: every configured (non-null) value must be a finite
// number inside its allowed range.
// The Number.isFinite guard is required because the inputs are bound with
// v-model.number, which passes the raw string through (e.g. '' for a cleared
// field) when it cannot be parsed as a number. '' is neither < 0 nor > 100, so
// range comparisons alone would accept it and the empty value would be saved.
if (metricThresholds.value.sla_percent_min != null && (!Number.isFinite(metricThresholds.value.sla_percent_min) || metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) {
  errors.push('SLA最低百分比必须在0-100之间')
}
if (metricThresholds.value.latency_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.latency_p99_ms_max) || metricThresholds.value.latency_p99_ms_max < 0)) {
  errors.push('延迟P99最大值必须大于等于0')
}
if (metricThresholds.value.ttft_p99_ms_max != null && (!Number.isFinite(metricThresholds.value.ttft_p99_ms_max) || metricThresholds.value.ttft_p99_ms_max < 0)) {
  errors.push('TTFT P99最大值必须大于等于0')
}
if (metricThresholds.value.request_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.request_error_rate_percent_max) || metricThresholds.value.request_error_rate_percent_max < 0 || metricThresholds.value.request_error_rate_percent_max > 100)) {
  errors.push('请求错误率最大值必须在0-100之间')
}
if (metricThresholds.value.upstream_error_rate_percent_max != null && (!Number.isFinite(metricThresholds.value.upstream_error_rate_percent_max) || metricThresholds.value.upstream_error_rate_percent_max < 0 || metricThresholds.value.upstream_error_rate_percent_max > 100)) {
  errors.push('上游错误率最大值必须在0-100之间')
}
return { valid: errors.length === 0, errors }
})
@@ -153,14 +189,15 @@ async function saveAllSettings() {
await Promise.all([
runtimeSettings.value ? opsAPI.updateAlertRuntimeSettings(runtimeSettings.value) : Promise.resolve(),
emailConfig.value ? opsAPI.updateEmailNotificationConfig(emailConfig.value) : Promise.resolve(),
advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve()
advancedSettings.value ? opsAPI.updateAdvancedSettings(advancedSettings.value) : Promise.resolve(),
opsAPI.updateMetricThresholds(metricThresholds.value)
])
appStore.showSuccess(t('admin.ops.settings.saveSuccess'))
emit('saved')
emit('close')
} catch (err: any) {
console.error('[OpsSettingsDialog] Failed to save settings', err)
appStore.showError(err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
appStore.showError(err?.response?.data?.message || err?.response?.data?.detail || t('admin.ops.settings.saveFailed'))
} finally {
saving.value = false
}
@@ -306,6 +343,77 @@ async function saveAllSettings() {
</div>
</div>
<!--
  Metric threshold configuration: one numeric input per field of metricThresholds,
  each bound with v-model.number; titles, labels and hints come from i18n keys
  under admin.ops.settings.
-->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-700/50">
<h4 class="mb-3 text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.settings.metricThresholds') }}</h4>
<p class="mb-4 text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.settings.metricThresholdsHint') }}</p>
<div class="space-y-4">
<div>
<label class="input-label">{{ t('admin.ops.settings.slaMinPercent') }}</label>
<input
v-model.number="metricThresholds.sla_percent_min"
type="number"
min="0"
max="100"
step="0.1"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.slaMinPercentHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.latencyP99MaxMs') }}</label>
<input
v-model.number="metricThresholds.latency_p99_ms_max"
type="number"
min="0"
step="100"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.latencyP99MaxMsHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.ttftP99MaxMs') }}</label>
<input
v-model.number="metricThresholds.ttft_p99_ms_max"
type="number"
min="0"
step="50"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.ttftP99MaxMsHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.requestErrorRateMaxPercent') }}</label>
<input
v-model.number="metricThresholds.request_error_rate_percent_max"
type="number"
min="0"
max="100"
step="0.1"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.requestErrorRateMaxPercentHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.upstreamErrorRateMaxPercent') }}</label>
<input
v-model.number="metricThresholds.upstream_error_rate_percent_max"
type="number"
min="0"
max="100"
step="0.1"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.upstreamErrorRateMaxPercentHint') }}</p>
</div>
</div>
</div>
<!-- 高级设置 -->
<details class="rounded-2xl bg-gray-50 dark:bg-dark-700/50">
<summary class="cursor-pointer p-4 text-sm font-semibold text-gray-900 dark:text-white">

View File

@@ -14,6 +14,7 @@ export type {
EmailNotificationConfig,
OpsDistributedLockSettings,
OpsAlertRuntimeSettings,
OpsMetricThresholds,
OpsAdvancedSettings,
OpsDataRetentionSettings,
OpsAggregationSettings