Merge pull request #285 from IanShaw027/fix/ops-bug

feat(ops): 增强错误日志管理、告警静默和前端 UI 优化
This commit is contained in:
Wesley Liddick
2026-01-15 11:26:16 +08:00
committed by GitHub
42 changed files with 4039 additions and 1065 deletions

View File

@@ -129,6 +129,8 @@ export default {
all: 'All',
none: 'None',
noData: 'No data',
expand: 'Expand',
collapse: 'Collapse',
success: 'Success',
error: 'Error',
critical: 'Critical',
@@ -150,12 +152,13 @@ export default {
invalidEmail: 'Please enter a valid email address',
optional: 'optional',
selectOption: 'Select an option',
searchPlaceholder: 'Search...',
noOptionsFound: 'No options found',
noGroupsAvailable: 'No groups available',
unknownError: 'Unknown error occurred',
saving: 'Saving...',
selectedCount: '({count} selected)', refresh: 'Refresh',
searchPlaceholder: 'Search...',
noOptionsFound: 'No options found',
noGroupsAvailable: 'No groups available',
unknownError: 'Unknown error occurred',
saving: 'Saving...',
selectedCount: '({count} selected)',
refresh: 'Refresh',
settings: 'Settings',
notAvailable: 'N/A',
now: 'Now',
@@ -1882,10 +1885,8 @@ export default {
noSystemMetrics: 'No system metrics collected yet.',
collectedAt: 'Collected at:',
window: 'window',
cpu: 'CPU',
memory: 'Memory',
db: 'DB',
redis: 'Redis',
goroutines: 'Goroutines',
jobs: 'Jobs',
jobsHelp: 'Click “Details” to view job heartbeats and recent errors',
@@ -1911,7 +1912,7 @@ export default {
totalRequests: 'Total Requests',
avgQps: 'Avg QPS',
avgTps: 'Avg TPS',
avgLatency: 'Avg Latency',
avgLatency: 'Avg Request Duration',
avgTtft: 'Avg TTFT',
exceptions: 'Exceptions',
requestErrors: 'Request Errors',
@@ -1923,7 +1924,7 @@ export default {
errors: 'Errors',
errorRate: 'error_rate:',
upstreamRate: 'upstream_rate:',
latencyDuration: 'Latency (duration_ms)',
latencyDuration: 'Request Duration (ms)',
ttftLabel: 'TTFT (first_token_ms)',
p50: 'p50:',
p90: 'p90:',
@@ -1931,7 +1932,6 @@ export default {
p99: 'p99:',
avg: 'avg:',
max: 'max:',
qps: 'QPS',
requests: 'Requests',
requestsTitle: 'Requests',
upstream: 'Upstream',
@@ -1943,7 +1943,7 @@ export default {
failedToLoadData: 'Failed to load ops data.',
failedToLoadOverview: 'Failed to load overview',
failedToLoadThroughputTrend: 'Failed to load throughput trend',
failedToLoadLatencyHistogram: 'Failed to load latency histogram',
failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
failedToLoadErrorTrend: 'Failed to load error trend',
failedToLoadErrorDistribution: 'Failed to load error distribution',
failedToLoadErrorDetail: 'Failed to load error detail',
@@ -1951,7 +1951,7 @@ export default {
tpsK: 'TPS (K)',
top: 'Top:',
throughputTrend: 'Throughput Trend',
latencyHistogram: 'Latency Histogram',
latencyHistogram: 'Request Duration Histogram',
errorTrend: 'Error Trend',
errorDistribution: 'Error Distribution',
// Health Score & Diagnosis
@@ -1966,7 +1966,9 @@ export default {
'30m': 'Last 30 minutes',
'1h': 'Last 1 hour',
'6h': 'Last 6 hours',
'24h': 'Last 24 hours'
'24h': 'Last 24 hours',
'7d': 'Last 7 days',
'30d': 'Last 30 days'
},
fullscreen: {
enter: 'Enter Fullscreen'
@@ -1995,14 +1997,7 @@ export default {
memoryHigh: 'Memory usage elevated ({usage}%)',
memoryHighImpact: 'Memory pressure is high, needs attention',
memoryHighAction: 'Monitor memory trends, check for memory leaks',
// Latency diagnostics
latencyCritical: 'Response latency critically high ({latency}ms)',
latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
latencyHigh: 'Response latency elevated ({latency}ms)',
latencyHighImpact: 'User experience degraded, needs optimization',
latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
ttftHigh: 'Time to first byte elevated ({ttft}ms)',
ttftHigh: 'Time to first token elevated ({ttft}ms)',
ttftHighImpact: 'User perceived latency increased',
ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
// Error rate diagnostics
@@ -2038,27 +2033,106 @@ export default {
// Error Log
errorLog: {
timeId: 'Time / ID',
commonErrors: {
contextDeadlineExceeded: 'context deadline exceeded',
connectionRefused: 'connection refused',
rateLimit: 'rate limit'
},
time: 'Time',
type: 'Type',
context: 'Context',
platform: 'Platform',
model: 'Model',
group: 'Group',
user: 'User',
userId: 'User ID',
account: 'Account',
accountId: 'Account ID',
status: 'Status',
message: 'Message',
latency: 'Latency',
latency: 'Request Duration',
action: 'Action',
noErrors: 'No errors in this window.',
grp: 'GRP:',
acc: 'ACC:',
details: 'Details',
phase: 'Phase'
phase: 'Phase',
id: 'ID:',
typeUpstream: 'Upstream',
typeRequest: 'Request',
typeAuth: 'Auth',
typeRouting: 'Routing',
typeInternal: 'Internal'
},
// Error Details Modal
errorDetails: {
upstreamErrors: 'Upstream Errors',
requestErrors: 'Request Errors',
unresolved: 'Unresolved',
resolved: 'Resolved',
viewErrors: 'Errors',
viewExcluded: 'Excluded',
statusCodeOther: 'Other',
owner: {
provider: 'Provider',
client: 'Client',
platform: 'Platform'
},
phase: {
request: 'Request',
auth: 'Auth',
routing: 'Routing',
upstream: 'Upstream',
network: 'Network',
internal: 'Internal'
},
total: 'Total:',
searchPlaceholder: 'Search request_id / client_request_id / message',
accountIdPlaceholder: 'account_id'
},
// Error Detail Modal
errorDetail: {
title: 'Error Detail',
titleWithId: 'Error #{id}',
noErrorSelected: 'No error selected.',
resolution: 'Resolved:',
pinnedToOriginalAccountId: 'Pinned to original account_id',
missingUpstreamRequestBody: 'Missing upstream request body',
failedToLoadRetryHistory: 'Failed to load retry history',
failedToUpdateResolvedStatus: 'Failed to update resolved status',
unsupportedRetryMode: 'Unsupported retry mode',
classificationKeys: {
phase: 'Phase',
owner: 'Owner',
source: 'Source',
retryable: 'Retryable',
resolvedAt: 'Resolved At',
resolvedBy: 'Resolved By',
resolvedRetryId: 'Resolved Retry',
retryCount: 'Retry Count'
},
source: {
upstream_http: 'Upstream HTTP'
},
upstreamKeys: {
status: 'Status',
message: 'Message',
detail: 'Detail',
upstreamErrors: 'Upstream Errors'
},
upstreamEvent: {
account: 'Account',
status: 'Status',
requestId: 'Request ID'
},
responsePreview: {
expand: 'Response (click to expand)',
collapse: 'Response (click to collapse)'
},
retryMeta: {
used: 'Used',
success: 'Success',
pinned: 'Pinned'
},
loading: 'Loading…',
requestId: 'Request ID',
time: 'Time',
@@ -2068,8 +2142,10 @@ export default {
basicInfo: 'Basic Info',
platform: 'Platform',
model: 'Model',
latency: 'Latency',
ttft: 'TTFT',
group: 'Group',
user: 'User',
account: 'Account',
latency: 'Request Duration',
businessLimited: 'Business Limited',
requestPath: 'Request Path',
timings: 'Timings',
@@ -2077,6 +2153,8 @@ export default {
routing: 'Routing',
upstream: 'Upstream',
response: 'Response',
classification: 'Classification',
notRetryable: 'Not recommended to retry',
retry: 'Retry',
retryClient: 'Retry (Client)',
retryUpstream: 'Retry (Upstream pinned)',
@@ -2088,7 +2166,6 @@ export default {
confirmRetry: 'Confirm Retry',
retrySuccess: 'Retry succeeded',
retryFailed: 'Retry failed',
na: 'N/A',
retryHint: 'Retry will resend the request with the same parameters',
retryClientHint: 'Use client retry (no account pinning)',
retryUpstreamHint: 'Use upstream pinned retry (pin to the error account)',
@@ -2096,8 +2173,33 @@ export default {
retryNote1: 'Retry will use the same request body and parameters',
retryNote2: 'If the original request failed due to account issues, pinned retry may still fail',
retryNote3: 'Client retry will reselect an account',
retryNote4: 'You can force retry for non-retryable errors, but it is not recommended',
confirmRetryMessage: 'Confirm retry this request?',
confirmRetryHint: 'Will resend with the same request parameters'
confirmRetryHint: 'Will resend with the same request parameters',
forceRetry: 'I understand and want to force retry',
forceRetryHint: 'This error usually cannot be fixed by retry; check to proceed',
forceRetryNeedAck: 'Please check to force retry',
markResolved: 'Mark resolved',
markUnresolved: 'Mark unresolved',
viewRetries: 'Retry history',
retryHistory: 'Retry History',
tabOverview: 'Overview',
tabRetries: 'Retries',
tabRequest: 'Request',
tabResponse: 'Response',
responseBody: 'Response',
compareA: 'Compare A',
compareB: 'Compare B',
retrySummary: 'Retry Summary',
responseHintSucceeded: 'Showing succeeded retry response_preview (#{id})',
responseHintFallback: 'No succeeded retry found; showing stored error_body',
suggestion: 'Suggestion',
suggestUpstreamResolved: '✓ Upstream error resolved by retry; no action needed',
suggestUpstream: 'Upstream instability: check account status, consider switching accounts, or retry',
suggestRequest: 'Client request error: ask customer to fix request parameters',
suggestAuth: 'Auth failed: verify API key/credentials',
suggestPlatform: 'Platform error: prioritize investigation and fix',
suggestGeneric: 'See details for more context'
},
requestDetails: {
title: 'Request Details',
@@ -2133,13 +2235,46 @@ export default {
loading: 'Loading...',
empty: 'No alert events',
loadFailed: 'Failed to load alert events',
status: {
firing: 'FIRING',
resolved: 'RESOLVED',
manualResolved: 'MANUAL RESOLVED'
},
detail: {
title: 'Alert Detail',
loading: 'Loading detail...',
empty: 'No detail',
loadFailed: 'Failed to load alert detail',
manualResolve: 'Mark as Resolved',
manualResolvedSuccess: 'Marked as manually resolved',
manualResolvedFailed: 'Failed to mark as manually resolved',
silence: 'Ignore Alert',
silenceSuccess: 'Alert silenced',
silenceFailed: 'Failed to silence alert',
viewRule: 'View Rule',
viewLogs: 'View Logs',
firedAt: 'Fired At',
resolvedAt: 'Resolved At',
ruleId: 'Rule ID',
dimensions: 'Dimensions',
historyTitle: 'History',
historyHint: 'Recent events with same rule + dimensions',
historyLoading: 'Loading history...',
historyEmpty: 'No history'
},
table: {
time: 'Time',
status: 'Status',
severity: 'Severity',
platform: 'Platform',
ruleId: 'Rule ID',
title: 'Title',
duration: 'Duration',
metric: 'Metric / Threshold',
email: 'Email Sent'
dimensions: 'Dimensions',
email: 'Email Sent',
emailSent: 'Sent',
emailIgnored: 'Ignored'
}
},
alertRules: {
@@ -2253,7 +2388,6 @@ export default {
title: 'Alert Silencing (Maintenance Mode)',
enabled: 'Enable silencing',
globalUntil: 'Silence until (RFC3339)',
untilPlaceholder: '2026-01-05T00:00:00Z',
untilHint: 'Leave empty to only toggle silencing without an expiry (not recommended).',
reason: 'Reason',
reasonPlaceholder: 'e.g., planned maintenance',
@@ -2293,7 +2427,11 @@ export default {
lockKeyRequired: 'Distributed lock key is required when lock is enabled',
lockKeyPrefix: 'Distributed lock key must start with "{prefix}"',
lockKeyHint: 'Recommended: start with "{prefix}" to avoid conflicts',
lockTtlRange: 'Distributed lock TTL must be between 1 and 86400 seconds'
lockTtlRange: 'Distributed lock TTL must be between 1 and 86400 seconds',
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
}
},
email: {
@@ -2358,8 +2496,6 @@ export default {
metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
slaMinPercent: 'SLA Minimum Percentage',
slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
latencyP99MaxMs: 'Latency P99 Maximum (ms)',
latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
@@ -2378,9 +2514,28 @@ export default {
aggregation: 'Pre-aggregation Tasks',
enableAggregation: 'Enable Pre-aggregation',
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
errorFiltering: 'Error Filtering',
ignoreCountTokensErrors: 'Ignore count_tokens errors',
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
ignoreContextCanceled: 'Ignore client disconnect errors',
ignoreContextCanceledHint: 'When enabled, client disconnect (context canceled) errors will not be written to the error log.',
ignoreNoAvailableAccounts: 'Ignore no available accounts errors',
ignoreNoAvailableAccountsHint: 'When enabled, "No available accounts" errors will not be written to the error log (not recommended; usually a config issue).',
autoRefresh: 'Auto Refresh',
enableAutoRefresh: 'Enable auto refresh',
enableAutoRefreshHint: 'Automatically refresh dashboard data at a fixed interval.',
refreshInterval: 'Refresh Interval',
refreshInterval15s: '15 seconds',
refreshInterval30s: '30 seconds',
refreshInterval60s: '60 seconds',
autoRefreshCountdown: 'Auto refresh: {seconds}s',
validation: {
title: 'Please fix the following issues',
retentionDaysRange: 'Retention days must be between 1-365 days'
retentionDaysRange: 'Retention days must be between 1-365 days',
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
}
},
concurrency: {
@@ -2418,7 +2573,7 @@ export default {
tooltips: {
totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
latencyHistogram: 'Request duration distribution (ms) for successful requests.',
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
errorDistribution: 'Error distribution by status code.',
goroutines:
@@ -2433,7 +2588,7 @@ export default {
sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
},