Merge pull request #285 from IanShaw027/fix/ops-bug
feat(ops): 增强错误日志管理、告警静默和前端 UI 优化
This commit is contained in:
@@ -129,6 +129,8 @@ export default {
|
||||
all: 'All',
|
||||
none: 'None',
|
||||
noData: 'No data',
|
||||
expand: 'Expand',
|
||||
collapse: 'Collapse',
|
||||
success: 'Success',
|
||||
error: 'Error',
|
||||
critical: 'Critical',
|
||||
@@ -150,12 +152,13 @@ export default {
|
||||
invalidEmail: 'Please enter a valid email address',
|
||||
optional: 'optional',
|
||||
selectOption: 'Select an option',
|
||||
searchPlaceholder: 'Search...',
|
||||
noOptionsFound: 'No options found',
|
||||
noGroupsAvailable: 'No groups available',
|
||||
unknownError: 'Unknown error occurred',
|
||||
saving: 'Saving...',
|
||||
selectedCount: '({count} selected)', refresh: 'Refresh',
|
||||
searchPlaceholder: 'Search...',
|
||||
noOptionsFound: 'No options found',
|
||||
noGroupsAvailable: 'No groups available',
|
||||
unknownError: 'Unknown error occurred',
|
||||
saving: 'Saving...',
|
||||
selectedCount: '({count} selected)',
|
||||
refresh: 'Refresh',
|
||||
settings: 'Settings',
|
||||
notAvailable: 'N/A',
|
||||
now: 'Now',
|
||||
@@ -1882,10 +1885,8 @@ export default {
|
||||
noSystemMetrics: 'No system metrics collected yet.',
|
||||
collectedAt: 'Collected at:',
|
||||
window: 'window',
|
||||
cpu: 'CPU',
|
||||
memory: 'Memory',
|
||||
db: 'DB',
|
||||
redis: 'Redis',
|
||||
goroutines: 'Goroutines',
|
||||
jobs: 'Jobs',
|
||||
jobsHelp: 'Click “Details” to view job heartbeats and recent errors',
|
||||
@@ -1911,7 +1912,7 @@ export default {
|
||||
totalRequests: 'Total Requests',
|
||||
avgQps: 'Avg QPS',
|
||||
avgTps: 'Avg TPS',
|
||||
avgLatency: 'Avg Latency',
|
||||
avgLatency: 'Avg Request Duration',
|
||||
avgTtft: 'Avg TTFT',
|
||||
exceptions: 'Exceptions',
|
||||
requestErrors: 'Request Errors',
|
||||
@@ -1923,7 +1924,7 @@ export default {
|
||||
errors: 'Errors',
|
||||
errorRate: 'error_rate:',
|
||||
upstreamRate: 'upstream_rate:',
|
||||
latencyDuration: 'Latency (duration_ms)',
|
||||
latencyDuration: 'Request Duration (ms)',
|
||||
ttftLabel: 'TTFT (first_token_ms)',
|
||||
p50: 'p50:',
|
||||
p90: 'p90:',
|
||||
@@ -1931,7 +1932,6 @@ export default {
|
||||
p99: 'p99:',
|
||||
avg: 'avg:',
|
||||
max: 'max:',
|
||||
qps: 'QPS',
|
||||
requests: 'Requests',
|
||||
requestsTitle: 'Requests',
|
||||
upstream: 'Upstream',
|
||||
@@ -1943,7 +1943,7 @@ export default {
|
||||
failedToLoadData: 'Failed to load ops data.',
|
||||
failedToLoadOverview: 'Failed to load overview',
|
||||
failedToLoadThroughputTrend: 'Failed to load throughput trend',
|
||||
failedToLoadLatencyHistogram: 'Failed to load latency histogram',
|
||||
failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
|
||||
failedToLoadErrorTrend: 'Failed to load error trend',
|
||||
failedToLoadErrorDistribution: 'Failed to load error distribution',
|
||||
failedToLoadErrorDetail: 'Failed to load error detail',
|
||||
@@ -1951,7 +1951,7 @@ export default {
|
||||
tpsK: 'TPS (K)',
|
||||
top: 'Top:',
|
||||
throughputTrend: 'Throughput Trend',
|
||||
latencyHistogram: 'Latency Histogram',
|
||||
latencyHistogram: 'Request Duration Histogram',
|
||||
errorTrend: 'Error Trend',
|
||||
errorDistribution: 'Error Distribution',
|
||||
// Health Score & Diagnosis
|
||||
@@ -1966,7 +1966,9 @@ export default {
|
||||
'30m': 'Last 30 minutes',
|
||||
'1h': 'Last 1 hour',
|
||||
'6h': 'Last 6 hours',
|
||||
'24h': 'Last 24 hours'
|
||||
'24h': 'Last 24 hours',
|
||||
'7d': 'Last 7 days',
|
||||
'30d': 'Last 30 days'
|
||||
},
|
||||
fullscreen: {
|
||||
enter: 'Enter Fullscreen'
|
||||
@@ -1995,14 +1997,7 @@ export default {
|
||||
memoryHigh: 'Memory usage elevated ({usage}%)',
|
||||
memoryHighImpact: 'Memory pressure is high, needs attention',
|
||||
memoryHighAction: 'Monitor memory trends, check for memory leaks',
|
||||
// Latency diagnostics
|
||||
latencyCritical: 'Response latency critically high ({latency}ms)',
|
||||
latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
|
||||
latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
|
||||
latencyHigh: 'Response latency elevated ({latency}ms)',
|
||||
latencyHighImpact: 'User experience degraded, needs optimization',
|
||||
latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
|
||||
ttftHigh: 'Time to first byte elevated ({ttft}ms)',
|
||||
ttftHigh: 'Time to first token elevated ({ttft}ms)',
|
||||
ttftHighImpact: 'User perceived latency increased',
|
||||
ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
|
||||
// Error rate diagnostics
|
||||
@@ -2038,27 +2033,106 @@ export default {
|
||||
// Error Log
|
||||
errorLog: {
|
||||
timeId: 'Time / ID',
|
||||
commonErrors: {
|
||||
contextDeadlineExceeded: 'context deadline exceeded',
|
||||
connectionRefused: 'connection refused',
|
||||
rateLimit: 'rate limit'
|
||||
},
|
||||
time: 'Time',
|
||||
type: 'Type',
|
||||
context: 'Context',
|
||||
platform: 'Platform',
|
||||
model: 'Model',
|
||||
group: 'Group',
|
||||
user: 'User',
|
||||
userId: 'User ID',
|
||||
account: 'Account',
|
||||
accountId: 'Account ID',
|
||||
status: 'Status',
|
||||
message: 'Message',
|
||||
latency: 'Latency',
|
||||
latency: 'Request Duration',
|
||||
action: 'Action',
|
||||
noErrors: 'No errors in this window.',
|
||||
grp: 'GRP:',
|
||||
acc: 'ACC:',
|
||||
details: 'Details',
|
||||
phase: 'Phase'
|
||||
phase: 'Phase',
|
||||
id: 'ID:',
|
||||
typeUpstream: 'Upstream',
|
||||
typeRequest: 'Request',
|
||||
typeAuth: 'Auth',
|
||||
typeRouting: 'Routing',
|
||||
typeInternal: 'Internal'
|
||||
},
|
||||
// Error Details Modal
|
||||
errorDetails: {
|
||||
upstreamErrors: 'Upstream Errors',
|
||||
requestErrors: 'Request Errors',
|
||||
unresolved: 'Unresolved',
|
||||
resolved: 'Resolved',
|
||||
viewErrors: 'Errors',
|
||||
viewExcluded: 'Excluded',
|
||||
statusCodeOther: 'Other',
|
||||
owner: {
|
||||
provider: 'Provider',
|
||||
client: 'Client',
|
||||
platform: 'Platform'
|
||||
},
|
||||
phase: {
|
||||
request: 'Request',
|
||||
auth: 'Auth',
|
||||
routing: 'Routing',
|
||||
upstream: 'Upstream',
|
||||
network: 'Network',
|
||||
internal: 'Internal'
|
||||
},
|
||||
total: 'Total:',
|
||||
searchPlaceholder: 'Search request_id / client_request_id / message',
|
||||
accountIdPlaceholder: 'account_id'
|
||||
},
|
||||
// Error Detail Modal
|
||||
errorDetail: {
|
||||
title: 'Error Detail',
|
||||
titleWithId: 'Error #{id}',
|
||||
noErrorSelected: 'No error selected.',
|
||||
resolution: 'Resolved:',
|
||||
pinnedToOriginalAccountId: 'Pinned to original account_id',
|
||||
missingUpstreamRequestBody: 'Missing upstream request body',
|
||||
failedToLoadRetryHistory: 'Failed to load retry history',
|
||||
failedToUpdateResolvedStatus: 'Failed to update resolved status',
|
||||
unsupportedRetryMode: 'Unsupported retry mode',
|
||||
classificationKeys: {
|
||||
phase: 'Phase',
|
||||
owner: 'Owner',
|
||||
source: 'Source',
|
||||
retryable: 'Retryable',
|
||||
resolvedAt: 'Resolved At',
|
||||
resolvedBy: 'Resolved By',
|
||||
resolvedRetryId: 'Resolved Retry',
|
||||
retryCount: 'Retry Count'
|
||||
},
|
||||
source: {
|
||||
upstream_http: 'Upstream HTTP'
|
||||
},
|
||||
upstreamKeys: {
|
||||
status: 'Status',
|
||||
message: 'Message',
|
||||
detail: 'Detail',
|
||||
upstreamErrors: 'Upstream Errors'
|
||||
},
|
||||
upstreamEvent: {
|
||||
account: 'Account',
|
||||
status: 'Status',
|
||||
requestId: 'Request ID'
|
||||
},
|
||||
responsePreview: {
|
||||
expand: 'Response (click to expand)',
|
||||
collapse: 'Response (click to collapse)'
|
||||
},
|
||||
retryMeta: {
|
||||
used: 'Used',
|
||||
success: 'Success',
|
||||
pinned: 'Pinned'
|
||||
},
|
||||
loading: 'Loading…',
|
||||
requestId: 'Request ID',
|
||||
time: 'Time',
|
||||
@@ -2068,8 +2142,10 @@ export default {
|
||||
basicInfo: 'Basic Info',
|
||||
platform: 'Platform',
|
||||
model: 'Model',
|
||||
latency: 'Latency',
|
||||
ttft: 'TTFT',
|
||||
group: 'Group',
|
||||
user: 'User',
|
||||
account: 'Account',
|
||||
latency: 'Request Duration',
|
||||
businessLimited: 'Business Limited',
|
||||
requestPath: 'Request Path',
|
||||
timings: 'Timings',
|
||||
@@ -2077,6 +2153,8 @@ export default {
|
||||
routing: 'Routing',
|
||||
upstream: 'Upstream',
|
||||
response: 'Response',
|
||||
classification: 'Classification',
|
||||
notRetryable: 'Not recommended to retry',
|
||||
retry: 'Retry',
|
||||
retryClient: 'Retry (Client)',
|
||||
retryUpstream: 'Retry (Upstream pinned)',
|
||||
@@ -2088,7 +2166,6 @@ export default {
|
||||
confirmRetry: 'Confirm Retry',
|
||||
retrySuccess: 'Retry succeeded',
|
||||
retryFailed: 'Retry failed',
|
||||
na: 'N/A',
|
||||
retryHint: 'Retry will resend the request with the same parameters',
|
||||
retryClientHint: 'Use client retry (no account pinning)',
|
||||
retryUpstreamHint: 'Use upstream pinned retry (pin to the error account)',
|
||||
@@ -2096,8 +2173,33 @@ export default {
|
||||
retryNote1: 'Retry will use the same request body and parameters',
|
||||
retryNote2: 'If the original request failed due to account issues, pinned retry may still fail',
|
||||
retryNote3: 'Client retry will reselect an account',
|
||||
retryNote4: 'You can force retry for non-retryable errors, but it is not recommended',
|
||||
confirmRetryMessage: 'Confirm retry this request?',
|
||||
confirmRetryHint: 'Will resend with the same request parameters'
|
||||
confirmRetryHint: 'Will resend with the same request parameters',
|
||||
forceRetry: 'I understand and want to force retry',
|
||||
forceRetryHint: 'This error usually cannot be fixed by retry; check to proceed',
|
||||
forceRetryNeedAck: 'Please check to force retry',
|
||||
markResolved: 'Mark resolved',
|
||||
markUnresolved: 'Mark unresolved',
|
||||
viewRetries: 'Retry history',
|
||||
retryHistory: 'Retry History',
|
||||
tabOverview: 'Overview',
|
||||
tabRetries: 'Retries',
|
||||
tabRequest: 'Request',
|
||||
tabResponse: 'Response',
|
||||
responseBody: 'Response',
|
||||
compareA: 'Compare A',
|
||||
compareB: 'Compare B',
|
||||
retrySummary: 'Retry Summary',
|
||||
responseHintSucceeded: 'Showing succeeded retry response_preview (#{id})',
|
||||
responseHintFallback: 'No succeeded retry found; showing stored error_body',
|
||||
suggestion: 'Suggestion',
|
||||
suggestUpstreamResolved: '✓ Upstream error resolved by retry; no action needed',
|
||||
suggestUpstream: 'Upstream instability: check account status, consider switching accounts, or retry',
|
||||
suggestRequest: 'Client request error: ask customer to fix request parameters',
|
||||
suggestAuth: 'Auth failed: verify API key/credentials',
|
||||
suggestPlatform: 'Platform error: prioritize investigation and fix',
|
||||
suggestGeneric: 'See details for more context'
|
||||
},
|
||||
requestDetails: {
|
||||
title: 'Request Details',
|
||||
@@ -2133,13 +2235,46 @@ export default {
|
||||
loading: 'Loading...',
|
||||
empty: 'No alert events',
|
||||
loadFailed: 'Failed to load alert events',
|
||||
status: {
|
||||
firing: 'FIRING',
|
||||
resolved: 'RESOLVED',
|
||||
manualResolved: 'MANUAL RESOLVED'
|
||||
},
|
||||
detail: {
|
||||
title: 'Alert Detail',
|
||||
loading: 'Loading detail...',
|
||||
empty: 'No detail',
|
||||
loadFailed: 'Failed to load alert detail',
|
||||
manualResolve: 'Mark as Resolved',
|
||||
manualResolvedSuccess: 'Marked as manually resolved',
|
||||
manualResolvedFailed: 'Failed to mark as manually resolved',
|
||||
silence: 'Ignore Alert',
|
||||
silenceSuccess: 'Alert silenced',
|
||||
silenceFailed: 'Failed to silence alert',
|
||||
viewRule: 'View Rule',
|
||||
viewLogs: 'View Logs',
|
||||
firedAt: 'Fired At',
|
||||
resolvedAt: 'Resolved At',
|
||||
ruleId: 'Rule ID',
|
||||
dimensions: 'Dimensions',
|
||||
historyTitle: 'History',
|
||||
historyHint: 'Recent events with same rule + dimensions',
|
||||
historyLoading: 'Loading history...',
|
||||
historyEmpty: 'No history'
|
||||
},
|
||||
table: {
|
||||
time: 'Time',
|
||||
status: 'Status',
|
||||
severity: 'Severity',
|
||||
platform: 'Platform',
|
||||
ruleId: 'Rule ID',
|
||||
title: 'Title',
|
||||
duration: 'Duration',
|
||||
metric: 'Metric / Threshold',
|
||||
email: 'Email Sent'
|
||||
dimensions: 'Dimensions',
|
||||
email: 'Email Sent',
|
||||
emailSent: 'Sent',
|
||||
emailIgnored: 'Ignored'
|
||||
}
|
||||
},
|
||||
alertRules: {
|
||||
@@ -2253,7 +2388,6 @@ export default {
|
||||
title: 'Alert Silencing (Maintenance Mode)',
|
||||
enabled: 'Enable silencing',
|
||||
globalUntil: 'Silence until (RFC3339)',
|
||||
untilPlaceholder: '2026-01-05T00:00:00Z',
|
||||
untilHint: 'Leave empty to only toggle silencing without an expiry (not recommended).',
|
||||
reason: 'Reason',
|
||||
reasonPlaceholder: 'e.g., planned maintenance',
|
||||
@@ -2293,7 +2427,11 @@ export default {
|
||||
lockKeyRequired: 'Distributed lock key is required when lock is enabled',
|
||||
lockKeyPrefix: 'Distributed lock key must start with "{prefix}"',
|
||||
lockKeyHint: 'Recommended: start with "{prefix}" to avoid conflicts',
|
||||
lockTtlRange: 'Distributed lock TTL must be between 1 and 86400 seconds'
|
||||
lockTtlRange: 'Distributed lock TTL must be between 1 and 86400 seconds',
|
||||
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
|
||||
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
|
||||
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
|
||||
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
|
||||
}
|
||||
},
|
||||
email: {
|
||||
@@ -2358,8 +2496,6 @@ export default {
|
||||
metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
|
||||
slaMinPercent: 'SLA Minimum Percentage',
|
||||
slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
|
||||
latencyP99MaxMs: 'Latency P99 Maximum (ms)',
|
||||
latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
|
||||
ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
|
||||
ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
|
||||
requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
|
||||
@@ -2378,9 +2514,28 @@ export default {
|
||||
aggregation: 'Pre-aggregation Tasks',
|
||||
enableAggregation: 'Enable Pre-aggregation',
|
||||
aggregationHint: 'Pre-aggregation improves query performance for long time windows',
|
||||
errorFiltering: 'Error Filtering',
|
||||
ignoreCountTokensErrors: 'Ignore count_tokens errors',
|
||||
ignoreCountTokensErrorsHint: 'When enabled, errors from count_tokens requests will not be written to the error log.',
|
||||
ignoreContextCanceled: 'Ignore client disconnect errors',
|
||||
ignoreContextCanceledHint: 'When enabled, client disconnect (context canceled) errors will not be written to the error log.',
|
||||
ignoreNoAvailableAccounts: 'Ignore no available accounts errors',
|
||||
ignoreNoAvailableAccountsHint: 'When enabled, "No available accounts" errors will not be written to the error log (not recommended; usually a config issue).',
|
||||
autoRefresh: 'Auto Refresh',
|
||||
enableAutoRefresh: 'Enable auto refresh',
|
||||
enableAutoRefreshHint: 'Automatically refresh dashboard data at a fixed interval.',
|
||||
refreshInterval: 'Refresh Interval',
|
||||
refreshInterval15s: '15 seconds',
|
||||
refreshInterval30s: '30 seconds',
|
||||
refreshInterval60s: '60 seconds',
|
||||
autoRefreshCountdown: 'Auto refresh: {seconds}s',
|
||||
validation: {
|
||||
title: 'Please fix the following issues',
|
||||
retentionDaysRange: 'Retention days must be between 1-365 days'
|
||||
retentionDaysRange: 'Retention days must be between 1-365 days',
|
||||
slaMinPercentRange: 'SLA minimum percentage must be between 0 and 100',
|
||||
ttftP99MaxRange: 'TTFT P99 maximum must be a number ≥ 0',
|
||||
requestErrorRateMaxRange: 'Request error rate maximum must be between 0 and 100',
|
||||
upstreamErrorRateMaxRange: 'Upstream error rate maximum must be between 0 and 100'
|
||||
}
|
||||
},
|
||||
concurrency: {
|
||||
@@ -2418,7 +2573,7 @@ export default {
|
||||
tooltips: {
|
||||
totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
|
||||
throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
|
||||
latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
|
||||
latencyHistogram: 'Request duration distribution (ms) for successful requests.',
|
||||
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
|
||||
errorDistribution: 'Error distribution by status code.',
|
||||
goroutines:
|
||||
@@ -2433,7 +2588,7 @@ export default {
|
||||
sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
|
||||
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
|
||||
upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
|
||||
latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
|
||||
latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
|
||||
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
|
||||
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user