feat(前端API): 实现运维监控 API 客户端
- 新增 ops API 客户端(ops.ts) - 扩展 settings API 支持 ops 配置 - 更新 admin API 索引导出 ops 模块 - 扩展 API 客户端支持 WebSocket 连接
This commit is contained in:
@@ -16,6 +16,7 @@ import usageAPI from './usage'
|
||||
import geminiAPI from './gemini'
|
||||
import antigravityAPI from './antigravity'
|
||||
import userAttributesAPI from './userAttributes'
|
||||
import opsAPI from './ops'
|
||||
|
||||
/**
|
||||
* Unified admin API object for convenient access
|
||||
@@ -33,7 +34,8 @@ export const adminAPI = {
|
||||
usage: usageAPI,
|
||||
gemini: geminiAPI,
|
||||
antigravity: antigravityAPI,
|
||||
userAttributes: userAttributesAPI
|
||||
userAttributes: userAttributesAPI,
|
||||
ops: opsAPI
|
||||
}
|
||||
|
||||
export {
|
||||
@@ -49,7 +51,8 @@ export {
|
||||
usageAPI,
|
||||
geminiAPI,
|
||||
antigravityAPI,
|
||||
userAttributesAPI
|
||||
userAttributesAPI,
|
||||
opsAPI
|
||||
}
|
||||
|
||||
export default adminAPI
|
||||
|
||||
906
frontend/src/api/admin/ops.ts
Normal file
906
frontend/src/api/admin/ops.ts
Normal file
@@ -0,0 +1,906 @@
|
||||
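/**
 * Admin Ops API endpoints (vNext)
 * - Dashboard overview (raw path), throughput/error trends, latency histogram, error distribution
 * - Concurrency and account-availability stats
 * - Realtime QPS subscription over WebSocket
 * - Error logs list/detail + retry (client/upstream), request details
 * - Alert rules/events, email notification config, alert runtime settings
 */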
|
||||
|
||||
import { apiClient } from '../client'
|
||||
import type { PaginatedResponse } from '@/types'
|
||||
|
||||
/** Retry mode carried by an error-log retry request (`OpsRetryRequest.mode`). */
export type OpsRetryMode = 'client' | 'upstream'

/** Value of the `mode` query parameter accepted by the dashboard helpers in this file. */
export type OpsQueryMode = 'auto' | 'raw' | 'preagg'
|
||||
|
||||
/** Per-request options accepted by the ops query helpers. */
export interface OpsRequestOptions {
  /** Passed through to the HTTP client as `signal` by the helpers that take these options. */
  signal?: AbortSignal
}
|
||||
|
||||
/** Request body posted by `retryErrorRequest`. */
export interface OpsRetryRequest {
  mode: OpsRetryMode
  /** Optional account id to pin for the retry. */
  pinned_account_id?: number
}
|
||||
|
||||
/** Response payload returned by `retryErrorRequest`. */
export interface OpsRetryResult {
  attempt_id: number
  mode: OpsRetryMode
  /** Known values are listed for readability; any other string is accepted as well. */
  status: 'running' | 'succeeded' | 'failed' | string

  // Account selection
  pinned_account_id?: number | null
  used_account_id?: number | null

  // Upstream response
  http_status_code: number
  upstream_request_id: string

  response_preview: string
  response_truncated: boolean

  error_message: string

  // Timing
  started_at: string
  finished_at: string
  duration_ms: number
}
|
||||
|
||||
/** Response payload of `getDashboardOverview`. */
export interface OpsDashboardOverview {
  // Query window and filters
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  // Optional system-level data
  system_metrics?: OpsSystemMetricsSnapshot | null
  job_heartbeats?: OpsJobHeartbeat[] | null

  // Request counters
  success_count: number
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  request_count_total: number
  request_count_sla: number

  token_consumed: number

  // Rates and upstream error counters
  sla: number
  error_rate: number
  upstream_error_rate: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number

  qps: {
    current: number
    peak: number
    avg: number
  }
  tps: {
    current: number
    peak: number
    avg: number
  }

  duration: OpsPercentiles
  ttft: OpsPercentiles
}
|
||||
|
||||
/** Percentile/aggregate values (`*_ms` fields); every field may be absent or null. */
export interface OpsPercentiles {
  p50_ms?: number | null
  p90_ms?: number | null
  p95_ms?: number | null
  p99_ms?: number | null
  avg_ms?: number | null
  max_ms?: number | null
}
|
||||
|
||||
/** One bucket in `OpsThroughputTrendResponse.points`. */
export interface OpsThroughputTrendPoint {
  bucket_start: string
  request_count: number
  token_consumed: number
  qps: number
  tps: number
}
|
||||
|
||||
/** One entry in `OpsThroughputTrendResponse.by_platform`. */
export interface OpsThroughputPlatformBreakdownItem {
  platform: string
  request_count: number
  token_consumed: number
}
|
||||
|
||||
/** One entry in `OpsThroughputTrendResponse.top_groups`. */
export interface OpsThroughputGroupBreakdownItem {
  group_id: number
  group_name: string
  request_count: number
  token_consumed: number
}
|
||||
|
||||
/** Response payload of `getThroughputTrend`. */
export interface OpsThroughputTrendResponse {
  bucket: string
  points: OpsThroughputTrendPoint[]
  /** Optional breakdowns; may be omitted from the response. */
  by_platform?: OpsThroughputPlatformBreakdownItem[]
  top_groups?: OpsThroughputGroupBreakdownItem[]
}
|
||||
|
||||
/** Kind tag carried by each `OpsRequestDetail` row. */
export type OpsRequestKind = 'success' | 'error'

/** `kind` filter for `listRequestDetails`; `'all'` is accepted in addition to the row kinds. */
export type OpsRequestDetailsKind = OpsRequestKind | 'all'

/** `sort` values accepted by `listRequestDetails`. */
export type OpsRequestDetailsSort = 'created_at_desc' | 'duration_desc'
|
||||
|
||||
/** One row of the paginated result returned by `listRequestDetails`. */
export interface OpsRequestDetail {
  kind: OpsRequestKind
  created_at: string
  request_id: string

  // Request attributes
  platform?: string
  model?: string
  duration_ms?: number | null
  status_code?: number | null

  // Error attributes
  error_id?: number | null
  phase?: string
  severity?: string
  message?: string

  // Related entity ids
  user_id?: number | null
  api_key_id?: number | null
  account_id?: number | null
  group_id?: number | null

  stream?: boolean
}
|
||||
|
||||
/** Query parameters accepted by `listRequestDetails`; every field is optional. */
export interface OpsRequestDetailsParams {
  // Time window
  time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
  start_time?: string
  end_time?: string

  kind?: OpsRequestDetailsKind

  // Scope filters
  platform?: string
  group_id?: number | null

  user_id?: number
  api_key_id?: number
  account_id?: number

  // Text filters
  model?: string
  request_id?: string
  q?: string

  // Duration bounds
  min_duration_ms?: number
  max_duration_ms?: number

  sort?: OpsRequestDetailsSort

  // Pagination
  page?: number
  page_size?: number
}
|
||||
|
||||
/** Paginated response of `listRequestDetails`. */
export type OpsRequestDetailsResponse = PaginatedResponse<OpsRequestDetail>
|
||||
|
||||
/** One entry in `OpsLatencyHistogramResponse.buckets`. */
export interface OpsLatencyHistogramBucket {
  range: string
  count: number
}
|
||||
|
||||
/** Response payload of `getLatencyHistogram`. */
export interface OpsLatencyHistogramResponse {
  // Query window and filters
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  total_requests: number
  buckets: OpsLatencyHistogramBucket[]
}
|
||||
|
||||
/** One bucket in `OpsErrorTrendResponse.points`. */
export interface OpsErrorTrendPoint {
  bucket_start: string
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number
}
|
||||
|
||||
/** Response payload of `getErrorTrend`. */
export interface OpsErrorTrendResponse {
  bucket: string
  points: OpsErrorTrendPoint[]
}
|
||||
|
||||
/** One entry in `OpsErrorDistributionResponse.items`. */
export interface OpsErrorDistributionItem {
  status_code: number
  total: number
  sla: number
  business_limited: number
}
|
||||
|
||||
/** Response payload of `getErrorDistribution`. */
export interface OpsErrorDistributionResponse {
  total: number
  items: OpsErrorDistributionItem[]
}
|
||||
|
||||
/** System metrics snapshot embedded in `OpsDashboardOverview.system_metrics`. */
export interface OpsSystemMetricsSnapshot {
  id: number
  created_at: string
  window_minutes: number

  // CPU / memory
  cpu_usage_percent?: number | null
  memory_used_mb?: number | null
  memory_total_mb?: number | null
  memory_usage_percent?: number | null

  // Dependency health flags
  db_ok?: boolean | null
  redis_ok?: boolean | null

  // Database connection counters
  db_conn_active?: number | null
  db_conn_idle?: number | null
  db_conn_waiting?: number | null

  goroutine_count?: number | null
  concurrency_queue_depth?: number | null
}
|
||||
|
||||
/** One entry in `OpsDashboardOverview.job_heartbeats`. */
export interface OpsJobHeartbeat {
  job_name: string
  last_run_at?: string | null
  last_success_at?: string | null
  last_error_at?: string | null
  last_error?: string | null
  last_duration_ms?: number | null
  updated_at: string
}
|
||||
|
||||
/** Value type of `OpsConcurrencyStatsResponse.platform`. */
export interface PlatformConcurrencyInfo {
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}
|
||||
|
||||
/** Value type of `OpsConcurrencyStatsResponse.group`. */
export interface GroupConcurrencyInfo {
  group_id: number
  group_name: string
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}
|
||||
|
||||
/** Value type of `OpsConcurrencyStatsResponse.account`. */
export interface AccountConcurrencyInfo {
  account_id: number
  /** Optional here, unlike `AccountAvailability.account_name`. */
  account_name?: string
  platform: string
  group_id: number
  group_name: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}
|
||||
|
||||
/** Response payload of `getConcurrencyStats`: three string-keyed maps plus a feature flag. */
export interface OpsConcurrencyStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformConcurrencyInfo>
  group: Record<string, GroupConcurrencyInfo>
  account: Record<string, AccountConcurrencyInfo>
  timestamp?: string
}
|
||||
|
||||
/**
 * Fetch concurrency stats from `/admin/ops/concurrency`.
 *
 * @param platform Sent as `platform` only when truthy.
 * @param groupId Sent as `group_id` only when it is a number greater than 0.
 * @returns The `data` field of the HTTP client response.
 */
export async function getConcurrencyStats(platform?: string, groupId?: number | null): Promise<OpsConcurrencyStatsResponse> {
  const hasGroupFilter = typeof groupId === 'number' && groupId > 0

  // Build the query in the same key order as before: platform first, then group_id.
  const query: Record<string, string | number> = {
    ...(platform ? { platform } : {}),
    ...(hasGroupFilter ? { group_id: groupId as number } : {})
  }

  const response = await apiClient.get<OpsConcurrencyStatsResponse>('/admin/ops/concurrency', { params: query })
  return response.data
}
|
||||
|
||||
/** Value type of `OpsAccountAvailabilityStatsResponse.platform`. */
export interface PlatformAvailability {
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}
|
||||
|
||||
/** Value type of `OpsAccountAvailabilityStatsResponse.group`. */
export interface GroupAvailability {
  group_id: number
  group_name: string
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}
|
||||
|
||||
/** Value type of `OpsAccountAvailabilityStatsResponse.account`. */
export interface AccountAvailability {
  // Identity
  account_id: number
  account_name: string
  platform: string
  group_id: number
  group_name: string

  status: string
  is_available: boolean

  // Rate limiting
  is_rate_limited: boolean
  rate_limit_reset_at?: string
  rate_limit_remaining_sec?: number

  // Overload
  is_overloaded: boolean
  overload_until?: string
  overload_remaining_sec?: number

  // Error state
  has_error: boolean
  error_message?: string
}
|
||||
|
||||
/** Response payload of `getAccountAvailabilityStats`: three string-keyed maps plus a feature flag. */
export interface OpsAccountAvailabilityStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformAvailability>
  group: Record<string, GroupAvailability>
  account: Record<string, AccountAvailability>
  timestamp?: string
}
|
||||
|
||||
/**
 * Fetch account availability stats from `/admin/ops/account-availability`.
 *
 * @param platform Sent as `platform` only when truthy.
 * @param groupId Sent as `group_id` only when it is a number greater than 0.
 * @returns The `data` field of the HTTP client response.
 */
export async function getAccountAvailabilityStats(platform?: string, groupId?: number | null): Promise<OpsAccountAvailabilityStatsResponse> {
  const hasGroupFilter = typeof groupId === 'number' && groupId > 0

  // Same key order as before: platform first, then group_id.
  const query: Record<string, string | number> = {
    ...(platform ? { platform } : {}),
    ...(hasGroupFilter ? { group_id: groupId as number } : {})
  }

  const response = await apiClient.get<OpsAccountAvailabilityStatsResponse>('/admin/ops/account-availability', {
    params: query
  })
  return response.data
}
|
||||
|
||||
/**
 * Options for `subscribeQPS`, the realtime QPS subscription over WebSocket.
 *
 * Note: browsers cannot set Authorization headers for WebSockets.
 * We authenticate via Sec-WebSocket-Protocol using a prefixed token item:
 *   ["sub2api-admin", "jwt.<token>"]
 */
export interface SubscribeQPSOptions {
  /** Token to authenticate with; `subscribeQPS` falls back to the `auth_token` localStorage entry. */
  token?: string | null

  // Plain socket lifecycle callbacks.
  onOpen?: () => void
  onClose?: (event: CloseEvent) => void
  onError?: (event: Event) => void

  /**
   * Invoked when the server closes with an application close code meaning that
   * reconnecting is pointless (e.g. feature flag disabled).
   */
  onFatalClose?: (event: CloseEvent) => void

  /**
   * Finer-grained status updates for the UI (connecting/reconnecting/offline/etc).
   */
  onStatusChange?: (status: OpsWSStatus) => void

  /**
   * Invoked whenever a reconnect is scheduled (lets the UI show "retry in Xs").
   */
  onReconnectScheduled?: (info: { attempt: number, delayMs: number }) => void

  /** Host (and optional port) to connect to; used in place of `VITE_WS_BASE_URL` / the current host. */
  wsBaseUrl?: string

  /**
   * Maximum reconnect attempts. Defaults to Infinity to keep the dashboard live.
   * Set to 0 to disable reconnect.
   */
  maxReconnectAttempts?: number
  /** Backoff base delay; `subscribeQPS` defaults it to 1000. */
  reconnectBaseDelayMs?: number
  /** Backoff delay cap; `subscribeQPS` defaults it to 30000. */
  reconnectMaxDelayMs?: number

  /**
   * Stale connection detection (heartbeat-by-observation).
   * When no messages arrive within this window, the socket is closed to trigger a reconnect.
   * Set to 0 to disable.
   */
  staleTimeoutMs?: number

  /**
   * Interval between staleness checks. Only used when `staleTimeoutMs > 0`.
   */
  staleCheckIntervalMs?: number
}
|
||||
|
||||
/** Connection states reported through `SubscribeQPSOptions.onStatusChange`. */
export type OpsWSStatus = 'connecting' | 'connected' | 'reconnecting' | 'offline' | 'closed'

/** Application-level WebSocket close codes that `subscribeQPS` treats specially. */
export const OPS_WS_CLOSE_CODES = {
  // Receiving this code makes `subscribeQPS` stop reconnecting.
  REALTIME_DISABLED: 4001
} as const

// First subprotocol entry offered on the ops WebSocket; the token entry follows it.
const OPS_WS_BASE_PROTOCOL = 'sub2api-admin'
|
||||
|
||||
/**
 * Subscribe to realtime QPS updates over a self-healing WebSocket.
 *
 * The socket targets `/api/v1/admin/ops/ws/qps` on `options.wsBaseUrl`,
 * `VITE_WS_BASE_URL`, or the current host (first truthy value), using `wss:` when the
 * page is served over HTTPS. The admin JWT (`options.token`, else the `auth_token`
 * localStorage entry) is sent as a `jwt.<token>` subprotocol item after
 * `sub2api-admin`; it is deliberately kept out of the URL.
 *
 * Reconnect behaviour:
 * - Closes are retried with exponential backoff (`reconnectBaseDelayMs`, capped at
 *   `reconnectMaxDelayMs`) plus up to 250 ms of jitter.
 * - `maxReconnectAttempts` bounds the retries made after the connection has been
 *   established at least once; the counter resets on every successful open. Exactly
 *   that many attempts are made before the status becomes `'closed'`. Until the first
 *   successful open, retries are not limited.
 * - While the browser reports offline no retry is scheduled; the `online` event
 *   triggers the next attempt.
 * - Close code `OPS_WS_CLOSE_CODES.REALTIME_DISABLED` stops reconnecting and calls
 *   `onFatalClose`.
 * - An open socket that has received no frame for `staleTimeoutMs` is closed so the
 *   normal reconnect path runs.
 * - If the socket cannot even be constructed (bad URL, rejected subprotocol, storage
 *   access error) the failure is logged, the status becomes `'closed'` and no further
 *   attempts are made.
 *
 * @param onMessage Called with every frame that parses as JSON. Exceptions it throws
 *   are logged and swallowed so they cannot break the socket handling.
 * @param options Callbacks and tuning knobs; see `SubscribeQPSOptions`.
 * @returns Unsubscribe function: stops reconnecting, removes the window listeners,
 *   clears timers, closes the socket and reports status `'closed'`.
 */
export function subscribeQPS(onMessage: (data: any) => void, options: SubscribeQPSOptions = {}): () => void {
  let ws: WebSocket | null = null
  let reconnectAttempts = 0
  const maxReconnectAttempts = Number.isFinite(options.maxReconnectAttempts as number)
    ? (options.maxReconnectAttempts as number)
    : Infinity
  const baseDelayMs = options.reconnectBaseDelayMs ?? 1000
  const maxDelayMs = options.reconnectMaxDelayMs ?? 30000
  let reconnectTimer: ReturnType<typeof setTimeout> | null = null
  let shouldReconnect = true
  let isConnecting = false
  let hasConnectedOnce = false
  let lastMessageAt = 0
  const staleTimeoutMs = options.staleTimeoutMs ?? 120_000
  const staleCheckIntervalMs = options.staleCheckIntervalMs ?? 30_000
  let staleTimer: ReturnType<typeof setInterval> | null = null

  const setStatus = (status: OpsWSStatus) => {
    options.onStatusChange?.(status)
  }

  const clearReconnectTimer = () => {
    if (reconnectTimer) {
      clearTimeout(reconnectTimer)
      reconnectTimer = null
    }
  }

  const clearStaleTimer = () => {
    if (staleTimer) {
      clearInterval(staleTimer)
      staleTimer = null
    }
  }

  const startStaleTimer = () => {
    clearStaleTimer()
    if (!staleTimeoutMs || staleTimeoutMs <= 0) return
    staleTimer = setInterval(() => {
      if (!shouldReconnect) return
      if (!ws || ws.readyState !== WebSocket.OPEN) return
      if (!lastMessageAt) return
      const ageMs = Date.now() - lastMessageAt
      if (ageMs > staleTimeoutMs) {
        // Treat as a half-open connection; closing triggers the normal reconnect path.
        ws.close()
      }
    }, staleCheckIntervalMs)
  }

  // The retry budget only applies once a connection has succeeded at least once.
  const retriesExhausted = () => hasConnectedOnce && reconnectAttempts >= maxReconnectAttempts

  const scheduleReconnect = () => {
    if (!shouldReconnect) return
    if (retriesExhausted()) {
      // Surface a terminal state instead of leaving the UI on a stale status.
      setStatus('closed')
      return
    }

    // If we're offline, wait for the browser to come back online.
    if (typeof navigator !== 'undefined' && 'onLine' in navigator && !navigator.onLine) {
      setStatus('offline')
      return
    }

    const expDelay = baseDelayMs * Math.pow(2, reconnectAttempts)
    const delay = Math.min(expDelay, maxDelayMs)
    const jitter = Math.floor(Math.random() * 250)
    clearReconnectTimer()
    // Before the first successful open the status is still 'connecting'; only a
    // previously connected socket needs to be flagged as reconnecting during backoff.
    if (hasConnectedOnce) setStatus('reconnecting')
    reconnectTimer = setTimeout(() => {
      reconnectTimer = null
      reconnectAttempts++
      connect()
    }, delay + jitter)
    options.onReconnectScheduled?.({ attempt: reconnectAttempts + 1, delayMs: delay + jitter })
  }

  const handleOnline = () => {
    if (!shouldReconnect) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    if (retriesExhausted()) return
    // Connecting right away; a still-pending backoff timer would only bump the counter.
    clearReconnectTimer()
    connect()
  }

  const handleOffline = () => {
    setStatus('offline')
  }

  const connect = () => {
    if (!shouldReconnect) return
    if (isConnecting) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    // Note: the retry budget is enforced by scheduleReconnect/handleOnline, not here.
    // Checking it again after the timer has already counted this attempt would
    // silently drop the last permitted retry.

    isConnecting = true
    setStatus(hasConnectedOnce ? 'reconnecting' : 'connecting')

    let socket: WebSocket
    try {
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
      const wsBaseUrl = options.wsBaseUrl || import.meta.env.VITE_WS_BASE_URL || window.location.host
      const wsURL = new URL(`${protocol}//${wsBaseUrl}/api/v1/admin/ops/ws/qps`)

      // Do NOT put admin JWT in the URL query string (it can leak via access logs, proxies, etc).
      // Browsers cannot set Authorization headers for WebSockets, so we pass the token via
      // Sec-WebSocket-Protocol (subprotocol list): ["sub2api-admin", "jwt.<token>"].
      const rawToken = String(options.token ?? localStorage.getItem('auth_token') ?? '').trim()
      const protocols: string[] = [OPS_WS_BASE_PROTOCOL]
      if (rawToken) protocols.push(`jwt.${rawToken}`)

      socket = new WebSocket(wsURL.toString(), protocols)
    } catch (err) {
      // Construction failures are synchronous and stem from configuration (URL,
      // subprotocol, storage access); retrying with the same inputs cannot succeed.
      // Reset the flag so state stays consistent, and stop instead of throwing at
      // the caller or out of a timer callback.
      isConnecting = false
      shouldReconnect = false
      clearReconnectTimer()
      console.error('[OpsWS] Failed to open WebSocket:', err)
      setStatus('closed')
      return
    }
    ws = socket

    socket.onopen = () => {
      reconnectAttempts = 0
      isConnecting = false
      hasConnectedOnce = true
      clearReconnectTimer()
      lastMessageAt = Date.now()
      startStaleTimer()
      setStatus('connected')
      options.onOpen?.()
    }

    socket.onmessage = (e) => {
      // Any received frame proves the link is alive, even one that fails to parse.
      lastMessageAt = Date.now()

      let payload: unknown
      try {
        payload = JSON.parse(e.data)
      } catch (err) {
        console.warn('[OpsWS] Failed to parse message:', err)
        return
      }

      // Keep consumer failures separate from parse failures so logs point at the right culprit.
      try {
        onMessage(payload)
      } catch (err) {
        console.error('[OpsWS] onMessage handler failed:', err)
      }
    }

    socket.onerror = (error) => {
      console.error('[OpsWS] Connection error:', error)
      options.onError?.(error)
    }

    socket.onclose = (event) => {
      options.onClose?.(event)

      // A socket that has already been replaced (or released by unsubscribe) must not
      // touch shared state, otherwise it would drop the reference to its successor
      // and cause a duplicate connection.
      if (ws !== socket) return

      isConnecting = false
      clearStaleTimer()
      ws = null

      // If the server explicitly tells us to stop reconnecting, honor it.
      if (event && typeof event.code === 'number' && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
        shouldReconnect = false
        clearReconnectTimer()
        setStatus('closed')
        options.onFatalClose?.(event)
        return
      }

      scheduleReconnect()
    }
  }

  window.addEventListener('online', handleOnline)
  window.addEventListener('offline', handleOffline)
  connect()

  return () => {
    shouldReconnect = false
    window.removeEventListener('online', handleOnline)
    window.removeEventListener('offline', handleOffline)
    clearReconnectTimer()
    clearStaleTimer()
    if (ws) {
      // Closing a socket that is still connecting fires an error event; detach the
      // handlers so an intentional unsubscribe is not reported as a connection error.
      // onclose stays attached so onClose still observes the final close.
      ws.onopen = null
      ws.onmessage = null
      ws.onerror = null
      ws.close()
    }
    ws = null
    setStatus('closed')
  }
}
|
||||
|
||||
/** Severity label as an open string (see `AlertSeverity` for the closed set). */
export type OpsSeverity = string
/** Phase label as an open string. */
export type OpsPhase = string

/** Closed set of alert severities, used by `EmailNotificationConfig.alert.min_severity`. */
export type AlertSeverity = 'critical' | 'warning' | 'info'
export type ThresholdMode = 'count' | 'percentage' | 'both'

/** Metric identifiers an `AlertRule` can be defined on. */
export type MetricType =
  | 'success_rate'
  | 'error_rate'
  | 'upstream_error_rate'
  | 'p95_latency_ms'
  | 'p99_latency_ms'
  | 'cpu_usage_percent'
  | 'memory_usage_percent'
  | 'concurrency_queue_depth'

/** Comparison operator of an `AlertRule`. */
export type Operator = '>' | '>=' | '<' | '<=' | '==' | '!='
|
||||
|
||||
/** Alert rule as exchanged with the `/admin/ops/alert-rules` endpoints. */
export interface AlertRule {
  id?: number
  name: string
  description?: string
  enabled: boolean

  // Condition
  metric_type: MetricType
  operator: Operator
  threshold: number
  window_minutes: number
  sustained_minutes: number

  // Notification
  severity: OpsSeverity
  cooldown_minutes: number
  notify_email: boolean

  filters?: Record<string, any>

  // Timestamps
  created_at?: string
  updated_at?: string
  last_triggered_at?: string | null
}
|
||||
|
||||
/** One element of the array returned by `listAlertEvents`. */
export interface AlertEvent {
  id: number
  rule_id: number
  severity: OpsSeverity | string
  /** Known values are listed for readability; any other string is accepted as well. */
  status: 'firing' | 'resolved' | string

  title?: string
  description?: string
  metric_value?: number
  threshold_value?: number
  dimensions?: Record<string, any>

  fired_at: string
  resolved_at?: string | null
  email_sent: boolean
  created_at: string
}
|
||||
|
||||
/** Payload read and written by the `/admin/ops/email-notification/config` helpers. */
export interface EmailNotificationConfig {
  /** Settings under the `alert` key. */
  alert: {
    enabled: boolean
    recipients: string[]
    /** An empty string is accepted in addition to the `AlertSeverity` values. */
    min_severity: AlertSeverity | ''
    rate_limit_per_hour: number
    batching_window_seconds: number
    include_resolved_alerts: boolean
  }
  /** Settings under the `report` key. */
  report: {
    enabled: boolean
    recipients: string[]

    daily_summary_enabled: boolean
    daily_summary_schedule: string

    weekly_summary_enabled: boolean
    weekly_summary_schedule: string

    error_digest_enabled: boolean
    error_digest_schedule: string
    error_digest_min_count: number

    account_health_enabled: boolean
    account_health_schedule: string
    account_health_error_rate_threshold: number
  }
}
|
||||
|
||||
/** Shape of `OpsAlertRuntimeSettings.distributed_lock`. */
export interface OpsDistributedLockSettings {
  enabled: boolean
  key: string
  ttl_seconds: number
}
|
||||
|
||||
/** Payload read and written by the `/admin/ops/runtime/alert` helpers. */
export interface OpsAlertRuntimeSettings {
  evaluation_interval_seconds: number
  distributed_lock: OpsDistributedLockSettings
  silencing: {
    enabled: boolean
    global_until_rfc3339: string
    global_reason: string
    /** Optional list of individual silencing entries. */
    entries?: Array<{
      rule_id?: number
      severities?: Array<OpsSeverity | string>
      until_rfc3339: string
      reason: string
    }>
  }
}
|
||||
|
||||
/** One row of the paginated result returned by `listErrorLogs`; base of `OpsErrorDetail`. */
export interface OpsErrorLog {
  id: number
  created_at: string

  // Classification
  phase: OpsPhase
  type: string
  severity: OpsSeverity
  status_code: number

  // Request attributes
  platform: string
  model: string
  latency_ms?: number | null
  client_request_id: string
  request_id: string
  message: string

  // Related entity ids
  user_id?: number | null
  api_key_id?: number | null
  account_id?: number | null
  group_id?: number | null

  client_ip?: string | null
  request_path?: string
  stream?: boolean
}
|
||||
|
||||
/** Response payload of `getErrorLogDetail`: an `OpsErrorLog` plus detail-only fields. */
export interface OpsErrorDetail extends OpsErrorLog {
  error_body: string
  user_agent: string

  // Latency fields (all optional/nullable)
  auth_latency_ms?: number | null
  routing_latency_ms?: number | null
  upstream_latency_ms?: number | null
  response_latency_ms?: number | null
  time_to_first_token_ms?: number | null

  // Request body fields
  request_body: string
  request_body_truncated: boolean
  request_body_bytes?: number | null

  is_business_limited: boolean
}
|
||||
|
||||
/** Paginated response of `listErrorLogs`. */
export type OpsErrorLogsResponse = PaginatedResponse<OpsErrorLog>
|
||||
|
||||
/**
 * Fetch the dashboard overview from `/admin/ops/dashboard/overview`.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function getDashboardOverview(
  params: {
    time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    mode?: OpsQueryMode
  },
  options: OpsRequestOptions = {}
): Promise<OpsDashboardOverview> {
  const requestConfig = { params, signal: options.signal }
  const response = await apiClient.get<OpsDashboardOverview>('/admin/ops/dashboard/overview', requestConfig)
  return response.data
}
|
||||
|
||||
/**
 * Fetch the throughput trend from `/admin/ops/dashboard/throughput-trend`.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function getThroughputTrend(
  params: {
    time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    mode?: OpsQueryMode
  },
  options: OpsRequestOptions = {}
): Promise<OpsThroughputTrendResponse> {
  const requestConfig = { params, signal: options.signal }
  const response = await apiClient.get<OpsThroughputTrendResponse>('/admin/ops/dashboard/throughput-trend', requestConfig)
  return response.data
}
|
||||
|
||||
/**
 * Fetch the latency histogram from `/admin/ops/dashboard/latency-histogram`.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function getLatencyHistogram(
  params: {
    time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    mode?: OpsQueryMode
  },
  options: OpsRequestOptions = {}
): Promise<OpsLatencyHistogramResponse> {
  const requestConfig = { params, signal: options.signal }
  const response = await apiClient.get<OpsLatencyHistogramResponse>('/admin/ops/dashboard/latency-histogram', requestConfig)
  return response.data
}
|
||||
|
||||
/**
 * Fetch the error trend from `/admin/ops/dashboard/error-trend`.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function getErrorTrend(
  params: {
    time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    mode?: OpsQueryMode
  },
  options: OpsRequestOptions = {}
): Promise<OpsErrorTrendResponse> {
  const requestConfig = { params, signal: options.signal }
  const response = await apiClient.get<OpsErrorTrendResponse>('/admin/ops/dashboard/error-trend', requestConfig)
  return response.data
}
|
||||
|
||||
/**
 * Fetch the error distribution from `/admin/ops/dashboard/error-distribution`.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function getErrorDistribution(
  params: {
    time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    mode?: OpsQueryMode
  },
  options: OpsRequestOptions = {}
): Promise<OpsErrorDistributionResponse> {
  const requestConfig = { params, signal: options.signal }
  const response = await apiClient.get<OpsErrorDistributionResponse>('/admin/ops/dashboard/error-distribution', requestConfig)
  return response.data
}
|
||||
|
||||
/**
 * List error logs from `/admin/ops/errors`.
 *
 * Accepts the same optional `OpsRequestOptions` as the dashboard helpers so callers
 * can abort a superseded list/search request; omitting it behaves as before.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function listErrorLogs(
  params: {
    page?: number
    page_size?: number
    time_range?: string
    start_time?: string
    end_time?: string
    platform?: string
    group_id?: number | null
    account_id?: number | null
    phase?: string
    q?: string
    status_codes?: string
  },
  options: OpsRequestOptions = {}
): Promise<OpsErrorLogsResponse> {
  const { data } = await apiClient.get<OpsErrorLogsResponse>('/admin/ops/errors', {
    params,
    signal: options.signal
  })
  return data
}
|
||||
|
||||
/**
 * Fetch one error log from `/admin/ops/errors/{id}`.
 *
 * @param id Interpolated into the request path.
 * @returns The `data` field of the HTTP client response.
 */
export async function getErrorLogDetail(id: number): Promise<OpsErrorDetail> {
  const response = await apiClient.get<OpsErrorDetail>(`/admin/ops/errors/${id}`)
  return response.data
}
|
||||
|
||||
/**
 * Post a retry for an error log to `/admin/ops/errors/{id}/retry`.
 *
 * @param id Interpolated into the request path.
 * @param req Sent as the request body.
 * @returns The `data` field of the HTTP client response.
 */
export async function retryErrorRequest(id: number, req: OpsRetryRequest): Promise<OpsRetryResult> {
  const response = await apiClient.post<OpsRetryResult>(`/admin/ops/errors/${id}/retry`, req)
  return response.data
}
|
||||
|
||||
/**
 * List request details from `/admin/ops/requests`.
 *
 * Accepts the same optional `OpsRequestOptions` as the dashboard helpers so callers
 * can abort a superseded list/search request; omitting it behaves as before.
 *
 * @param params Forwarded unchanged as the query parameters.
 * @param options `options.signal` is forwarded to the HTTP client as `signal`.
 * @returns The `data` field of the HTTP client response.
 */
export async function listRequestDetails(
  params: OpsRequestDetailsParams,
  options: OpsRequestOptions = {}
): Promise<OpsRequestDetailsResponse> {
  const { data } = await apiClient.get<OpsRequestDetailsResponse>('/admin/ops/requests', {
    params,
    signal: options.signal
  })
  return data
}
|
||||
|
||||
// Alert rules
|
||||
/**
 * Fetch all alert rules from `/admin/ops/alert-rules`.
 *
 * @returns The `data` field of the HTTP client response.
 */
export async function listAlertRules(): Promise<AlertRule[]> {
  const response = await apiClient.get<AlertRule[]>('/admin/ops/alert-rules')
  return response.data
}
|
||||
|
||||
/**
 * Create an alert rule by posting it to `/admin/ops/alert-rules`.
 *
 * @param rule Sent as the request body.
 * @returns The `data` field of the HTTP client response.
 */
export async function createAlertRule(rule: AlertRule): Promise<AlertRule> {
  const response = await apiClient.post<AlertRule>('/admin/ops/alert-rules', rule)
  return response.data
}
|
||||
|
||||
/**
 * Update an alert rule with a PUT to `/admin/ops/alert-rules/{id}`.
 *
 * @param id Interpolated into the request path.
 * @param rule Partial rule sent as the request body.
 * @returns The `data` field of the HTTP client response.
 */
export async function updateAlertRule(id: number, rule: Partial<AlertRule>): Promise<AlertRule> {
  const response = await apiClient.put<AlertRule>(`/admin/ops/alert-rules/${id}`, rule)
  return response.data
}
|
||||
|
||||
/**
 * Delete an alert rule via `/admin/ops/alert-rules/{id}`.
 *
 * @param id Interpolated into the request path.
 * @returns Resolves with no value once the request completes.
 */
export async function deleteAlertRule(id: number): Promise<void> {
  const rulePath = `/admin/ops/alert-rules/${id}`
  await apiClient.delete(rulePath)
}
|
||||
|
||||
/**
 * Fetch alert events from `/admin/ops/alert-events`.
 *
 * @param limit Sent as the `limit` query parameter; defaults to 100.
 * @returns The `data` field of the HTTP client response.
 */
export async function listAlertEvents(limit = 100): Promise<AlertEvent[]> {
  const query = { limit }
  const response = await apiClient.get<AlertEvent[]>('/admin/ops/alert-events', { params: query })
  return response.data
}
|
||||
|
||||
// Email notification config
|
||||
/**
 * Fetch the email notification config from `/admin/ops/email-notification/config`.
 *
 * @returns The `data` field of the HTTP client response.
 */
export async function getEmailNotificationConfig(): Promise<EmailNotificationConfig> {
  const response = await apiClient.get<EmailNotificationConfig>('/admin/ops/email-notification/config')
  return response.data
}
|
||||
|
||||
/**
 * Save the email notification config with a PUT to `/admin/ops/email-notification/config`.
 *
 * @param config Sent as the request body.
 * @returns The `data` field of the HTTP client response.
 */
export async function updateEmailNotificationConfig(config: EmailNotificationConfig): Promise<EmailNotificationConfig> {
  const response = await apiClient.put<EmailNotificationConfig>('/admin/ops/email-notification/config', config)
  return response.data
}
|
||||
|
||||
// Runtime settings (DB-backed)
|
||||
/**
 * Fetch the alert runtime settings from `/admin/ops/runtime/alert`.
 *
 * @returns The `data` field of the HTTP client response.
 */
export async function getAlertRuntimeSettings(): Promise<OpsAlertRuntimeSettings> {
  const response = await apiClient.get<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert')
  return response.data
}
|
||||
|
||||
/**
 * Save the alert runtime settings with a PUT to `/admin/ops/runtime/alert`.
 *
 * @param config Sent as the request body.
 * @returns The `data` field of the HTTP client response.
 */
export async function updateAlertRuntimeSettings(config: OpsAlertRuntimeSettings): Promise<OpsAlertRuntimeSettings> {
  const response = await apiClient.put<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert', config)
  return response.data
}
|
||||
|
||||
/** Aggregate object bundling every ops helper of this module; also the default export. */
export const opsAPI = {
  // Dashboard
  getDashboardOverview,
  getThroughputTrend,
  getLatencyHistogram,
  getErrorTrend,
  getErrorDistribution,

  // Realtime / capacity
  getConcurrencyStats,
  getAccountAvailabilityStats,
  subscribeQPS,

  // Error logs and request details
  listErrorLogs,
  getErrorLogDetail,
  retryErrorRequest,
  listRequestDetails,

  // Alert rules and events
  listAlertRules,
  createAlertRule,
  updateAlertRule,
  deleteAlertRule,
  listAlertEvents,

  // Notification and runtime settings
  getEmailNotificationConfig,
  updateEmailNotificationConfig,
  getAlertRuntimeSettings,
  updateAlertRuntimeSettings
}
|
||||
|
||||
export default opsAPI
|
||||
@@ -34,9 +34,22 @@ export interface SystemSettings {
|
||||
turnstile_enabled: boolean
|
||||
turnstile_site_key: string
|
||||
turnstile_secret_key_configured: boolean
|
||||
|
||||
// Model fallback configuration
|
||||
enable_model_fallback: boolean
|
||||
fallback_model_anthropic: string
|
||||
fallback_model_openai: string
|
||||
fallback_model_gemini: string
|
||||
fallback_model_antigravity: string
|
||||
|
||||
// Identity patch configuration (Claude -> Gemini)
|
||||
enable_identity_patch: boolean
|
||||
identity_patch_prompt: string
|
||||
|
||||
// Ops Monitoring (vNext)
|
||||
ops_monitoring_enabled: boolean
|
||||
ops_realtime_monitoring_enabled: boolean
|
||||
ops_query_mode_default: 'auto' | 'raw' | 'preagg' | string
|
||||
}
|
||||
|
||||
export interface UpdateSettingsRequest {
|
||||
@@ -60,8 +73,16 @@ export interface UpdateSettingsRequest {
|
||||
turnstile_enabled?: boolean
|
||||
turnstile_site_key?: string
|
||||
turnstile_secret_key?: string
|
||||
enable_model_fallback?: boolean
|
||||
fallback_model_anthropic?: string
|
||||
fallback_model_openai?: string
|
||||
fallback_model_gemini?: string
|
||||
fallback_model_antigravity?: string
|
||||
enable_identity_patch?: boolean
|
||||
identity_patch_prompt?: string
|
||||
ops_monitoring_enabled?: boolean
|
||||
ops_realtime_monitoring_enabled?: boolean
|
||||
ops_query_mode_default?: 'auto' | 'raw' | 'preagg' | string
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -80,9 +80,45 @@ apiClient.interceptors.response.use(
|
||||
return response
|
||||
},
|
||||
(error: AxiosError<ApiResponse<unknown>>) => {
|
||||
// Request cancellation: keep the original axios cancellation error so callers can ignore it.
|
||||
// Otherwise we'd misclassify it as a generic "network error".
|
||||
if (error.code === 'ERR_CANCELED' || axios.isCancel(error)) {
|
||||
return Promise.reject(error)
|
||||
}
|
||||
|
||||
// Handle common errors
|
||||
if (error.response) {
|
||||
const { status, data } = error.response
|
||||
const url = String(error.config?.url || '')
|
||||
|
||||
// Validate `data` shape to avoid HTML error pages breaking our error handling.
|
||||
const apiData = (typeof data === 'object' && data !== null ? data : {}) as Record<string, any>
|
||||
|
||||
// Ops monitoring disabled: treat as feature-flagged 404, and proactively redirect away
|
||||
// from ops pages to avoid broken UI states.
|
||||
if (status === 404 && apiData.message === 'Ops monitoring is disabled') {
|
||||
try {
|
||||
localStorage.setItem('ops_monitoring_enabled_cached', 'false')
|
||||
} catch {
|
||||
// ignore localStorage failures
|
||||
}
|
||||
try {
|
||||
window.dispatchEvent(new CustomEvent('ops-monitoring-disabled'))
|
||||
} catch {
|
||||
// ignore event failures
|
||||
}
|
||||
|
||||
if (window.location.pathname.startsWith('/admin/ops')) {
|
||||
window.location.href = '/admin/settings'
|
||||
}
|
||||
|
||||
return Promise.reject({
|
||||
status,
|
||||
code: 'OPS_DISABLED',
|
||||
message: apiData.message || error.message,
|
||||
url
|
||||
})
|
||||
}
|
||||
|
||||
// 401: Unauthorized - clear token and redirect to login
|
||||
if (status === 401) {
|
||||
@@ -113,8 +149,8 @@ apiClient.interceptors.response.use(
|
||||
// Return structured error
|
||||
return Promise.reject({
|
||||
status,
|
||||
code: data?.code,
|
||||
message: data?.message || error.message
|
||||
code: apiData.code,
|
||||
message: apiData.message || apiData.detail || error.message
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user