/*
 * Patch context preserved from the collapsed diff this file was recovered from:
 *
 * frontend/src/api/admin/index.ts (index ea12f6d2..9e719a90)
 *   + import opsAPI from './ops'
 *   + `adminAPI` gains the member `ops: opsAPI`
 *   + `opsAPI` is added to the named re-exports
 *
 * frontend/src/api/admin/ops.ts (new file, index 00000000..3c3529a9) follows.
 */

/**
 * Admin Ops API endpoints (vNext)
 * - Error logs list/detail + retry (client/upstream)
 * - Dashboard overview (raw path)
 */

import { apiClient } from '../client'
import type { PaginatedResponse } from '@/types'

/** Retry flavours accepted by the error-retry endpoint. */
export type OpsRetryMode = 'client' | 'upstream'
/** Query modes accepted by the dashboard endpoints (`mode` query parameter). */
export type OpsQueryMode = 'auto' | 'raw' | 'preagg'

/** Per-call options shared by the dashboard request helpers. */
export interface OpsRequestOptions {
  /** Forwarded to the HTTP client as the request's abort signal. */
  signal?: AbortSignal
}

/** Request body for retrying a logged error. */
export interface OpsRetryRequest {
  mode: OpsRetryMode
  pinned_account_id?: number
}

/** Result payload of a retry attempt. */
export interface OpsRetryResult {
  attempt_id: number
  mode: OpsRetryMode
  // Known values are listed for editor completion; any other string is accepted.
  status: 'running' | 'succeeded' | 'failed' | string

  pinned_account_id?: number | null
  used_account_id?: number | null

  http_status_code: number
  upstream_request_id: string

  response_preview: string
  response_truncated: boolean

  error_message: string

  started_at: string
  finished_at: string
  duration_ms: number
}

/** Payload of the dashboard overview endpoint. */
export interface OpsDashboardOverview {
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  system_metrics?: OpsSystemMetricsSnapshot | null
  job_heartbeats?: OpsJobHeartbeat[] | null

  success_count: number
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  request_count_total: number
  request_count_sla: number

  token_consumed: number

  sla: number
  error_rate: number
  upstream_error_rate: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number

  qps: {
    current: number
    peak: number
    avg: number
  }
  tps: {
    current: number
    peak: number
    avg: number
  }

  duration: OpsPercentiles
  ttft: OpsPercentiles
}

/** Latency summary in milliseconds; every field may be absent or null. */
export interface OpsPercentiles {
  p50_ms?: number | null
  p90_ms?: number | null
  p95_ms?: number | null
  p99_ms?: number | null
  avg_ms?: number | null
  max_ms?: number | null
}

export interface OpsThroughputTrendPoint {
  bucket_start: string
  request_count: number
  token_consumed: number
  qps: number
  tps: number
}

export interface OpsThroughputPlatformBreakdownItem {
  platform: string
  request_count: number
  token_consumed: number
}

export interface OpsThroughputGroupBreakdownItem {
  group_id: number
  group_name: string
  request_count: number
  token_consumed: number
}

/** Payload of the throughput-trend endpoint. */
export interface OpsThroughputTrendResponse {
  bucket: string
  points: OpsThroughputTrendPoint[]
  by_platform?: OpsThroughputPlatformBreakdownItem[]
  top_groups?: OpsThroughputGroupBreakdownItem[]
}

export type OpsRequestKind = 'success' | 'error'
export type OpsRequestDetailsKind = OpsRequestKind | 'all'
export type OpsRequestDetailsSort = 'created_at_desc' | 'duration_desc'

/** One row of the request-details listing. */
export interface OpsRequestDetail {
  kind: OpsRequestKind
  created_at: string
  request_id: string

  platform?: string
  model?: string
  duration_ms?: number | null
  status_code?: number | null

  error_id?: number | null
  phase?: string
  severity?: string
  message?: string

  user_id?: number | null
  api_key_id?: number | null
  account_id?: number | null
  group_id?: number | null

  stream?: boolean
}

/** Query parameters accepted by the request-details listing. */
export interface OpsRequestDetailsParams {
  time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
  start_time?: string
  end_time?: string

  kind?: OpsRequestDetailsKind

  platform?: string
  group_id?: number | null

  user_id?: number
  api_key_id?: number
  account_id?: number

  model?: string
  request_id?: string
  q?: string

  min_duration_ms?: number
  max_duration_ms?: number

  sort?: OpsRequestDetailsSort

  page?: number
  page_size?: number
}

// NOTE(review): the type argument was lost in extraction; `OpsRequestDetail` is
// inferred from the alias name. Confirm against the upstream file.
export type OpsRequestDetailsResponse = PaginatedResponse<OpsRequestDetail>

export interface OpsLatencyHistogramBucket {
  range: string
  count: number
}

/** Payload of the latency-histogram endpoint. */
export interface OpsLatencyHistogramResponse {
  start_time: string
  end_time: string
  platform: string
  group_id?: number | null

  total_requests: number
  buckets: OpsLatencyHistogramBucket[]
}

export interface OpsErrorTrendPoint {
  bucket_start: string
  error_count_total: number
  business_limited_count: number
  error_count_sla: number
  upstream_error_count_excl_429_529: number
  upstream_429_count: number
  upstream_529_count: number
}

/** Payload of the error-trend endpoint. */
export interface OpsErrorTrendResponse {
  bucket: string
  points: OpsErrorTrendPoint[]
}

export interface OpsErrorDistributionItem {
  status_code: number
  total: number
  sla: number
  business_limited: number
}

/** Payload of the error-distribution endpoint. */
export interface OpsErrorDistributionResponse {
  total: number
  items: OpsErrorDistributionItem[]
}

/** System metrics sample embedded in the dashboard overview. */
export interface OpsSystemMetricsSnapshot {
  id: number
  created_at: string
  window_minutes: number

  cpu_usage_percent?: number | null
  memory_used_mb?: number | null
  memory_total_mb?: number | null
  memory_usage_percent?: number | null

  db_ok?: boolean | null
  redis_ok?: boolean | null

  db_conn_active?: number | null
  db_conn_idle?: number | null
  db_conn_waiting?: number | null

  goroutine_count?: number | null
  concurrency_queue_depth?: number | null
}

/** Last-run bookkeeping for one background job, as embedded in the dashboard overview. */
export interface OpsJobHeartbeat {
  job_name: string
  last_run_at?: string | null
  last_success_at?: string | null
  last_error_at?: string | null
  last_error?: string | null
  last_duration_ms?: number | null
  updated_at: string
}

export interface PlatformConcurrencyInfo {
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface GroupConcurrencyInfo {
  group_id: number
  group_name: string
  platform: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

export interface AccountConcurrencyInfo {
  account_id: number
  account_name?: string
  platform: string
  group_id: number
  group_name: string
  current_in_use: number
  max_capacity: number
  load_percentage: number
  waiting_in_queue: number
}

/**
 * Payload of `/admin/ops/concurrency`.
 *
 * NOTE(review): the `Record` type arguments were lost in extraction; the value types
 * below are inferred from the sibling interface names. Confirm against upstream.
 */
export interface OpsConcurrencyStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformConcurrencyInfo>
  group: Record<string, GroupConcurrencyInfo>
  account: Record<string, AccountConcurrencyInfo>
  timestamp?: string
}

/**
 * Build the optional `platform` / `group_id` query filters shared by the realtime
 * stats endpoints. An empty platform and a non-positive or non-numeric group id
 * are omitted.
 */
function buildScopeParams(platform?: string, groupId?: number | null): Record<string, string | number> {
  const params: Record<string, string | number> = {}
  if (platform) {
    params.platform = platform
  }
  if (typeof groupId === 'number' && groupId > 0) {
    params.group_id = groupId
  }
  return params
}

/**
 * Fetch concurrency stats from `/admin/ops/concurrency`.
 *
 * @param platform Optional platform filter; omitted from the query when empty.
 * @param groupId Optional group filter; only sent when it is a number greater than 0.
 * @returns The `data` field of the HTTP client's response.
 */
export async function getConcurrencyStats(platform?: string, groupId?: number | null): Promise<OpsConcurrencyStatsResponse> {
  const params = buildScopeParams(platform, groupId)
  const { data } = await apiClient.get<OpsConcurrencyStatsResponse>('/admin/ops/concurrency', { params })
  return data
}

export interface PlatformAvailability {
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}

export interface GroupAvailability {
  group_id: number
  group_name: string
  platform: string
  total_accounts: number
  available_count: number
  rate_limit_count: number
  error_count: number
}

export interface AccountAvailability {
  account_id: number
  account_name: string
  platform: string
  group_id: number
  group_name: string
  status: string
  is_available: boolean
  is_rate_limited: boolean
  rate_limit_reset_at?: string
  rate_limit_remaining_sec?: number
  is_overloaded: boolean
  overload_until?: string
  overload_remaining_sec?: number
  has_error: boolean
  error_message?: string
}

/**
 * Payload of `/admin/ops/account-availability`.
 *
 * NOTE(review): `Record` value types are inferred from the sibling interface names
 * (type arguments were lost in extraction). Confirm against upstream.
 */
export interface OpsAccountAvailabilityStatsResponse {
  enabled: boolean
  platform: Record<string, PlatformAvailability>
  group: Record<string, GroupAvailability>
  account: Record<string, AccountAvailability>
  timestamp?: string
}

/**
 * Fetch account availability stats from `/admin/ops/account-availability`.
 *
 * @param platform Optional platform filter; omitted from the query when empty.
 * @param groupId Optional group filter; only sent when it is a number greater than 0.
 * @returns The `data` field of the HTTP client's response.
 */
export async function getAccountAvailabilityStats(platform?: string, groupId?: number | null): Promise<OpsAccountAvailabilityStatsResponse> {
  const params = buildScopeParams(platform, groupId)
  const { data } = await apiClient.get<OpsAccountAvailabilityStatsResponse>('/admin/ops/account-availability', { params })
  return data
}

/** Options accepted by `subscribeQPS`. */
export interface SubscribeQPSOptions {
  /** Token to authenticate with. When null/undefined, `localStorage['auth_token']` is used. */
  token?: string | null
  onOpen?: () => void
  onClose?: (event: CloseEvent) => void
  onError?: (event: Event) => void
  /**
   * Called when the server closes with an application close code that indicates
   * reconnecting is not useful (e.g. feature flag disabled).
   */
  onFatalClose?: (event: CloseEvent) => void
  /**
   * More granular status updates for UI (connecting/reconnecting/offline/etc).
   */
  onStatusChange?: (status: OpsWSStatus) => void
  /**
   * Called when a reconnect is scheduled (helps display "retry in Xs").
   */
  onReconnectScheduled?: (info: { attempt: number, delayMs: number }) => void
  /** Host (and optional port) to connect to; falls back to `VITE_WS_BASE_URL`, then `window.location.host`. */
  wsBaseUrl?: string
  /**
   * Maximum reconnect attempts. Defaults to Infinity to keep the dashboard live.
   * Set to 0 to disable reconnect.
   */
  maxReconnectAttempts?: number
  /** First reconnect delay in milliseconds (default 1000); doubled per attempt. */
  reconnectBaseDelayMs?: number
  /** Upper bound for the reconnect delay in milliseconds (default 30000), before jitter. */
  reconnectMaxDelayMs?: number
  /**
   * Stale connection detection (heartbeat-by-observation).
   * If no messages are received within this window, the socket is closed to trigger a reconnect.
   * Set to 0 to disable.
   */
  staleTimeoutMs?: number
  /**
   * How often to check staleness. Only used when `staleTimeoutMs > 0`.
   */
  staleCheckIntervalMs?: number
}

export type OpsWSStatus = 'connecting' | 'connected' | 'reconnecting' | 'offline' | 'closed'

/** Application-level WebSocket close codes understood by `subscribeQPS`. */
export const OPS_WS_CLOSE_CODES = {
  REALTIME_DISABLED: 4001
} as const

const OPS_WS_BASE_PROTOCOL = 'sub2api-admin'

/**
 * Subscribe to realtime QPS updates via WebSocket.
 *
 * Note: browsers cannot set Authorization headers for WebSockets.
 * We authenticate via Sec-WebSocket-Protocol using a prefixed token item:
 *   ["sub2api-admin", "jwt.<token>"]
 *
 * The connection is re-established with exponential backoff plus jitter after it
 * closes, unless the server closes with `OPS_WS_CLOSE_CODES.REALTIME_DISABLED`
 * or the returned function has been called.
 *
 * @param onMessage Receives every frame that parses as JSON.
 * @param options See `SubscribeQPSOptions`.
 * @returns An unsubscribe function: stops reconnecting, removes the `online`/`offline`
 *   listeners, clears timers, closes the socket and reports status `'closed'`.
 * @throws Whatever `new URL` / `new WebSocket` throws on the initial connection attempt
 *   (for example a malformed `wsBaseUrl`); the window listeners are removed first.
 */
export function subscribeQPS(onMessage: (data: any) => void, options: SubscribeQPSOptions = {}): () => void {
  let ws: WebSocket | null = null
  let reconnectAttempts = 0
  const maxReconnectAttempts = Number.isFinite(options.maxReconnectAttempts as number)
    ? (options.maxReconnectAttempts as number)
    : Infinity
  const baseDelayMs = options.reconnectBaseDelayMs ?? 1000
  const maxDelayMs = options.reconnectMaxDelayMs ?? 30000
  let reconnectTimer: ReturnType<typeof setTimeout> | null = null
  let shouldReconnect = true
  let isConnecting = false
  let hasConnectedOnce = false
  let lastMessageAt = 0
  const staleTimeoutMs = options.staleTimeoutMs ?? 120_000
  const staleCheckIntervalMs = options.staleCheckIntervalMs ?? 30_000
  let staleTimer: ReturnType<typeof setInterval> | null = null

  const setStatus = (status: OpsWSStatus) => {
    options.onStatusChange?.(status)
  }

  const clearReconnectTimer = () => {
    if (reconnectTimer) {
      clearTimeout(reconnectTimer)
      reconnectTimer = null
    }
  }

  const clearStaleTimer = () => {
    if (staleTimer) {
      clearInterval(staleTimer)
      staleTimer = null
    }
  }

  const startStaleTimer = () => {
    clearStaleTimer()
    if (!staleTimeoutMs || staleTimeoutMs <= 0) return
    staleTimer = setInterval(() => {
      if (!shouldReconnect) return
      if (!ws || ws.readyState !== WebSocket.OPEN) return
      if (!lastMessageAt) return
      const ageMs = Date.now() - lastMessageAt
      if (ageMs > staleTimeoutMs) {
        // Treat as a half-open connection; closing triggers the normal reconnect path.
        ws.close()
      }
    }, staleCheckIntervalMs)
  }

  const scheduleReconnect = () => {
    if (!shouldReconnect) return
    if (hasConnectedOnce && reconnectAttempts >= maxReconnectAttempts) return

    // If we're offline, wait for the browser to come back online.
    if (typeof navigator !== 'undefined' && 'onLine' in navigator && !navigator.onLine) {
      setStatus('offline')
      return
    }

    const expDelay = baseDelayMs * Math.pow(2, reconnectAttempts)
    const delay = Math.min(expDelay, maxDelayMs)
    const jitter = Math.floor(Math.random() * 250)
    clearReconnectTimer()
    reconnectTimer = setTimeout(() => {
      reconnectAttempts++
      connect()
    }, delay + jitter)
    options.onReconnectScheduled?.({ attempt: reconnectAttempts + 1, delayMs: delay + jitter })
  }

  const handleOnline = () => {
    if (!shouldReconnect) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    connect()
  }

  const handleOffline = () => {
    setStatus('offline')
  }

  const connect = () => {
    if (!shouldReconnect) return
    if (isConnecting) return
    if (ws && (ws.readyState === WebSocket.OPEN || ws.readyState === WebSocket.CONNECTING)) return
    if (hasConnectedOnce && reconnectAttempts >= maxReconnectAttempts) return

    isConnecting = true
    setStatus(hasConnectedOnce ? 'reconnecting' : 'connecting')

    let socket: WebSocket
    try {
      const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'
      const wsBaseUrl = options.wsBaseUrl || import.meta.env.VITE_WS_BASE_URL || window.location.host
      const wsURL = new URL(`${protocol}//${wsBaseUrl}/api/v1/admin/ops/ws/qps`)

      // Do NOT put admin JWT in the URL query string (it can leak via access logs, proxies, etc).
      // Browsers cannot set Authorization headers for WebSockets, so we pass the token via
      // Sec-WebSocket-Protocol (subprotocol list): ["sub2api-admin", "jwt.<token>"].
      const rawToken = String(options.token ?? localStorage.getItem('auth_token') ?? '').trim()
      const protocols: string[] = [OPS_WS_BASE_PROTOCOL]
      if (rawToken) protocols.push(`jwt.${rawToken}`)

      socket = new WebSocket(wsURL.toString(), protocols)
    } catch (err) {
      // Without this reset a construction failure would leave `isConnecting` stuck at
      // true and every later connect() call would return early.
      isConnecting = false
      throw err
    }
    ws = socket

    // A socket is superseded once `ws` points at a different, newer socket. This happens
    // when the `online` handler opens a replacement while the previous socket is still
    // CLOSING. Late events from the old socket must not touch shared state, otherwise its
    // `close` would null out the live socket reference and schedule a duplicate reconnect.
    // `ws === null` (after unsubscribe) is deliberately NOT treated as superseded so the
    // final `close` still reaches `options.onClose`.
    const isSuperseded = () => ws !== null && ws !== socket

    socket.onopen = () => {
      if (isSuperseded()) {
        socket.close()
        return
      }
      reconnectAttempts = 0
      isConnecting = false
      hasConnectedOnce = true
      clearReconnectTimer()
      lastMessageAt = Date.now()
      startStaleTimer()
      setStatus('connected')
      options.onOpen?.()
    }

    socket.onmessage = (e) => {
      if (isSuperseded()) return
      // Any received frame proves the connection is alive, even one that fails to parse.
      lastMessageAt = Date.now()
      try {
        const data = JSON.parse(e.data)
        onMessage(data)
      } catch (err) {
        console.warn('[OpsWS] Failed to parse message:', err)
      }
    }

    socket.onerror = (error) => {
      if (isSuperseded()) return
      console.error('[OpsWS] Connection error:', error)
      options.onError?.(error)
    }

    socket.onclose = (event) => {
      if (isSuperseded()) return
      isConnecting = false
      options.onClose?.(event)
      clearStaleTimer()
      ws = null

      // If the server explicitly tells us to stop reconnecting, honor it.
      if (event && typeof event.code === 'number' && event.code === OPS_WS_CLOSE_CODES.REALTIME_DISABLED) {
        shouldReconnect = false
        clearReconnectTimer()
        setStatus('closed')
        options.onFatalClose?.(event)
        return
      }

      scheduleReconnect()
    }
  }

  window.addEventListener('online', handleOnline)
  window.addEventListener('offline', handleOffline)
  try {
    connect()
  } catch (err) {
    // The caller never receives the unsubscribe function when the first attempt throws,
    // so release the listeners here instead of leaking them.
    shouldReconnect = false
    window.removeEventListener('online', handleOnline)
    window.removeEventListener('offline', handleOffline)
    throw err
  }

  return () => {
    shouldReconnect = false
    window.removeEventListener('online', handleOnline)
    window.removeEventListener('offline', handleOffline)
    clearReconnectTimer()
    clearStaleTimer()
    if (ws) ws.close()
    ws = null
    setStatus('closed')
  }
}

export type OpsSeverity = string
export type OpsPhase = string

export type AlertSeverity = 'critical' | 'warning' | 'info'
export type ThresholdMode = 'count' | 'percentage' | 'both'
export type MetricType =
  | 'success_rate'
  | 'error_rate'
  | 'upstream_error_rate'
  | 'p95_latency_ms'
  | 'p99_latency_ms'
  | 'cpu_usage_percent'
  | 'memory_usage_percent'
  | 'concurrency_queue_depth'
export type Operator = '>' | '>=' | '<' | '<=' | '==' | '!='

/** An alert rule as exchanged with `/admin/ops/alert-rules`. */
export interface AlertRule {
  id?: number
  name: string
  description?: string
  enabled: boolean
  metric_type: MetricType
  operator: Operator
  threshold: number
  window_minutes: number
  sustained_minutes: number
  severity: OpsSeverity
  cooldown_minutes: number
  notify_email: boolean
  // NOTE(review): type arguments were lost in extraction; `<string, any>` is the most
  // permissive reconstruction so existing readers keep compiling. Confirm upstream.
  filters?: Record<string, any>
  created_at?: string
  updated_at?: string
  last_triggered_at?: string | null
}

/** An alert event as returned by `/admin/ops/alert-events`. */
export interface AlertEvent {
  id: number
  rule_id: number
  severity: OpsSeverity | string
  status: 'firing' | 'resolved' | string
  title?: string
  description?: string
  metric_value?: number
  threshold_value?: number
  // NOTE(review): reconstructed type arguments, same caveat as `AlertRule.filters`.
  dimensions?: Record<string, any>
  fired_at: string
  resolved_at?: string | null
  email_sent: boolean
  created_at: string
}

/** Payload of `/admin/ops/email-notification/config`. */
export interface EmailNotificationConfig {
  alert: {
    enabled: boolean
    recipients: string[]
    min_severity: AlertSeverity | ''
    rate_limit_per_hour: number
    batching_window_seconds: number
    include_resolved_alerts: boolean
  }
  report: {
    enabled: boolean
    recipients: string[]
    daily_summary_enabled: boolean
    daily_summary_schedule: string
    weekly_summary_enabled: boolean
    weekly_summary_schedule: string
    error_digest_enabled: boolean
    error_digest_schedule: string
    error_digest_min_count: number
    account_health_enabled: boolean
    account_health_schedule: string
    account_health_error_rate_threshold: number
  }
}

export interface OpsDistributedLockSettings {
  enabled: boolean
  key: string
  ttl_seconds: number
}

/** Payload of `/admin/ops/runtime/alert`. */
export interface OpsAlertRuntimeSettings {
  evaluation_interval_seconds: number
  distributed_lock: OpsDistributedLockSettings
  silencing: {
    enabled: boolean
    global_until_rfc3339: string
    global_reason: string
    entries?: Array<{
      rule_id?: number
      // NOTE(review): element type reconstructed (lost in extraction); confirm upstream.
      severities?: Array<OpsSeverity | string>
      until_rfc3339: string
      reason: string
    }>
  }
}

/** One row of the error-log listing. */
export interface OpsErrorLog {
  id: number
  created_at: string
  phase: OpsPhase
  type: string
  severity: OpsSeverity
  status_code: number
  platform: string
  model: string

  latency_ms?: number | null
  client_request_id: string
  request_id: string
  message: string

  user_id?: number | null
  api_key_id?: number | null
  account_id?: number | null
  group_id?: number | null

  client_ip?: string | null
  request_path?: string
  stream?: boolean
}

/** Error-log row plus the fields returned by the detail endpoint. */
export interface OpsErrorDetail extends OpsErrorLog {
  error_body: string
  user_agent: string

  auth_latency_ms?: number | null
  routing_latency_ms?: number | null
  upstream_latency_ms?: number | null
  response_latency_ms?: number | null
  time_to_first_token_ms?: number | null

  request_body: string
  request_body_truncated: boolean
  request_body_bytes?: number | null

  is_business_limited: boolean
}

// NOTE(review): type argument reconstructed from the alias name; confirm upstream.
export type OpsErrorLogsResponse = PaginatedResponse<OpsErrorLog>

/** Query parameters shared by every `/admin/ops/dashboard/*` endpoint. */
export interface OpsDashboardQueryParams {
  time_range?: '5m' | '30m' | '1h' | '6h' | '24h'
  start_time?: string
  end_time?: string
  platform?: string
  group_id?: number | null
  mode?: OpsQueryMode
}

/**
 * Fetch `/admin/ops/dashboard/overview`.
 *
 * @param params Query filters, sent as-is.
 * @param options `signal` is forwarded so the request can be aborted.
 */
export async function getDashboardOverview(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsDashboardOverview> {
  const { data } = await apiClient.get<OpsDashboardOverview>('/admin/ops/dashboard/overview', {
    params,
    signal: options.signal
  })
  return data
}

/**
 * Fetch `/admin/ops/dashboard/throughput-trend`.
 *
 * @param params Query filters, sent as-is.
 * @param options `signal` is forwarded so the request can be aborted.
 */
export async function getThroughputTrend(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsThroughputTrendResponse> {
  const { data } = await apiClient.get<OpsThroughputTrendResponse>('/admin/ops/dashboard/throughput-trend', {
    params,
    signal: options.signal
  })
  return data
}

/**
 * Fetch `/admin/ops/dashboard/latency-histogram`.
 *
 * @param params Query filters, sent as-is.
 * @param options `signal` is forwarded so the request can be aborted.
 */
export async function getLatencyHistogram(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsLatencyHistogramResponse> {
  const { data } = await apiClient.get<OpsLatencyHistogramResponse>('/admin/ops/dashboard/latency-histogram', {
    params,
    signal: options.signal
  })
  return data
}

/**
 * Fetch `/admin/ops/dashboard/error-trend`.
 *
 * @param params Query filters, sent as-is.
 * @param options `signal` is forwarded so the request can be aborted.
 */
export async function getErrorTrend(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsErrorTrendResponse> {
  const { data } = await apiClient.get<OpsErrorTrendResponse>('/admin/ops/dashboard/error-trend', {
    params,
    signal: options.signal
  })
  return data
}

/**
 * Fetch `/admin/ops/dashboard/error-distribution`.
 *
 * @param params Query filters, sent as-is.
 * @param options `signal` is forwarded so the request can be aborted.
 */
export async function getErrorDistribution(
  params: OpsDashboardQueryParams,
  options: OpsRequestOptions = {}
): Promise<OpsErrorDistributionResponse> {
  const { data } = await apiClient.get<OpsErrorDistributionResponse>('/admin/ops/dashboard/error-distribution', {
    params,
    signal: options.signal
  })
  return data
}

/** List error logs from `/admin/ops/errors`; `params` is sent as the query string. */
export async function listErrorLogs(params: {
  page?: number
  page_size?: number
  time_range?: string
  start_time?: string
  end_time?: string
  platform?: string
  group_id?: number | null
  account_id?: number | null
  phase?: string
  q?: string
  status_codes?: string
}): Promise<OpsErrorLogsResponse> {
  const { data } = await apiClient.get<OpsErrorLogsResponse>('/admin/ops/errors', { params })
  return data
}

/** Fetch one error log from `/admin/ops/errors/{id}`. */
export async function getErrorLogDetail(id: number): Promise<OpsErrorDetail> {
  const { data } = await apiClient.get<OpsErrorDetail>(`/admin/ops/errors/${id}`)
  return data
}

/** POST a retry request to `/admin/ops/errors/{id}/retry`. */
export async function retryErrorRequest(id: number, req: OpsRetryRequest): Promise<OpsRetryResult> {
  const { data } = await apiClient.post<OpsRetryResult>(`/admin/ops/errors/${id}/retry`, req)
  return data
}

/** List request details from `/admin/ops/requests`; `params` is sent as the query string. */
export async function listRequestDetails(params: OpsRequestDetailsParams): Promise<OpsRequestDetailsResponse> {
  const { data } = await apiClient.get<OpsRequestDetailsResponse>('/admin/ops/requests', { params })
  return data
}

// Alert rules
/** List alert rules from `/admin/ops/alert-rules`. */
export async function listAlertRules(): Promise<AlertRule[]> {
  const { data } = await apiClient.get<AlertRule[]>('/admin/ops/alert-rules')
  return data
}

/** Create an alert rule via POST `/admin/ops/alert-rules`. */
export async function createAlertRule(rule: AlertRule): Promise<AlertRule> {
  const { data } = await apiClient.post<AlertRule>('/admin/ops/alert-rules', rule)
  return data
}

/** Update an alert rule via PUT `/admin/ops/alert-rules/{id}`. */
export async function updateAlertRule(id: number, rule: Partial<AlertRule>): Promise<AlertRule> {
  const { data } = await apiClient.put<AlertRule>(`/admin/ops/alert-rules/${id}`, rule)
  return data
}

/** Delete an alert rule via DELETE `/admin/ops/alert-rules/{id}`. */
export async function deleteAlertRule(id: number): Promise<void> {
  await apiClient.delete(`/admin/ops/alert-rules/${id}`)
}

/** List alert events from `/admin/ops/alert-events`, passing `limit` (default 100) as a query parameter. */
export async function listAlertEvents(limit = 100): Promise<AlertEvent[]> {
  const { data } = await apiClient.get<AlertEvent[]>('/admin/ops/alert-events', { params: { limit } })
  return data
}

// Email notification config
/** Fetch `/admin/ops/email-notification/config`. */
export async function getEmailNotificationConfig(): Promise<EmailNotificationConfig> {
  const { data } = await apiClient.get<EmailNotificationConfig>('/admin/ops/email-notification/config')
  return data
}

/** PUT the full config to `/admin/ops/email-notification/config`. */
export async function updateEmailNotificationConfig(config: EmailNotificationConfig): Promise<EmailNotificationConfig> {
  const { data } = await apiClient.put<EmailNotificationConfig>('/admin/ops/email-notification/config', config)
  return data
}

// Runtime settings (DB-backed)
/** Fetch `/admin/ops/runtime/alert`. */
export async function getAlertRuntimeSettings(): Promise<OpsAlertRuntimeSettings> {
  const { data } = await apiClient.get<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert')
  return data
}

/** PUT the full settings object to `/admin/ops/runtime/alert`. */
export async function updateAlertRuntimeSettings(config: OpsAlertRuntimeSettings): Promise<OpsAlertRuntimeSettings> {
  const { data } = await apiClient.put<OpsAlertRuntimeSettings>('/admin/ops/runtime/alert', config)
  return data
}

/** Aggregate of every ops helper, exposed as `adminAPI.ops`. */
export const opsAPI = {
  getDashboardOverview,
  getThroughputTrend,
  getLatencyHistogram,
  getErrorTrend,
  getErrorDistribution,
  getConcurrencyStats,
  getAccountAvailabilityStats,
  subscribeQPS,
  listErrorLogs,
  getErrorLogDetail,
  retryErrorRequest,
  listRequestDetails,
  listAlertRules,
  createAlertRule,
  updateAlertRule,
  deleteAlertRule,
  listAlertEvents,
  getEmailNotificationConfig,
  updateEmailNotificationConfig,
  getAlertRuntimeSettings,
  updateAlertRuntimeSettings
}

export default opsAPI

// Patch context preserved from the collapsed diff (end of ops.ts):
// diff --git a/frontend/src/api/admin/settings.ts b/frontend/src/api/admin/settings.ts
// index 6b46de7d..37b12e40 100644, first hunk @@ -34,9 +34,22 @@
interface SystemSettings { turnstile_enabled: boolean turnstile_site_key: string turnstile_secret_key_configured: boolean + + // Model fallback configuration + enable_model_fallback: boolean + fallback_model_anthropic: string + fallback_model_openai: string + fallback_model_gemini: string + fallback_model_antigravity: string + // Identity patch configuration (Claude -> Gemini) enable_identity_patch: boolean identity_patch_prompt: string + + // Ops Monitoring (vNext) + ops_monitoring_enabled: boolean + ops_realtime_monitoring_enabled: boolean + ops_query_mode_default: 'auto' | 'raw' | 'preagg' | string } export interface UpdateSettingsRequest { @@ -60,8 +73,16 @@ export interface UpdateSettingsRequest { turnstile_enabled?: boolean turnstile_site_key?: string turnstile_secret_key?: string + enable_model_fallback?: boolean + fallback_model_anthropic?: string + fallback_model_openai?: string + fallback_model_gemini?: string + fallback_model_antigravity?: string enable_identity_patch?: boolean identity_patch_prompt?: string + ops_monitoring_enabled?: boolean + ops_realtime_monitoring_enabled?: boolean + ops_query_mode_default?: 'auto' | 'raw' | 'preagg' | string } /** diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 4e53069a..3827498b 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -80,9 +80,45 @@ apiClient.interceptors.response.use( return response }, (error: AxiosError>) => { + // Request cancellation: keep the original axios cancellation error so callers can ignore it. + // Otherwise we'd misclassify it as a generic "network error". + if (error.code === 'ERR_CANCELED' || axios.isCancel(error)) { + return Promise.reject(error) + } + // Handle common errors if (error.response) { const { status, data } = error.response + const url = String(error.config?.url || '') + + // Validate `data` shape to avoid HTML error pages breaking our error handling. + const apiData = (typeof data === 'object' && data !== null ? 
data : {}) as Record + + // Ops monitoring disabled: treat as feature-flagged 404, and proactively redirect away + // from ops pages to avoid broken UI states. + if (status === 404 && apiData.message === 'Ops monitoring is disabled') { + try { + localStorage.setItem('ops_monitoring_enabled_cached', 'false') + } catch { + // ignore localStorage failures + } + try { + window.dispatchEvent(new CustomEvent('ops-monitoring-disabled')) + } catch { + // ignore event failures + } + + if (window.location.pathname.startsWith('/admin/ops')) { + window.location.href = '/admin/settings' + } + + return Promise.reject({ + status, + code: 'OPS_DISABLED', + message: apiData.message || error.message, + url + }) + } // 401: Unauthorized - clear token and redirect to login if (status === 401) { @@ -113,8 +149,8 @@ apiClient.interceptors.response.use( // Return structured error return Promise.reject({ status, - code: data?.code, - message: data?.message || error.message + code: apiData.code, + message: apiData.message || apiData.detail || error.message }) }