feat(sync): full code sync from release
This commit is contained in:
@@ -364,6 +364,8 @@ type GatewayConfig struct {
|
||||
// OpenAIPassthroughAllowTimeoutHeaders: OpenAI 透传模式是否放行客户端超时头
|
||||
// 关闭(默认)可避免 x-stainless-timeout 等头导致上游提前断流。
|
||||
OpenAIPassthroughAllowTimeoutHeaders bool `mapstructure:"openai_passthrough_allow_timeout_headers"`
|
||||
// OpenAIWS: OpenAI Responses WebSocket 配置(默认开启,可按需回滚到 HTTP)
|
||||
OpenAIWS GatewayOpenAIWSConfig `mapstructure:"openai_ws"`
|
||||
|
||||
// HTTP 上游连接池配置(性能优化:支持高并发场景调优)
|
||||
// MaxIdleConns: 所有主机的最大空闲连接总数
|
||||
@@ -450,6 +452,101 @@ type GatewayConfig struct {
|
||||
ModelsListCacheTTLSeconds int `mapstructure:"models_list_cache_ttl_seconds"`
|
||||
}
|
||||
|
||||
// GatewayOpenAIWSConfig OpenAI Responses WebSocket 配置。
|
||||
// 注意:默认全局开启;如需回滚可使用 force_http 或关闭 enabled。
|
||||
type GatewayOpenAIWSConfig struct {
|
||||
// ModeRouterV2Enabled: 新版 WS mode 路由开关(默认 false;关闭时保持 legacy 行为)
|
||||
ModeRouterV2Enabled bool `mapstructure:"mode_router_v2_enabled"`
|
||||
// IngressModeDefault: ingress 默认模式(off/shared/dedicated)
|
||||
IngressModeDefault string `mapstructure:"ingress_mode_default"`
|
||||
// Enabled: 全局总开关(默认 true)
|
||||
Enabled bool `mapstructure:"enabled"`
|
||||
// OAuthEnabled: 是否允许 OpenAI OAuth 账号使用 WS
|
||||
OAuthEnabled bool `mapstructure:"oauth_enabled"`
|
||||
// APIKeyEnabled: 是否允许 OpenAI API Key 账号使用 WS
|
||||
APIKeyEnabled bool `mapstructure:"apikey_enabled"`
|
||||
// ForceHTTP: 全局强制 HTTP(用于紧急回滚)
|
||||
ForceHTTP bool `mapstructure:"force_http"`
|
||||
// AllowStoreRecovery: 允许在 WSv2 下按策略恢复 store=true(默认 false)
|
||||
AllowStoreRecovery bool `mapstructure:"allow_store_recovery"`
|
||||
// IngressPreviousResponseRecoveryEnabled: ingress 模式收到 previous_response_not_found 时,是否允许自动去掉 previous_response_id 重试一次(默认 true)
|
||||
IngressPreviousResponseRecoveryEnabled bool `mapstructure:"ingress_previous_response_recovery_enabled"`
|
||||
// StoreDisabledConnMode: store=false 且无可复用会话连接时的建连策略(strict/adaptive/off)
|
||||
// - strict: 强制新建连接(隔离优先)
|
||||
// - adaptive: 仅在高风险失败后强制新建连接(性能与隔离折中)
|
||||
// - off: 不强制新建连接(复用优先)
|
||||
StoreDisabledConnMode string `mapstructure:"store_disabled_conn_mode"`
|
||||
// StoreDisabledForceNewConn: store=false 且无可复用粘连连接时是否强制新建连接(默认 true,保障会话隔离)
|
||||
// 兼容旧配置;当 StoreDisabledConnMode 为空时才生效。
|
||||
StoreDisabledForceNewConn bool `mapstructure:"store_disabled_force_new_conn"`
|
||||
// PrewarmGenerateEnabled: 是否启用 WSv2 generate=false 预热(默认 false)
|
||||
PrewarmGenerateEnabled bool `mapstructure:"prewarm_generate_enabled"`
|
||||
|
||||
// Feature 开关:v2 优先于 v1
|
||||
ResponsesWebsockets bool `mapstructure:"responses_websockets"`
|
||||
ResponsesWebsocketsV2 bool `mapstructure:"responses_websockets_v2"`
|
||||
|
||||
// 连接池参数
|
||||
MaxConnsPerAccount int `mapstructure:"max_conns_per_account"`
|
||||
MinIdlePerAccount int `mapstructure:"min_idle_per_account"`
|
||||
MaxIdlePerAccount int `mapstructure:"max_idle_per_account"`
|
||||
// DynamicMaxConnsByAccountConcurrencyEnabled: 是否按账号并发动态计算连接池上限
|
||||
DynamicMaxConnsByAccountConcurrencyEnabled bool `mapstructure:"dynamic_max_conns_by_account_concurrency_enabled"`
|
||||
// OAuthMaxConnsFactor: OAuth 账号连接池系数(effective=ceil(concurrency*factor))
|
||||
OAuthMaxConnsFactor float64 `mapstructure:"oauth_max_conns_factor"`
|
||||
// APIKeyMaxConnsFactor: API Key 账号连接池系数(effective=ceil(concurrency*factor))
|
||||
APIKeyMaxConnsFactor float64 `mapstructure:"apikey_max_conns_factor"`
|
||||
DialTimeoutSeconds int `mapstructure:"dial_timeout_seconds"`
|
||||
ReadTimeoutSeconds int `mapstructure:"read_timeout_seconds"`
|
||||
WriteTimeoutSeconds int `mapstructure:"write_timeout_seconds"`
|
||||
PoolTargetUtilization float64 `mapstructure:"pool_target_utilization"`
|
||||
QueueLimitPerConn int `mapstructure:"queue_limit_per_conn"`
|
||||
// EventFlushBatchSize: WS 流式写出批量 flush 阈值(事件条数)
|
||||
EventFlushBatchSize int `mapstructure:"event_flush_batch_size"`
|
||||
// EventFlushIntervalMS: WS 流式写出最大等待时间(毫秒);0 表示仅按 batch 触发
|
||||
EventFlushIntervalMS int `mapstructure:"event_flush_interval_ms"`
|
||||
// PrewarmCooldownMS: 连接池预热触发冷却时间(毫秒)
|
||||
PrewarmCooldownMS int `mapstructure:"prewarm_cooldown_ms"`
|
||||
// FallbackCooldownSeconds: WS 回退冷却窗口,避免 WS/HTTP 抖动;0 表示关闭冷却
|
||||
FallbackCooldownSeconds int `mapstructure:"fallback_cooldown_seconds"`
|
||||
// RetryBackoffInitialMS: WS 重试初始退避(毫秒);<=0 表示关闭退避
|
||||
RetryBackoffInitialMS int `mapstructure:"retry_backoff_initial_ms"`
|
||||
// RetryBackoffMaxMS: WS 重试最大退避(毫秒)
|
||||
RetryBackoffMaxMS int `mapstructure:"retry_backoff_max_ms"`
|
||||
// RetryJitterRatio: WS 重试退避抖动比例(0-1)
|
||||
RetryJitterRatio float64 `mapstructure:"retry_jitter_ratio"`
|
||||
// RetryTotalBudgetMS: WS 单次请求重试总预算(毫秒);0 表示关闭预算限制
|
||||
RetryTotalBudgetMS int `mapstructure:"retry_total_budget_ms"`
|
||||
// PayloadLogSampleRate: payload_schema 日志采样率(0-1)
|
||||
PayloadLogSampleRate float64 `mapstructure:"payload_log_sample_rate"`
|
||||
|
||||
// 账号调度与粘连参数
|
||||
LBTopK int `mapstructure:"lb_top_k"`
|
||||
// StickySessionTTLSeconds: session_hash -> account_id 粘连 TTL
|
||||
StickySessionTTLSeconds int `mapstructure:"sticky_session_ttl_seconds"`
|
||||
// SessionHashReadOldFallback: 会话哈希迁移期是否允许“新 key 未命中时回退读旧 SHA-256 key”
|
||||
SessionHashReadOldFallback bool `mapstructure:"session_hash_read_old_fallback"`
|
||||
// SessionHashDualWriteOld: 会话哈希迁移期是否双写旧 SHA-256 key(短 TTL)
|
||||
SessionHashDualWriteOld bool `mapstructure:"session_hash_dual_write_old"`
|
||||
// MetadataBridgeEnabled: RequestMetadata 迁移期是否保留旧 ctxkey.* 兼容桥接
|
||||
MetadataBridgeEnabled bool `mapstructure:"metadata_bridge_enabled"`
|
||||
// StickyResponseIDTTLSeconds: response_id -> account_id 粘连 TTL
|
||||
StickyResponseIDTTLSeconds int `mapstructure:"sticky_response_id_ttl_seconds"`
|
||||
// StickyPreviousResponseTTLSeconds: 兼容旧键(当新键未设置时回退)
|
||||
StickyPreviousResponseTTLSeconds int `mapstructure:"sticky_previous_response_ttl_seconds"`
|
||||
|
||||
SchedulerScoreWeights GatewayOpenAIWSSchedulerScoreWeights `mapstructure:"scheduler_score_weights"`
|
||||
}
|
||||
|
||||
// GatewayOpenAIWSSchedulerScoreWeights 账号调度打分权重。
|
||||
type GatewayOpenAIWSSchedulerScoreWeights struct {
|
||||
Priority float64 `mapstructure:"priority"`
|
||||
Load float64 `mapstructure:"load"`
|
||||
Queue float64 `mapstructure:"queue"`
|
||||
ErrorRate float64 `mapstructure:"error_rate"`
|
||||
TTFT float64 `mapstructure:"ttft"`
|
||||
}
|
||||
|
||||
// GatewayUsageRecordConfig 使用量记录异步队列配置
|
||||
type GatewayUsageRecordConfig struct {
|
||||
// WorkerCount: worker 初始数量(自动扩缩容开启时作为初始并发上限)
|
||||
@@ -886,6 +983,12 @@ func load(allowMissingJWTSecret bool) (*Config, error) {
|
||||
cfg.Log.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel))
|
||||
cfg.Log.Output.FilePath = strings.TrimSpace(cfg.Log.Output.FilePath)
|
||||
|
||||
// 兼容旧键 gateway.openai_ws.sticky_previous_response_ttl_seconds。
|
||||
// 新键未配置(<=0)时回退旧键;新键优先。
|
||||
if cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds <= 0 && cfg.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds > 0 {
|
||||
cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = cfg.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds
|
||||
}
|
||||
|
||||
// Auto-generate TOTP encryption key if not set (32 bytes = 64 hex chars for AES-256)
|
||||
cfg.Totp.EncryptionKey = strings.TrimSpace(cfg.Totp.EncryptionKey)
|
||||
if cfg.Totp.EncryptionKey == "" {
|
||||
@@ -945,7 +1048,7 @@ func setDefaults() {
|
||||
viper.SetDefault("server.read_header_timeout", 30) // 30秒读取请求头
|
||||
viper.SetDefault("server.idle_timeout", 120) // 120秒空闲超时
|
||||
viper.SetDefault("server.trusted_proxies", []string{})
|
||||
viper.SetDefault("server.max_request_body_size", int64(100*1024*1024))
|
||||
viper.SetDefault("server.max_request_body_size", int64(256*1024*1024))
|
||||
// H2C 默认配置
|
||||
viper.SetDefault("server.h2c.enabled", false)
|
||||
viper.SetDefault("server.h2c.max_concurrent_streams", uint32(50)) // 50 个并发流
|
||||
@@ -1088,9 +1191,9 @@ func setDefaults() {
|
||||
// RateLimit
|
||||
viper.SetDefault("rate_limit.overload_cooldown_minutes", 10)
|
||||
|
||||
// Pricing - 从 model-price-repo 同步模型定价和上下文窗口数据的配置
|
||||
viper.SetDefault("pricing.remote_url", "https://github.com/Wei-Shaw/model-price-repo/raw/refs/heads/main/model_prices_and_context_window.json")
|
||||
viper.SetDefault("pricing.hash_url", "https://github.com/Wei-Shaw/model-price-repo/raw/refs/heads/main/model_prices_and_context_window.sha256")
|
||||
// Pricing - 从 model-price-repo 同步模型定价和上下文窗口数据(固定到 commit,避免分支漂移)
|
||||
viper.SetDefault("pricing.remote_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/c7947e9871687e664180bc971d4837f1fc2784a9/model_prices_and_context_window.json")
|
||||
viper.SetDefault("pricing.hash_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/c7947e9871687e664180bc971d4837f1fc2784a9/model_prices_and_context_window.sha256")
|
||||
viper.SetDefault("pricing.data_dir", "./data")
|
||||
viper.SetDefault("pricing.fallback_file", "./resources/model-pricing/model_prices_and_context_window.json")
|
||||
viper.SetDefault("pricing.update_interval_hours", 24)
|
||||
@@ -1157,9 +1260,55 @@ func setDefaults() {
|
||||
viper.SetDefault("gateway.max_account_switches_gemini", 3)
|
||||
viper.SetDefault("gateway.force_codex_cli", false)
|
||||
viper.SetDefault("gateway.openai_passthrough_allow_timeout_headers", false)
|
||||
// OpenAI Responses WebSocket(默认开启;可通过 force_http 紧急回滚)
|
||||
viper.SetDefault("gateway.openai_ws.enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.mode_router_v2_enabled", false)
|
||||
viper.SetDefault("gateway.openai_ws.ingress_mode_default", "shared")
|
||||
viper.SetDefault("gateway.openai_ws.oauth_enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.apikey_enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.force_http", false)
|
||||
viper.SetDefault("gateway.openai_ws.allow_store_recovery", false)
|
||||
viper.SetDefault("gateway.openai_ws.ingress_previous_response_recovery_enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.store_disabled_conn_mode", "strict")
|
||||
viper.SetDefault("gateway.openai_ws.store_disabled_force_new_conn", true)
|
||||
viper.SetDefault("gateway.openai_ws.prewarm_generate_enabled", false)
|
||||
viper.SetDefault("gateway.openai_ws.responses_websockets", false)
|
||||
viper.SetDefault("gateway.openai_ws.responses_websockets_v2", true)
|
||||
viper.SetDefault("gateway.openai_ws.max_conns_per_account", 128)
|
||||
viper.SetDefault("gateway.openai_ws.min_idle_per_account", 4)
|
||||
viper.SetDefault("gateway.openai_ws.max_idle_per_account", 12)
|
||||
viper.SetDefault("gateway.openai_ws.dynamic_max_conns_by_account_concurrency_enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.oauth_max_conns_factor", 1.0)
|
||||
viper.SetDefault("gateway.openai_ws.apikey_max_conns_factor", 1.0)
|
||||
viper.SetDefault("gateway.openai_ws.dial_timeout_seconds", 10)
|
||||
viper.SetDefault("gateway.openai_ws.read_timeout_seconds", 900)
|
||||
viper.SetDefault("gateway.openai_ws.write_timeout_seconds", 120)
|
||||
viper.SetDefault("gateway.openai_ws.pool_target_utilization", 0.7)
|
||||
viper.SetDefault("gateway.openai_ws.queue_limit_per_conn", 64)
|
||||
viper.SetDefault("gateway.openai_ws.event_flush_batch_size", 1)
|
||||
viper.SetDefault("gateway.openai_ws.event_flush_interval_ms", 10)
|
||||
viper.SetDefault("gateway.openai_ws.prewarm_cooldown_ms", 300)
|
||||
viper.SetDefault("gateway.openai_ws.fallback_cooldown_seconds", 30)
|
||||
viper.SetDefault("gateway.openai_ws.retry_backoff_initial_ms", 120)
|
||||
viper.SetDefault("gateway.openai_ws.retry_backoff_max_ms", 2000)
|
||||
viper.SetDefault("gateway.openai_ws.retry_jitter_ratio", 0.2)
|
||||
viper.SetDefault("gateway.openai_ws.retry_total_budget_ms", 5000)
|
||||
viper.SetDefault("gateway.openai_ws.payload_log_sample_rate", 0.2)
|
||||
viper.SetDefault("gateway.openai_ws.lb_top_k", 7)
|
||||
viper.SetDefault("gateway.openai_ws.sticky_session_ttl_seconds", 3600)
|
||||
viper.SetDefault("gateway.openai_ws.session_hash_read_old_fallback", true)
|
||||
viper.SetDefault("gateway.openai_ws.session_hash_dual_write_old", true)
|
||||
viper.SetDefault("gateway.openai_ws.metadata_bridge_enabled", true)
|
||||
viper.SetDefault("gateway.openai_ws.sticky_response_id_ttl_seconds", 3600)
|
||||
viper.SetDefault("gateway.openai_ws.sticky_previous_response_ttl_seconds", 3600)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.priority", 1.0)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.load", 1.0)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.queue", 0.7)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.error_rate", 0.8)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.ttft", 0.5)
|
||||
viper.SetDefault("gateway.antigravity_fallback_cooldown_minutes", 1)
|
||||
viper.SetDefault("gateway.antigravity_extra_retries", 10)
|
||||
viper.SetDefault("gateway.max_body_size", int64(100*1024*1024))
|
||||
viper.SetDefault("gateway.max_body_size", int64(256*1024*1024))
|
||||
viper.SetDefault("gateway.upstream_response_read_max_bytes", int64(8*1024*1024))
|
||||
viper.SetDefault("gateway.proxy_probe_response_read_max_bytes", int64(1024*1024))
|
||||
viper.SetDefault("gateway.gemini_debug_response_headers", false)
|
||||
@@ -1747,6 +1896,118 @@ func (c *Config) Validate() error {
|
||||
(c.Gateway.StreamKeepaliveInterval < 5 || c.Gateway.StreamKeepaliveInterval > 30) {
|
||||
return fmt.Errorf("gateway.stream_keepalive_interval must be 0 or between 5-30 seconds")
|
||||
}
|
||||
// 兼容旧键 sticky_previous_response_ttl_seconds
|
||||
if c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds <= 0 && c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds > 0 {
|
||||
c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds
|
||||
}
|
||||
if c.Gateway.OpenAIWS.MaxConnsPerAccount <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.max_conns_per_account must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.MinIdlePerAccount < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.min_idle_per_account must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.MaxIdlePerAccount < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.max_idle_per_account must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.MinIdlePerAccount > c.Gateway.OpenAIWS.MaxIdlePerAccount {
|
||||
return fmt.Errorf("gateway.openai_ws.min_idle_per_account must be <= max_idle_per_account")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.MaxIdlePerAccount > c.Gateway.OpenAIWS.MaxConnsPerAccount {
|
||||
return fmt.Errorf("gateway.openai_ws.max_idle_per_account must be <= max_conns_per_account")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.OAuthMaxConnsFactor <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.oauth_max_conns_factor must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.APIKeyMaxConnsFactor <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.apikey_max_conns_factor must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.DialTimeoutSeconds <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.dial_timeout_seconds must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.ReadTimeoutSeconds <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.read_timeout_seconds must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.WriteTimeoutSeconds <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.write_timeout_seconds must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.PoolTargetUtilization <= 0 || c.Gateway.OpenAIWS.PoolTargetUtilization > 1 {
|
||||
return fmt.Errorf("gateway.openai_ws.pool_target_utilization must be within (0,1]")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.QueueLimitPerConn <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.queue_limit_per_conn must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.EventFlushBatchSize <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.event_flush_batch_size must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.EventFlushIntervalMS < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.event_flush_interval_ms must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.PrewarmCooldownMS < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.prewarm_cooldown_ms must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.FallbackCooldownSeconds < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.fallback_cooldown_seconds must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.RetryBackoffInitialMS < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.retry_backoff_initial_ms must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.RetryBackoffMaxMS < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.retry_backoff_max_ms must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.RetryBackoffInitialMS > 0 && c.Gateway.OpenAIWS.RetryBackoffMaxMS > 0 &&
|
||||
c.Gateway.OpenAIWS.RetryBackoffMaxMS < c.Gateway.OpenAIWS.RetryBackoffInitialMS {
|
||||
return fmt.Errorf("gateway.openai_ws.retry_backoff_max_ms must be >= retry_backoff_initial_ms")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.RetryJitterRatio < 0 || c.Gateway.OpenAIWS.RetryJitterRatio > 1 {
|
||||
return fmt.Errorf("gateway.openai_ws.retry_jitter_ratio must be within [0,1]")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.RetryTotalBudgetMS < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.retry_total_budget_ms must be non-negative")
|
||||
}
|
||||
if mode := strings.ToLower(strings.TrimSpace(c.Gateway.OpenAIWS.IngressModeDefault)); mode != "" {
|
||||
switch mode {
|
||||
case "off", "shared", "dedicated":
|
||||
default:
|
||||
return fmt.Errorf("gateway.openai_ws.ingress_mode_default must be one of off|shared|dedicated")
|
||||
}
|
||||
}
|
||||
if mode := strings.ToLower(strings.TrimSpace(c.Gateway.OpenAIWS.StoreDisabledConnMode)); mode != "" {
|
||||
switch mode {
|
||||
case "strict", "adaptive", "off":
|
||||
default:
|
||||
return fmt.Errorf("gateway.openai_ws.store_disabled_conn_mode must be one of strict|adaptive|off")
|
||||
}
|
||||
}
|
||||
if c.Gateway.OpenAIWS.PayloadLogSampleRate < 0 || c.Gateway.OpenAIWS.PayloadLogSampleRate > 1 {
|
||||
return fmt.Errorf("gateway.openai_ws.payload_log_sample_rate must be within [0,1]")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.LBTopK <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.lb_top_k must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.StickySessionTTLSeconds <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.sticky_session_ttl_seconds must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.sticky_response_id_ttl_seconds must be positive")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.sticky_previous_response_ttl_seconds must be non-negative")
|
||||
}
|
||||
if c.Gateway.OpenAIWS.SchedulerScoreWeights.Priority < 0 ||
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Load < 0 ||
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Queue < 0 ||
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.ErrorRate < 0 ||
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.TTFT < 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.scheduler_score_weights.* must be non-negative")
|
||||
}
|
||||
weightSum := c.Gateway.OpenAIWS.SchedulerScoreWeights.Priority +
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Load +
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Queue +
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.ErrorRate +
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.TTFT
|
||||
if weightSum <= 0 {
|
||||
return fmt.Errorf("gateway.openai_ws.scheduler_score_weights must not all be zero")
|
||||
}
|
||||
if c.Gateway.MaxLineSize < 0 {
|
||||
return fmt.Errorf("gateway.max_line_size must be non-negative")
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/spf13/viper"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func resetViperWithJWTSecret(t *testing.T) {
|
||||
@@ -75,6 +76,103 @@ func TestLoadDefaultSchedulingConfig(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDefaultOpenAIWSConfig(t *testing.T) {
|
||||
resetViperWithJWTSecret(t)
|
||||
|
||||
cfg, err := Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Load() error: %v", err)
|
||||
}
|
||||
|
||||
if !cfg.Gateway.OpenAIWS.Enabled {
|
||||
t.Fatalf("Gateway.OpenAIWS.Enabled = false, want true")
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.ResponsesWebsocketsV2 {
|
||||
t.Fatalf("Gateway.OpenAIWS.ResponsesWebsocketsV2 = false, want true")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.ResponsesWebsockets {
|
||||
t.Fatalf("Gateway.OpenAIWS.ResponsesWebsockets = true, want false")
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.DynamicMaxConnsByAccountConcurrencyEnabled {
|
||||
t.Fatalf("Gateway.OpenAIWS.DynamicMaxConnsByAccountConcurrencyEnabled = false, want true")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.OAuthMaxConnsFactor != 1.0 {
|
||||
t.Fatalf("Gateway.OpenAIWS.OAuthMaxConnsFactor = %v, want 1.0", cfg.Gateway.OpenAIWS.OAuthMaxConnsFactor)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.APIKeyMaxConnsFactor != 1.0 {
|
||||
t.Fatalf("Gateway.OpenAIWS.APIKeyMaxConnsFactor = %v, want 1.0", cfg.Gateway.OpenAIWS.APIKeyMaxConnsFactor)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.StickySessionTTLSeconds != 3600 {
|
||||
t.Fatalf("Gateway.OpenAIWS.StickySessionTTLSeconds = %d, want 3600", cfg.Gateway.OpenAIWS.StickySessionTTLSeconds)
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.SessionHashReadOldFallback {
|
||||
t.Fatalf("Gateway.OpenAIWS.SessionHashReadOldFallback = false, want true")
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.SessionHashDualWriteOld {
|
||||
t.Fatalf("Gateway.OpenAIWS.SessionHashDualWriteOld = false, want true")
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.MetadataBridgeEnabled {
|
||||
t.Fatalf("Gateway.OpenAIWS.MetadataBridgeEnabled = false, want true")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds != 3600 {
|
||||
t.Fatalf("Gateway.OpenAIWS.StickyResponseIDTTLSeconds = %d, want 3600", cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.FallbackCooldownSeconds != 30 {
|
||||
t.Fatalf("Gateway.OpenAIWS.FallbackCooldownSeconds = %d, want 30", cfg.Gateway.OpenAIWS.FallbackCooldownSeconds)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.EventFlushBatchSize != 1 {
|
||||
t.Fatalf("Gateway.OpenAIWS.EventFlushBatchSize = %d, want 1", cfg.Gateway.OpenAIWS.EventFlushBatchSize)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.EventFlushIntervalMS != 10 {
|
||||
t.Fatalf("Gateway.OpenAIWS.EventFlushIntervalMS = %d, want 10", cfg.Gateway.OpenAIWS.EventFlushIntervalMS)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.PrewarmCooldownMS != 300 {
|
||||
t.Fatalf("Gateway.OpenAIWS.PrewarmCooldownMS = %d, want 300", cfg.Gateway.OpenAIWS.PrewarmCooldownMS)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.RetryBackoffInitialMS != 120 {
|
||||
t.Fatalf("Gateway.OpenAIWS.RetryBackoffInitialMS = %d, want 120", cfg.Gateway.OpenAIWS.RetryBackoffInitialMS)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.RetryBackoffMaxMS != 2000 {
|
||||
t.Fatalf("Gateway.OpenAIWS.RetryBackoffMaxMS = %d, want 2000", cfg.Gateway.OpenAIWS.RetryBackoffMaxMS)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.RetryJitterRatio != 0.2 {
|
||||
t.Fatalf("Gateway.OpenAIWS.RetryJitterRatio = %v, want 0.2", cfg.Gateway.OpenAIWS.RetryJitterRatio)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.RetryTotalBudgetMS != 5000 {
|
||||
t.Fatalf("Gateway.OpenAIWS.RetryTotalBudgetMS = %d, want 5000", cfg.Gateway.OpenAIWS.RetryTotalBudgetMS)
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.PayloadLogSampleRate != 0.2 {
|
||||
t.Fatalf("Gateway.OpenAIWS.PayloadLogSampleRate = %v, want 0.2", cfg.Gateway.OpenAIWS.PayloadLogSampleRate)
|
||||
}
|
||||
if !cfg.Gateway.OpenAIWS.StoreDisabledForceNewConn {
|
||||
t.Fatalf("Gateway.OpenAIWS.StoreDisabledForceNewConn = false, want true")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.StoreDisabledConnMode != "strict" {
|
||||
t.Fatalf("Gateway.OpenAIWS.StoreDisabledConnMode = %q, want %q", cfg.Gateway.OpenAIWS.StoreDisabledConnMode, "strict")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.ModeRouterV2Enabled {
|
||||
t.Fatalf("Gateway.OpenAIWS.ModeRouterV2Enabled = true, want false")
|
||||
}
|
||||
if cfg.Gateway.OpenAIWS.IngressModeDefault != "shared" {
|
||||
t.Fatalf("Gateway.OpenAIWS.IngressModeDefault = %q, want %q", cfg.Gateway.OpenAIWS.IngressModeDefault, "shared")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadOpenAIWSStickyTTLCompatibility(t *testing.T) {
|
||||
resetViperWithJWTSecret(t)
|
||||
t.Setenv("GATEWAY_OPENAI_WS_STICKY_RESPONSE_ID_TTL_SECONDS", "0")
|
||||
t.Setenv("GATEWAY_OPENAI_WS_STICKY_PREVIOUS_RESPONSE_TTL_SECONDS", "7200")
|
||||
|
||||
cfg, err := Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Load() error: %v", err)
|
||||
}
|
||||
|
||||
if cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds != 7200 {
|
||||
t.Fatalf("StickyResponseIDTTLSeconds = %d, want 7200", cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDefaultIdempotencyConfig(t *testing.T) {
|
||||
resetViperWithJWTSecret(t)
|
||||
|
||||
@@ -993,6 +1091,16 @@ func TestValidateConfigErrors(t *testing.T) {
|
||||
mutate: func(c *Config) { c.Gateway.StreamKeepaliveInterval = 4 },
|
||||
wantErr: "gateway.stream_keepalive_interval",
|
||||
},
|
||||
{
|
||||
name: "gateway openai ws oauth max conns factor",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.OAuthMaxConnsFactor = 0 },
|
||||
wantErr: "gateway.openai_ws.oauth_max_conns_factor",
|
||||
},
|
||||
{
|
||||
name: "gateway openai ws apikey max conns factor",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.APIKeyMaxConnsFactor = 0 },
|
||||
wantErr: "gateway.openai_ws.apikey_max_conns_factor",
|
||||
},
|
||||
{
|
||||
name: "gateway stream data interval range",
|
||||
mutate: func(c *Config) { c.Gateway.StreamDataIntervalTimeout = 5 },
|
||||
@@ -1174,6 +1282,165 @@ func TestValidateConfigErrors(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateConfig_OpenAIWSRules(t *testing.T) {
|
||||
buildValid := func(t *testing.T) *Config {
|
||||
t.Helper()
|
||||
resetViperWithJWTSecret(t)
|
||||
cfg, err := Load()
|
||||
require.NoError(t, err)
|
||||
return cfg
|
||||
}
|
||||
|
||||
t.Run("sticky response id ttl 兼容旧键回填", func(t *testing.T) {
|
||||
cfg := buildValid(t)
|
||||
cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = 0
|
||||
cfg.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds = 7200
|
||||
|
||||
require.NoError(t, cfg.Validate())
|
||||
require.Equal(t, 7200, cfg.Gateway.OpenAIWS.StickyResponseIDTTLSeconds)
|
||||
})
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
mutate func(*Config)
|
||||
wantErr string
|
||||
}{
|
||||
{
|
||||
name: "max_conns_per_account 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.MaxConnsPerAccount = 0 },
|
||||
wantErr: "gateway.openai_ws.max_conns_per_account",
|
||||
},
|
||||
{
|
||||
name: "min_idle_per_account 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.MinIdlePerAccount = -1 },
|
||||
wantErr: "gateway.openai_ws.min_idle_per_account",
|
||||
},
|
||||
{
|
||||
name: "max_idle_per_account 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.MaxIdlePerAccount = -1 },
|
||||
wantErr: "gateway.openai_ws.max_idle_per_account",
|
||||
},
|
||||
{
|
||||
name: "min_idle_per_account 不能大于 max_idle_per_account",
|
||||
mutate: func(c *Config) {
|
||||
c.Gateway.OpenAIWS.MinIdlePerAccount = 3
|
||||
c.Gateway.OpenAIWS.MaxIdlePerAccount = 2
|
||||
},
|
||||
wantErr: "gateway.openai_ws.min_idle_per_account must be <= max_idle_per_account",
|
||||
},
|
||||
{
|
||||
name: "max_idle_per_account 不能大于 max_conns_per_account",
|
||||
mutate: func(c *Config) {
|
||||
c.Gateway.OpenAIWS.MaxConnsPerAccount = 2
|
||||
c.Gateway.OpenAIWS.MinIdlePerAccount = 1
|
||||
c.Gateway.OpenAIWS.MaxIdlePerAccount = 3
|
||||
},
|
||||
wantErr: "gateway.openai_ws.max_idle_per_account must be <= max_conns_per_account",
|
||||
},
|
||||
{
|
||||
name: "dial_timeout_seconds 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.DialTimeoutSeconds = 0 },
|
||||
wantErr: "gateway.openai_ws.dial_timeout_seconds",
|
||||
},
|
||||
{
|
||||
name: "read_timeout_seconds 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.ReadTimeoutSeconds = 0 },
|
||||
wantErr: "gateway.openai_ws.read_timeout_seconds",
|
||||
},
|
||||
{
|
||||
name: "write_timeout_seconds 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.WriteTimeoutSeconds = 0 },
|
||||
wantErr: "gateway.openai_ws.write_timeout_seconds",
|
||||
},
|
||||
{
|
||||
name: "pool_target_utilization 必须在 (0,1]",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.PoolTargetUtilization = 0 },
|
||||
wantErr: "gateway.openai_ws.pool_target_utilization",
|
||||
},
|
||||
{
|
||||
name: "queue_limit_per_conn 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.QueueLimitPerConn = 0 },
|
||||
wantErr: "gateway.openai_ws.queue_limit_per_conn",
|
||||
},
|
||||
{
|
||||
name: "fallback_cooldown_seconds 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.FallbackCooldownSeconds = -1 },
|
||||
wantErr: "gateway.openai_ws.fallback_cooldown_seconds",
|
||||
},
|
||||
{
|
||||
name: "store_disabled_conn_mode 必须为 strict|adaptive|off",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.StoreDisabledConnMode = "invalid" },
|
||||
wantErr: "gateway.openai_ws.store_disabled_conn_mode",
|
||||
},
|
||||
{
|
||||
name: "ingress_mode_default 必须为 off|shared|dedicated",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.IngressModeDefault = "invalid" },
|
||||
wantErr: "gateway.openai_ws.ingress_mode_default",
|
||||
},
|
||||
{
|
||||
name: "payload_log_sample_rate 必须在 [0,1] 范围内",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.PayloadLogSampleRate = 1.2 },
|
||||
wantErr: "gateway.openai_ws.payload_log_sample_rate",
|
||||
},
|
||||
{
|
||||
name: "retry_total_budget_ms 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.RetryTotalBudgetMS = -1 },
|
||||
wantErr: "gateway.openai_ws.retry_total_budget_ms",
|
||||
},
|
||||
{
|
||||
name: "lb_top_k 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.LBTopK = 0 },
|
||||
wantErr: "gateway.openai_ws.lb_top_k",
|
||||
},
|
||||
{
|
||||
name: "sticky_session_ttl_seconds 必须为正数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.StickySessionTTLSeconds = 0 },
|
||||
wantErr: "gateway.openai_ws.sticky_session_ttl_seconds",
|
||||
},
|
||||
{
|
||||
name: "sticky_response_id_ttl_seconds 必须为正数",
|
||||
mutate: func(c *Config) {
|
||||
c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = 0
|
||||
c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds = 0
|
||||
},
|
||||
wantErr: "gateway.openai_ws.sticky_response_id_ttl_seconds",
|
||||
},
|
||||
{
|
||||
name: "sticky_previous_response_ttl_seconds 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds = -1 },
|
||||
wantErr: "gateway.openai_ws.sticky_previous_response_ttl_seconds",
|
||||
},
|
||||
{
|
||||
name: "scheduler_score_weights 不能为负数",
|
||||
mutate: func(c *Config) { c.Gateway.OpenAIWS.SchedulerScoreWeights.Queue = -0.1 },
|
||||
wantErr: "gateway.openai_ws.scheduler_score_weights.* must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "scheduler_score_weights 不能全为 0",
|
||||
mutate: func(c *Config) {
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Priority = 0
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Load = 0
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.Queue = 0
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.ErrorRate = 0
|
||||
c.Gateway.OpenAIWS.SchedulerScoreWeights.TTFT = 0
|
||||
},
|
||||
wantErr: "gateway.openai_ws.scheduler_score_weights must not all be zero",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
tc := tc
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
cfg := buildValid(t)
|
||||
tc.mutate(cfg)
|
||||
|
||||
err := cfg.Validate()
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), tc.wantErr)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateConfig_AutoScaleDisabledIgnoreAutoScaleFields(t *testing.T) {
|
||||
resetViperWithJWTSecret(t)
|
||||
cfg, err := Load()
|
||||
|
||||
Reference in New Issue
Block a user