feat: add OpenAI image generation controls
This commit is contained in:
@@ -575,6 +575,24 @@ type ConcurrencyConfig struct {
|
||||
PingInterval int `mapstructure:"ping_interval"`
|
||||
}
|
||||
|
||||
// ImageConcurrencyConfig holds the settings for the dedicated concurrency
// limit applied to image generation requests. Fields are populated from the
// configuration keys named in their mapstructure tags.
type ImageConcurrencyConfig struct {
	// Enabled reports whether the dedicated image generation concurrency
	// limit is active. It is off by default to preserve existing behavior.
	Enabled bool `mapstructure:"enabled"`
	// MaxConcurrentRequests is the number of image generation requests the
	// current process may handle at the same time; 0 means unlimited.
	MaxConcurrentRequests int `mapstructure:"max_concurrent_requests"`
	// OverflowMode selects what happens once the image concurrency limit is
	// reached: "reject" or "wait".
	OverflowMode string `mapstructure:"overflow_mode"`
	// WaitTimeoutSeconds is the timeout, in seconds, for waiting on an image
	// concurrency slot when overflow_mode=wait.
	WaitTimeoutSeconds int `mapstructure:"wait_timeout_seconds"`
	// MaxWaitingRequests is the number of image requests the current process
	// allows to queue while waiting when overflow_mode=wait.
	MaxWaitingRequests int `mapstructure:"max_waiting_requests"`
}
|
||||
|
||||
// Accepted values for the gateway.image_concurrency.overflow_mode setting.
const (
	// ImageConcurrencyOverflowModeReject is the "reject" overflow mode.
	ImageConcurrencyOverflowModeReject = "reject"
	// ImageConcurrencyOverflowModeWait is the "wait" overflow mode.
	ImageConcurrencyOverflowModeWait = "wait"
)
|
||||
|
||||
// GatewayConfig API网关相关配置
|
||||
type GatewayConfig struct {
|
||||
// 等待上游响应头的超时时间(秒),0表示无超时
|
||||
@@ -604,6 +622,8 @@ type GatewayConfig struct {
|
||||
OpenAIPassthroughAllowTimeoutHeaders bool `mapstructure:"openai_passthrough_allow_timeout_headers"`
|
||||
// OpenAIWS: OpenAI Responses WebSocket 配置(默认开启,可按需回滚到 HTTP)
|
||||
OpenAIWS GatewayOpenAIWSConfig `mapstructure:"openai_ws"`
|
||||
// ImageConcurrency: 图片生成独立并发限制配置(默认关闭)
|
||||
ImageConcurrency ImageConcurrencyConfig `mapstructure:"image_concurrency"`
|
||||
|
||||
// HTTP 上游连接池配置(性能优化:支持高并发场景调优)
|
||||
// MaxIdleConns: 所有主机的最大空闲连接总数
|
||||
@@ -635,6 +655,10 @@ type GatewayConfig struct {
|
||||
StreamDataIntervalTimeout int `mapstructure:"stream_data_interval_timeout"`
|
||||
// StreamKeepaliveInterval: 流式 keepalive 间隔(秒),0表示禁用
|
||||
StreamKeepaliveInterval int `mapstructure:"stream_keepalive_interval"`
|
||||
// ImageStreamDataIntervalTimeout: 图片流数据间隔超时(秒),0表示禁用
|
||||
ImageStreamDataIntervalTimeout int `mapstructure:"image_stream_data_interval_timeout"`
|
||||
// ImageStreamKeepaliveInterval: 图片流式 keepalive 间隔(秒),0表示禁用
|
||||
ImageStreamKeepaliveInterval int `mapstructure:"image_stream_keepalive_interval"`
|
||||
// MaxLineSize: 上游 SSE 单行最大字节数(0使用默认值)
|
||||
MaxLineSize int `mapstructure:"max_line_size"`
|
||||
|
||||
@@ -1672,6 +1696,11 @@ func setDefaults() {
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.queue", 0.7)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.error_rate", 0.8)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.ttft", 0.5)
|
||||
viper.SetDefault("gateway.image_concurrency.enabled", false)
|
||||
viper.SetDefault("gateway.image_concurrency.max_concurrent_requests", 0)
|
||||
viper.SetDefault("gateway.image_concurrency.overflow_mode", ImageConcurrencyOverflowModeReject)
|
||||
viper.SetDefault("gateway.image_concurrency.wait_timeout_seconds", 30)
|
||||
viper.SetDefault("gateway.image_concurrency.max_waiting_requests", 100)
|
||||
viper.SetDefault("gateway.antigravity_fallback_cooldown_minutes", 1)
|
||||
viper.SetDefault("gateway.antigravity_extra_retries", 10)
|
||||
viper.SetDefault("gateway.max_body_size", int64(256*1024*1024))
|
||||
@@ -1689,6 +1718,8 @@ func setDefaults() {
|
||||
viper.SetDefault("gateway.concurrency_slot_ttl_minutes", 30) // 并发槽位过期时间(支持超长请求)
|
||||
viper.SetDefault("gateway.stream_data_interval_timeout", 180)
|
||||
viper.SetDefault("gateway.stream_keepalive_interval", 10)
|
||||
viper.SetDefault("gateway.image_stream_data_interval_timeout", 900)
|
||||
viper.SetDefault("gateway.image_stream_keepalive_interval", 10)
|
||||
viper.SetDefault("gateway.max_line_size", 500*1024*1024)
|
||||
viper.SetDefault("gateway.scheduling.sticky_session_max_waiting", 3)
|
||||
viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 120*time.Second)
|
||||
@@ -2239,6 +2270,21 @@ func (c *Config) Validate() error {
|
||||
ConnectionPoolIsolationProxy, ConnectionPoolIsolationAccount, ConnectionPoolIsolationAccountProxy)
|
||||
}
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.MaxConcurrentRequests < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.max_concurrent_requests must be non-negative")
|
||||
}
|
||||
switch strings.TrimSpace(c.Gateway.ImageConcurrency.OverflowMode) {
|
||||
case "", ImageConcurrencyOverflowModeReject, ImageConcurrencyOverflowModeWait:
|
||||
default:
|
||||
return fmt.Errorf("gateway.image_concurrency.overflow_mode must be one of: %s/%s",
|
||||
ImageConcurrencyOverflowModeReject, ImageConcurrencyOverflowModeWait)
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.WaitTimeoutSeconds < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.wait_timeout_seconds must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.MaxWaitingRequests < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.max_waiting_requests must be non-negative")
|
||||
}
|
||||
if c.Gateway.MaxIdleConns <= 0 {
|
||||
return fmt.Errorf("gateway.max_idle_conns must be positive")
|
||||
}
|
||||
@@ -2277,6 +2323,20 @@ func (c *Config) Validate() error {
|
||||
(c.Gateway.StreamKeepaliveInterval < 5 || c.Gateway.StreamKeepaliveInterval > 30) {
|
||||
return fmt.Errorf("gateway.stream_keepalive_interval must be 0 or between 5-30 seconds")
|
||||
}
|
||||
if c.Gateway.ImageStreamDataIntervalTimeout < 0 {
|
||||
return fmt.Errorf("gateway.image_stream_data_interval_timeout must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageStreamDataIntervalTimeout != 0 &&
|
||||
(c.Gateway.ImageStreamDataIntervalTimeout < 60 || c.Gateway.ImageStreamDataIntervalTimeout > 1800) {
|
||||
return fmt.Errorf("gateway.image_stream_data_interval_timeout must be 0 or between 60-1800 seconds")
|
||||
}
|
||||
if c.Gateway.ImageStreamKeepaliveInterval < 0 {
|
||||
return fmt.Errorf("gateway.image_stream_keepalive_interval must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageStreamKeepaliveInterval != 0 &&
|
||||
(c.Gateway.ImageStreamKeepaliveInterval < 5 || c.Gateway.ImageStreamKeepaliveInterval > 60) {
|
||||
return fmt.Errorf("gateway.image_stream_keepalive_interval must be 0 or between 5-60 seconds")
|
||||
}
|
||||
// 兼容旧键 sticky_previous_response_ttl_seconds
|
||||
if c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds <= 0 && c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds > 0 {
|
||||
c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds
|
||||
|
||||
Reference in New Issue
Block a user