feat: add OpenAI image generation controls
This commit is contained in:
@@ -575,6 +575,24 @@ type ConcurrencyConfig struct {
|
||||
PingInterval int `mapstructure:"ping_interval"`
|
||||
}
|
||||
|
||||
// ImageConcurrencyConfig holds the settings for the dedicated concurrency limit
// applied to image generation requests.
type ImageConcurrencyConfig struct {
	// Enabled turns the dedicated image generation concurrency limit on.
	// It is off by default so that existing behavior is preserved.
	Enabled bool `mapstructure:"enabled"`
	// MaxConcurrentRequests is the number of image generation requests the
	// current process may handle at the same time; 0 means unlimited.
	MaxConcurrentRequests int `mapstructure:"max_concurrent_requests"`
	// OverflowMode selects what happens once the image concurrency limit is
	// reached: "reject" or "wait".
	OverflowMode string `mapstructure:"overflow_mode"`
	// WaitTimeoutSeconds is how long (in seconds) a request waits for an image
	// concurrency slot when overflow_mode=wait.
	WaitTimeoutSeconds int `mapstructure:"wait_timeout_seconds"`
	// MaxWaitingRequests is the number of image requests the current process
	// allows to queue while waiting when overflow_mode=wait.
	MaxWaitingRequests int `mapstructure:"max_waiting_requests"`
}
|
||||
|
||||
// Accepted values for ImageConcurrencyConfig.OverflowMode.
const (
	// ImageConcurrencyOverflowModeReject is the "reject" overflow mode.
	ImageConcurrencyOverflowModeReject = "reject"
	// ImageConcurrencyOverflowModeWait is the "wait" overflow mode.
	ImageConcurrencyOverflowModeWait = "wait"
)
|
||||
|
||||
// GatewayConfig API网关相关配置
|
||||
type GatewayConfig struct {
|
||||
// 等待上游响应头的超时时间(秒),0表示无超时
|
||||
@@ -604,6 +622,8 @@ type GatewayConfig struct {
|
||||
OpenAIPassthroughAllowTimeoutHeaders bool `mapstructure:"openai_passthrough_allow_timeout_headers"`
|
||||
// OpenAIWS: OpenAI Responses WebSocket 配置(默认开启,可按需回滚到 HTTP)
|
||||
OpenAIWS GatewayOpenAIWSConfig `mapstructure:"openai_ws"`
|
||||
// ImageConcurrency: 图片生成独立并发限制配置(默认关闭)
|
||||
ImageConcurrency ImageConcurrencyConfig `mapstructure:"image_concurrency"`
|
||||
|
||||
// HTTP 上游连接池配置(性能优化:支持高并发场景调优)
|
||||
// MaxIdleConns: 所有主机的最大空闲连接总数
|
||||
@@ -635,6 +655,10 @@ type GatewayConfig struct {
|
||||
StreamDataIntervalTimeout int `mapstructure:"stream_data_interval_timeout"`
|
||||
// StreamKeepaliveInterval: 流式 keepalive 间隔(秒),0表示禁用
|
||||
StreamKeepaliveInterval int `mapstructure:"stream_keepalive_interval"`
|
||||
// ImageStreamDataIntervalTimeout: 图片流数据间隔超时(秒),0表示禁用
|
||||
ImageStreamDataIntervalTimeout int `mapstructure:"image_stream_data_interval_timeout"`
|
||||
// ImageStreamKeepaliveInterval: 图片流式 keepalive 间隔(秒),0表示禁用
|
||||
ImageStreamKeepaliveInterval int `mapstructure:"image_stream_keepalive_interval"`
|
||||
// MaxLineSize: 上游 SSE 单行最大字节数(0使用默认值)
|
||||
MaxLineSize int `mapstructure:"max_line_size"`
|
||||
|
||||
@@ -1672,6 +1696,11 @@ func setDefaults() {
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.queue", 0.7)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.error_rate", 0.8)
|
||||
viper.SetDefault("gateway.openai_ws.scheduler_score_weights.ttft", 0.5)
|
||||
viper.SetDefault("gateway.image_concurrency.enabled", false)
|
||||
viper.SetDefault("gateway.image_concurrency.max_concurrent_requests", 0)
|
||||
viper.SetDefault("gateway.image_concurrency.overflow_mode", ImageConcurrencyOverflowModeReject)
|
||||
viper.SetDefault("gateway.image_concurrency.wait_timeout_seconds", 30)
|
||||
viper.SetDefault("gateway.image_concurrency.max_waiting_requests", 100)
|
||||
viper.SetDefault("gateway.antigravity_fallback_cooldown_minutes", 1)
|
||||
viper.SetDefault("gateway.antigravity_extra_retries", 10)
|
||||
viper.SetDefault("gateway.max_body_size", int64(256*1024*1024))
|
||||
@@ -1689,6 +1718,8 @@ func setDefaults() {
|
||||
viper.SetDefault("gateway.concurrency_slot_ttl_minutes", 30) // 并发槽位过期时间(支持超长请求)
|
||||
viper.SetDefault("gateway.stream_data_interval_timeout", 180)
|
||||
viper.SetDefault("gateway.stream_keepalive_interval", 10)
|
||||
viper.SetDefault("gateway.image_stream_data_interval_timeout", 900)
|
||||
viper.SetDefault("gateway.image_stream_keepalive_interval", 10)
|
||||
viper.SetDefault("gateway.max_line_size", 500*1024*1024)
|
||||
viper.SetDefault("gateway.scheduling.sticky_session_max_waiting", 3)
|
||||
viper.SetDefault("gateway.scheduling.sticky_session_wait_timeout", 120*time.Second)
|
||||
@@ -2239,6 +2270,21 @@ func (c *Config) Validate() error {
|
||||
ConnectionPoolIsolationProxy, ConnectionPoolIsolationAccount, ConnectionPoolIsolationAccountProxy)
|
||||
}
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.MaxConcurrentRequests < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.max_concurrent_requests must be non-negative")
|
||||
}
|
||||
switch strings.TrimSpace(c.Gateway.ImageConcurrency.OverflowMode) {
|
||||
case "", ImageConcurrencyOverflowModeReject, ImageConcurrencyOverflowModeWait:
|
||||
default:
|
||||
return fmt.Errorf("gateway.image_concurrency.overflow_mode must be one of: %s/%s",
|
||||
ImageConcurrencyOverflowModeReject, ImageConcurrencyOverflowModeWait)
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.WaitTimeoutSeconds < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.wait_timeout_seconds must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageConcurrency.MaxWaitingRequests < 0 {
|
||||
return fmt.Errorf("gateway.image_concurrency.max_waiting_requests must be non-negative")
|
||||
}
|
||||
if c.Gateway.MaxIdleConns <= 0 {
|
||||
return fmt.Errorf("gateway.max_idle_conns must be positive")
|
||||
}
|
||||
@@ -2277,6 +2323,20 @@ func (c *Config) Validate() error {
|
||||
(c.Gateway.StreamKeepaliveInterval < 5 || c.Gateway.StreamKeepaliveInterval > 30) {
|
||||
return fmt.Errorf("gateway.stream_keepalive_interval must be 0 or between 5-30 seconds")
|
||||
}
|
||||
if c.Gateway.ImageStreamDataIntervalTimeout < 0 {
|
||||
return fmt.Errorf("gateway.image_stream_data_interval_timeout must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageStreamDataIntervalTimeout != 0 &&
|
||||
(c.Gateway.ImageStreamDataIntervalTimeout < 60 || c.Gateway.ImageStreamDataIntervalTimeout > 1800) {
|
||||
return fmt.Errorf("gateway.image_stream_data_interval_timeout must be 0 or between 60-1800 seconds")
|
||||
}
|
||||
if c.Gateway.ImageStreamKeepaliveInterval < 0 {
|
||||
return fmt.Errorf("gateway.image_stream_keepalive_interval must be non-negative")
|
||||
}
|
||||
if c.Gateway.ImageStreamKeepaliveInterval != 0 &&
|
||||
(c.Gateway.ImageStreamKeepaliveInterval < 5 || c.Gateway.ImageStreamKeepaliveInterval > 60) {
|
||||
return fmt.Errorf("gateway.image_stream_keepalive_interval must be 0 or between 5-60 seconds")
|
||||
}
|
||||
// 兼容旧键 sticky_previous_response_ttl_seconds
|
||||
if c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds <= 0 && c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds > 0 {
|
||||
c.Gateway.OpenAIWS.StickyResponseIDTTLSeconds = c.Gateway.OpenAIWS.StickyPreviousResponseTTLSeconds
|
||||
|
||||
@@ -1282,6 +1282,46 @@ func TestValidateConfigErrors(t *testing.T) {
|
||||
mutate: func(c *Config) { c.Gateway.StreamDataIntervalTimeout = -1 },
|
||||
wantErr: "gateway.stream_data_interval_timeout must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway image stream keepalive range",
|
||||
mutate: func(c *Config) { c.Gateway.ImageStreamKeepaliveInterval = 4 },
|
||||
wantErr: "gateway.image_stream_keepalive_interval",
|
||||
},
|
||||
{
|
||||
name: "gateway image stream keepalive negative",
|
||||
mutate: func(c *Config) { c.Gateway.ImageStreamKeepaliveInterval = -1 },
|
||||
wantErr: "gateway.image_stream_keepalive_interval must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway image stream data interval range",
|
||||
mutate: func(c *Config) { c.Gateway.ImageStreamDataIntervalTimeout = 30 },
|
||||
wantErr: "gateway.image_stream_data_interval_timeout",
|
||||
},
|
||||
{
|
||||
name: "gateway image stream data interval negative",
|
||||
mutate: func(c *Config) { c.Gateway.ImageStreamDataIntervalTimeout = -1 },
|
||||
wantErr: "gateway.image_stream_data_interval_timeout must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway image concurrency max negative",
|
||||
mutate: func(c *Config) { c.Gateway.ImageConcurrency.MaxConcurrentRequests = -1 },
|
||||
wantErr: "gateway.image_concurrency.max_concurrent_requests must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway image concurrency overflow mode invalid",
|
||||
mutate: func(c *Config) { c.Gateway.ImageConcurrency.OverflowMode = "queue" },
|
||||
wantErr: "gateway.image_concurrency.overflow_mode",
|
||||
},
|
||||
{
|
||||
name: "gateway image concurrency wait timeout negative",
|
||||
mutate: func(c *Config) { c.Gateway.ImageConcurrency.WaitTimeoutSeconds = -1 },
|
||||
wantErr: "gateway.image_concurrency.wait_timeout_seconds must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway image concurrency max waiting negative",
|
||||
mutate: func(c *Config) { c.Gateway.ImageConcurrency.MaxWaitingRequests = -1 },
|
||||
wantErr: "gateway.image_concurrency.max_waiting_requests must be non-negative",
|
||||
},
|
||||
{
|
||||
name: "gateway max line size",
|
||||
mutate: func(c *Config) { c.Gateway.MaxLineSize = 1024 },
|
||||
@@ -1754,3 +1794,41 @@ func TestLoad_DefaultGatewayUsageRecordConfig(t *testing.T) {
|
||||
t.Fatalf("auto_scale_cooldown_seconds = %d, want 10", cfg.Gateway.UsageRecord.AutoScaleCooldownSeconds)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoad_DefaultGatewayImageStreamConfig(t *testing.T) {
|
||||
resetViperWithJWTSecret(t)
|
||||
cfg, err := Load()
|
||||
if err != nil {
|
||||
t.Fatalf("Load() error: %v", err)
|
||||
}
|
||||
if cfg.Gateway.StreamDataIntervalTimeout != 180 {
|
||||
t.Fatalf("stream_data_interval_timeout = %d, want 180", cfg.Gateway.StreamDataIntervalTimeout)
|
||||
}
|
||||
if cfg.Gateway.StreamKeepaliveInterval != 10 {
|
||||
t.Fatalf("stream_keepalive_interval = %d, want 10", cfg.Gateway.StreamKeepaliveInterval)
|
||||
}
|
||||
if cfg.Gateway.ImageStreamDataIntervalTimeout != 900 {
|
||||
t.Fatalf("image_stream_data_interval_timeout = %d, want 900", cfg.Gateway.ImageStreamDataIntervalTimeout)
|
||||
}
|
||||
if cfg.Gateway.ImageStreamKeepaliveInterval != 10 {
|
||||
t.Fatalf("image_stream_keepalive_interval = %d, want 10", cfg.Gateway.ImageStreamKeepaliveInterval)
|
||||
}
|
||||
if cfg.Gateway.ImageConcurrency.Enabled {
|
||||
t.Fatalf("image_concurrency.enabled = true, want false")
|
||||
}
|
||||
if cfg.Gateway.ImageConcurrency.MaxConcurrentRequests != 0 {
|
||||
t.Fatalf("image_concurrency.max_concurrent_requests = %d, want 0", cfg.Gateway.ImageConcurrency.MaxConcurrentRequests)
|
||||
}
|
||||
if cfg.Gateway.ImageConcurrency.OverflowMode != ImageConcurrencyOverflowModeReject {
|
||||
t.Fatalf("image_concurrency.overflow_mode = %q, want %q", cfg.Gateway.ImageConcurrency.OverflowMode, ImageConcurrencyOverflowModeReject)
|
||||
}
|
||||
if cfg.Gateway.ImageConcurrency.WaitTimeoutSeconds != 30 {
|
||||
t.Fatalf("image_concurrency.wait_timeout_seconds = %d, want 30", cfg.Gateway.ImageConcurrency.WaitTimeoutSeconds)
|
||||
}
|
||||
if cfg.Gateway.ImageConcurrency.MaxWaitingRequests != 100 {
|
||||
t.Fatalf("image_concurrency.max_waiting_requests = %d, want 100", cfg.Gateway.ImageConcurrency.MaxWaitingRequests)
|
||||
}
|
||||
if cfg.Gateway.ImageStreamDataIntervalTimeout <= cfg.Gateway.StreamDataIntervalTimeout {
|
||||
t.Fatalf("image stream timeout = %d, want greater than ordinary stream timeout %d", cfg.Gateway.ImageStreamDataIntervalTimeout, cfg.Gateway.StreamDataIntervalTimeout)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user