feat(gateway): 引入使用量记录有界 worker 池与自动扩缩容

- 新增 UsageRecordWorkerPool,支持有界队列、溢出降级策略与自动扩缩容
- 将 Gateway/OpenAI/Sora/Gemini 使用量记录改为提交到统一任务池执行
- 增加 usage_record 配置默认值与校验规则,并补充配置与任务提交相关测试
- 注入并托管 worker 池生命周期,服务退出时统一 StopAndWait

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
yangjianbo
2026-02-22 12:56:57 +08:00
parent 50b9897182
commit 33db7a0fb6
15 changed files with 1575 additions and 70 deletions

View File

@@ -19,6 +19,13 @@ const (
RunModeSimple = "simple"
)
// 使用量记录队列溢出策略
const (
UsageRecordOverflowPolicyDrop = "drop"
UsageRecordOverflowPolicySample = "sample"
UsageRecordOverflowPolicySync = "sync"
)
// DefaultCSPPolicy is the default Content-Security-Policy with nonce support
// __CSP_NONCE__ will be replaced with actual nonce at request time by the SecurityHeaders middleware
const DefaultCSPPolicy = "default-src 'self'; script-src 'self' __CSP_NONCE__ https://challenges.cloudflare.com https://static.cloudflareinsights.com; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; img-src 'self' data: https:; font-src 'self' data: https://fonts.gstatic.com; connect-src 'self' https:; frame-src https://challenges.cloudflare.com; frame-ancestors 'none'; base-uri 'self'; form-action 'self'"
@@ -413,6 +420,42 @@ type GatewayConfig struct {
// TLSFingerprint: TLS指纹伪装配置
TLSFingerprint TLSFingerprintConfig `mapstructure:"tls_fingerprint"`
// UsageRecord: 使用量记录异步队列配置(有界队列 + 固定 worker
UsageRecord GatewayUsageRecordConfig `mapstructure:"usage_record"`
}
// GatewayUsageRecordConfig 使用量记录异步队列配置
type GatewayUsageRecordConfig struct {
// WorkerCount: worker 初始数量(自动扩缩容开启时作为初始并发上限)
WorkerCount int `mapstructure:"worker_count"`
// QueueSize: 队列容量(有界)
QueueSize int `mapstructure:"queue_size"`
// TaskTimeoutSeconds: 单个使用量记录任务超时(秒)
TaskTimeoutSeconds int `mapstructure:"task_timeout_seconds"`
// OverflowPolicy: 队列满时策略drop/sample/sync
OverflowPolicy string `mapstructure:"overflow_policy"`
// OverflowSamplePercent: sample 策略下同步回写采样百分比1-100
OverflowSamplePercent int `mapstructure:"overflow_sample_percent"`
// AutoScaleEnabled: 是否启用 worker 自动扩缩容
AutoScaleEnabled bool `mapstructure:"auto_scale_enabled"`
// AutoScaleMinWorkers: 自动扩缩容最小 worker 数
AutoScaleMinWorkers int `mapstructure:"auto_scale_min_workers"`
// AutoScaleMaxWorkers: 自动扩缩容最大 worker 数
AutoScaleMaxWorkers int `mapstructure:"auto_scale_max_workers"`
// AutoScaleUpQueuePercent: 队列占用率达到该阈值时触发扩容
AutoScaleUpQueuePercent int `mapstructure:"auto_scale_up_queue_percent"`
// AutoScaleDownQueuePercent: 队列占用率低于该阈值时触发缩容
AutoScaleDownQueuePercent int `mapstructure:"auto_scale_down_queue_percent"`
// AutoScaleUpStep: 每次扩容步长
AutoScaleUpStep int `mapstructure:"auto_scale_up_step"`
// AutoScaleDownStep: 每次缩容步长
AutoScaleDownStep int `mapstructure:"auto_scale_down_step"`
// AutoScaleCheckIntervalSeconds: 自动扩缩容检测间隔(秒)
AutoScaleCheckIntervalSeconds int `mapstructure:"auto_scale_check_interval_seconds"`
// AutoScaleCooldownSeconds: 自动扩缩容冷却时间(秒)
AutoScaleCooldownSeconds int `mapstructure:"auto_scale_cooldown_seconds"`
}
// SoraModelFiltersConfig Sora 模型过滤配置
@@ -1118,6 +1161,20 @@ func setDefaults() {
viper.SetDefault("gateway.scheduling.outbox_lag_rebuild_failures", 3)
viper.SetDefault("gateway.scheduling.outbox_backlog_rebuild_rows", 10000)
viper.SetDefault("gateway.scheduling.full_rebuild_interval_seconds", 300)
viper.SetDefault("gateway.usage_record.worker_count", 128)
viper.SetDefault("gateway.usage_record.queue_size", 16384)
viper.SetDefault("gateway.usage_record.task_timeout_seconds", 5)
viper.SetDefault("gateway.usage_record.overflow_policy", UsageRecordOverflowPolicySample)
viper.SetDefault("gateway.usage_record.overflow_sample_percent", 10)
viper.SetDefault("gateway.usage_record.auto_scale_enabled", true)
viper.SetDefault("gateway.usage_record.auto_scale_min_workers", 128)
viper.SetDefault("gateway.usage_record.auto_scale_max_workers", 512)
viper.SetDefault("gateway.usage_record.auto_scale_up_queue_percent", 70)
viper.SetDefault("gateway.usage_record.auto_scale_down_queue_percent", 15)
viper.SetDefault("gateway.usage_record.auto_scale_up_step", 32)
viper.SetDefault("gateway.usage_record.auto_scale_down_step", 16)
viper.SetDefault("gateway.usage_record.auto_scale_check_interval_seconds", 3)
viper.SetDefault("gateway.usage_record.auto_scale_cooldown_seconds", 10)
// TLS指纹伪装配置默认关闭需要账号级别单独启用
viper.SetDefault("gateway.tls_fingerprint.enabled", true)
viper.SetDefault("concurrency.ping_interval", 10)
@@ -1636,6 +1693,64 @@ func (c *Config) Validate() error {
if c.Gateway.MaxLineSize != 0 && c.Gateway.MaxLineSize < 1024*1024 {
return fmt.Errorf("gateway.max_line_size must be at least 1MB")
}
if c.Gateway.UsageRecord.WorkerCount <= 0 {
return fmt.Errorf("gateway.usage_record.worker_count must be positive")
}
if c.Gateway.UsageRecord.QueueSize <= 0 {
return fmt.Errorf("gateway.usage_record.queue_size must be positive")
}
if c.Gateway.UsageRecord.TaskTimeoutSeconds <= 0 {
return fmt.Errorf("gateway.usage_record.task_timeout_seconds must be positive")
}
switch strings.ToLower(strings.TrimSpace(c.Gateway.UsageRecord.OverflowPolicy)) {
case UsageRecordOverflowPolicyDrop, UsageRecordOverflowPolicySample, UsageRecordOverflowPolicySync:
default:
return fmt.Errorf("gateway.usage_record.overflow_policy must be one of: %s/%s/%s",
UsageRecordOverflowPolicyDrop, UsageRecordOverflowPolicySample, UsageRecordOverflowPolicySync)
}
if c.Gateway.UsageRecord.OverflowSamplePercent < 0 || c.Gateway.UsageRecord.OverflowSamplePercent > 100 {
return fmt.Errorf("gateway.usage_record.overflow_sample_percent must be between 0-100")
}
if strings.EqualFold(strings.TrimSpace(c.Gateway.UsageRecord.OverflowPolicy), UsageRecordOverflowPolicySample) &&
c.Gateway.UsageRecord.OverflowSamplePercent <= 0 {
return fmt.Errorf("gateway.usage_record.overflow_sample_percent must be positive when overflow_policy=sample")
}
if c.Gateway.UsageRecord.AutoScaleEnabled {
if c.Gateway.UsageRecord.AutoScaleMinWorkers <= 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_min_workers must be positive")
}
if c.Gateway.UsageRecord.AutoScaleMaxWorkers <= 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_max_workers must be positive")
}
if c.Gateway.UsageRecord.AutoScaleMaxWorkers < c.Gateway.UsageRecord.AutoScaleMinWorkers {
return fmt.Errorf("gateway.usage_record.auto_scale_max_workers must be >= auto_scale_min_workers")
}
if c.Gateway.UsageRecord.WorkerCount < c.Gateway.UsageRecord.AutoScaleMinWorkers ||
c.Gateway.UsageRecord.WorkerCount > c.Gateway.UsageRecord.AutoScaleMaxWorkers {
return fmt.Errorf("gateway.usage_record.worker_count must be between auto_scale_min_workers and auto_scale_max_workers")
}
if c.Gateway.UsageRecord.AutoScaleUpQueuePercent <= 0 || c.Gateway.UsageRecord.AutoScaleUpQueuePercent > 100 {
return fmt.Errorf("gateway.usage_record.auto_scale_up_queue_percent must be between 1-100")
}
if c.Gateway.UsageRecord.AutoScaleDownQueuePercent < 0 || c.Gateway.UsageRecord.AutoScaleDownQueuePercent >= 100 {
return fmt.Errorf("gateway.usage_record.auto_scale_down_queue_percent must be between 0-99")
}
if c.Gateway.UsageRecord.AutoScaleDownQueuePercent >= c.Gateway.UsageRecord.AutoScaleUpQueuePercent {
return fmt.Errorf("gateway.usage_record.auto_scale_down_queue_percent must be less than auto_scale_up_queue_percent")
}
if c.Gateway.UsageRecord.AutoScaleUpStep <= 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_up_step must be positive")
}
if c.Gateway.UsageRecord.AutoScaleDownStep <= 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_down_step must be positive")
}
if c.Gateway.UsageRecord.AutoScaleCheckIntervalSeconds <= 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_check_interval_seconds must be positive")
}
if c.Gateway.UsageRecord.AutoScaleCooldownSeconds < 0 {
return fmt.Errorf("gateway.usage_record.auto_scale_cooldown_seconds must be non-negative")
}
}
if c.Gateway.Scheduling.StickySessionMaxWaiting <= 0 {
return fmt.Errorf("gateway.scheduling.sticky_session_max_waiting must be positive")
}

View File

@@ -942,6 +942,74 @@ func TestValidateConfigErrors(t *testing.T) {
mutate: func(c *Config) { c.Gateway.MaxLineSize = -1 },
wantErr: "gateway.max_line_size must be non-negative",
},
{
name: "gateway usage record worker count",
mutate: func(c *Config) { c.Gateway.UsageRecord.WorkerCount = 0 },
wantErr: "gateway.usage_record.worker_count",
},
{
name: "gateway usage record queue size",
mutate: func(c *Config) { c.Gateway.UsageRecord.QueueSize = 0 },
wantErr: "gateway.usage_record.queue_size",
},
{
name: "gateway usage record timeout",
mutate: func(c *Config) { c.Gateway.UsageRecord.TaskTimeoutSeconds = 0 },
wantErr: "gateway.usage_record.task_timeout_seconds",
},
{
name: "gateway usage record overflow policy",
mutate: func(c *Config) { c.Gateway.UsageRecord.OverflowPolicy = "invalid" },
wantErr: "gateway.usage_record.overflow_policy",
},
{
name: "gateway usage record sample percent range",
mutate: func(c *Config) { c.Gateway.UsageRecord.OverflowSamplePercent = 101 },
wantErr: "gateway.usage_record.overflow_sample_percent",
},
{
name: "gateway usage record sample percent required for sample policy",
mutate: func(c *Config) {
c.Gateway.UsageRecord.OverflowPolicy = UsageRecordOverflowPolicySample
c.Gateway.UsageRecord.OverflowSamplePercent = 0
},
wantErr: "gateway.usage_record.overflow_sample_percent must be positive",
},
{
name: "gateway usage record auto scale max gte min",
mutate: func(c *Config) {
c.Gateway.UsageRecord.AutoScaleMinWorkers = 256
c.Gateway.UsageRecord.AutoScaleMaxWorkers = 128
},
wantErr: "gateway.usage_record.auto_scale_max_workers",
},
{
name: "gateway usage record worker in auto scale range",
mutate: func(c *Config) {
c.Gateway.UsageRecord.AutoScaleMinWorkers = 200
c.Gateway.UsageRecord.AutoScaleMaxWorkers = 300
c.Gateway.UsageRecord.WorkerCount = 128
},
wantErr: "gateway.usage_record.worker_count must be between auto_scale_min_workers and auto_scale_max_workers",
},
{
name: "gateway usage record auto scale queue thresholds order",
mutate: func(c *Config) {
c.Gateway.UsageRecord.AutoScaleUpQueuePercent = 50
c.Gateway.UsageRecord.AutoScaleDownQueuePercent = 50
},
wantErr: "gateway.usage_record.auto_scale_down_queue_percent must be less",
},
{
name: "gateway usage record auto scale up step",
mutate: func(c *Config) { c.Gateway.UsageRecord.AutoScaleUpStep = 0 },
wantErr: "gateway.usage_record.auto_scale_up_step",
},
{
name: "gateway usage record auto scale interval",
mutate: func(c *Config) { c.Gateway.UsageRecord.AutoScaleCheckIntervalSeconds = 0 },
wantErr: "gateway.usage_record.auto_scale_check_interval_seconds",
},
{
name: "gateway scheduling sticky waiting",
mutate: func(c *Config) { c.Gateway.Scheduling.StickySessionMaxWaiting = 0 },
@@ -1025,6 +1093,99 @@ func TestValidateConfigErrors(t *testing.T) {
}
}
func TestValidateConfig_AutoScaleDisabledIgnoreAutoScaleFields(t *testing.T) {
resetViperWithJWTSecret(t)
cfg, err := Load()
if err != nil {
t.Fatalf("Load() error: %v", err)
}
cfg.Gateway.UsageRecord.AutoScaleEnabled = false
cfg.Gateway.UsageRecord.WorkerCount = 64
// 自动扩缩容关闭时,这些字段应被忽略,不应导致校验失败。
cfg.Gateway.UsageRecord.AutoScaleMinWorkers = 0
cfg.Gateway.UsageRecord.AutoScaleMaxWorkers = 0
cfg.Gateway.UsageRecord.AutoScaleUpQueuePercent = 0
cfg.Gateway.UsageRecord.AutoScaleDownQueuePercent = 100
cfg.Gateway.UsageRecord.AutoScaleUpStep = 0
cfg.Gateway.UsageRecord.AutoScaleDownStep = 0
cfg.Gateway.UsageRecord.AutoScaleCheckIntervalSeconds = 0
cfg.Gateway.UsageRecord.AutoScaleCooldownSeconds = -1
if err := cfg.Validate(); err != nil {
t.Fatalf("Validate() should ignore auto scale fields when disabled: %v", err)
}
}
func TestValidateConfig_LogRequiredAndRotationBounds(t *testing.T) {
resetViperWithJWTSecret(t)
cases := []struct {
name string
mutate func(*Config)
wantErr string
}{
{
name: "log level required",
mutate: func(c *Config) {
c.Log.Level = ""
},
wantErr: "log.level is required",
},
{
name: "log format required",
mutate: func(c *Config) {
c.Log.Format = ""
},
wantErr: "log.format is required",
},
{
name: "log stacktrace required",
mutate: func(c *Config) {
c.Log.StacktraceLevel = ""
},
wantErr: "log.stacktrace_level is required",
},
{
name: "log max backups non-negative",
mutate: func(c *Config) {
c.Log.Rotation.MaxBackups = -1
},
wantErr: "log.rotation.max_backups must be non-negative",
},
{
name: "log max age non-negative",
mutate: func(c *Config) {
c.Log.Rotation.MaxAgeDays = -1
},
wantErr: "log.rotation.max_age_days must be non-negative",
},
{
name: "sampling thereafter non-negative when disabled",
mutate: func(c *Config) {
c.Log.Sampling.Enabled = false
c.Log.Sampling.Thereafter = -1
},
wantErr: "log.sampling.thereafter must be non-negative",
},
}
for _, tt := range cases {
t.Run(tt.name, func(t *testing.T) {
cfg, err := Load()
if err != nil {
t.Fatalf("Load() error: %v", err)
}
tt.mutate(cfg)
err = cfg.Validate()
if err == nil || !strings.Contains(err.Error(), tt.wantErr) {
t.Fatalf("Validate() error = %v, want %q", err, tt.wantErr)
}
})
}
}
func TestSoraCurlCFFISidecarDefaults(t *testing.T) {
resetViperWithJWTSecret(t)
@@ -1112,3 +1273,53 @@ func TestValidateSoraCloudflareChallengeCooldownNonNegative(t *testing.T) {
t.Fatalf("Validate() error = %v, want cloudflare cooldown error", err)
}
}
func TestLoad_DefaultGatewayUsageRecordConfig(t *testing.T) {
resetViperWithJWTSecret(t)
cfg, err := Load()
if err != nil {
t.Fatalf("Load() error: %v", err)
}
if cfg.Gateway.UsageRecord.WorkerCount != 128 {
t.Fatalf("worker_count = %d, want 128", cfg.Gateway.UsageRecord.WorkerCount)
}
if cfg.Gateway.UsageRecord.QueueSize != 16384 {
t.Fatalf("queue_size = %d, want 16384", cfg.Gateway.UsageRecord.QueueSize)
}
if cfg.Gateway.UsageRecord.TaskTimeoutSeconds != 5 {
t.Fatalf("task_timeout_seconds = %d, want 5", cfg.Gateway.UsageRecord.TaskTimeoutSeconds)
}
if cfg.Gateway.UsageRecord.OverflowPolicy != UsageRecordOverflowPolicySample {
t.Fatalf("overflow_policy = %s, want %s", cfg.Gateway.UsageRecord.OverflowPolicy, UsageRecordOverflowPolicySample)
}
if cfg.Gateway.UsageRecord.OverflowSamplePercent != 10 {
t.Fatalf("overflow_sample_percent = %d, want 10", cfg.Gateway.UsageRecord.OverflowSamplePercent)
}
if !cfg.Gateway.UsageRecord.AutoScaleEnabled {
t.Fatalf("auto_scale_enabled = false, want true")
}
if cfg.Gateway.UsageRecord.AutoScaleMinWorkers != 128 {
t.Fatalf("auto_scale_min_workers = %d, want 128", cfg.Gateway.UsageRecord.AutoScaleMinWorkers)
}
if cfg.Gateway.UsageRecord.AutoScaleMaxWorkers != 512 {
t.Fatalf("auto_scale_max_workers = %d, want 512", cfg.Gateway.UsageRecord.AutoScaleMaxWorkers)
}
if cfg.Gateway.UsageRecord.AutoScaleUpQueuePercent != 70 {
t.Fatalf("auto_scale_up_queue_percent = %d, want 70", cfg.Gateway.UsageRecord.AutoScaleUpQueuePercent)
}
if cfg.Gateway.UsageRecord.AutoScaleDownQueuePercent != 15 {
t.Fatalf("auto_scale_down_queue_percent = %d, want 15", cfg.Gateway.UsageRecord.AutoScaleDownQueuePercent)
}
if cfg.Gateway.UsageRecord.AutoScaleUpStep != 32 {
t.Fatalf("auto_scale_up_step = %d, want 32", cfg.Gateway.UsageRecord.AutoScaleUpStep)
}
if cfg.Gateway.UsageRecord.AutoScaleDownStep != 16 {
t.Fatalf("auto_scale_down_step = %d, want 16", cfg.Gateway.UsageRecord.AutoScaleDownStep)
}
if cfg.Gateway.UsageRecord.AutoScaleCheckIntervalSeconds != 3 {
t.Fatalf("auto_scale_check_interval_seconds = %d, want 3", cfg.Gateway.UsageRecord.AutoScaleCheckIntervalSeconds)
}
if cfg.Gateway.UsageRecord.AutoScaleCooldownSeconds != 10 {
t.Fatalf("auto_scale_cooldown_seconds = %d, want 10", cfg.Gateway.UsageRecord.AutoScaleCooldownSeconds)
}
}