feat(openai): 支持messages模型映射与instructions模板注入

2026-04-09 12:29:49 +08:00
parent 23c4d592f8
commit 4de4823a65
11 changed files with 326 additions and 47 deletions
--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -318,6 +318,12 @@ type GatewayConfig struct {
 	// ForceCodexCLI: 强制将 OpenAI `/v1/responses` 请求按 Codex CLI 处理。
 	// 用于网关未透传/改写 User-Agent 时的兼容兜底（默认关闭，避免影响其他客户端）。
 	ForceCodexCLI bool `mapstructure:"force_codex_cli"`
 	// ForcedCodexInstructionsTemplateFile: 服务端用于渲染并强制写入 Codex 顶层 instructions 的模板文件路径。
 	// 模板渲染后会直接覆盖最终 instructions；若需要保留客户端 system 转换结果，请在模板中显式引用 {{ .ExistingInstructions }}。
 	ForcedCodexInstructionsTemplateFile string `mapstructure:"forced_codex_instructions_template_file"`
 	// ForcedCodexInstructionsTemplate: 启动时从模板文件读取并缓存的模板内容。
 	// 该字段不直接参与配置反序列化，仅用于请求热路径避免重复读盘。
 	ForcedCodexInstructionsTemplate string `mapstructure:"-"`
 	// OpenAIPassthroughAllowTimeoutHeaders: OpenAI 透传模式是否放行客户端超时头
 	// 关闭（默认）可避免 x-stainless-timeout 等头导致上游提前断流。
 	OpenAIPassthroughAllowTimeoutHeaders bool `mapstructure:"openai_passthrough_allow_timeout_headers"`
@@ -983,6 +989,14 @@ func load(allowMissingJWTSecret bool) (*Config, error) {
 	cfg.Log.Environment = strings.TrimSpace(cfg.Log.Environment)
 	cfg.Log.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel))
 	cfg.Log.Output.FilePath = strings.TrimSpace(cfg.Log.Output.FilePath)
 	cfg.Gateway.ForcedCodexInstructionsTemplateFile = strings.TrimSpace(cfg.Gateway.ForcedCodexInstructionsTemplateFile)
 	if cfg.Gateway.ForcedCodexInstructionsTemplateFile != "" {
 		content, err := os.ReadFile(cfg.Gateway.ForcedCodexInstructionsTemplateFile)
 		if err != nil {
 			return nil, fmt.Errorf("read forced codex instructions template %q: %w", cfg.Gateway.ForcedCodexInstructionsTemplateFile, err)
 		}
 		cfg.Gateway.ForcedCodexInstructionsTemplate = string(content)
 	}
 	// 兼容旧键 gateway.openai_ws.sticky_previous_response_ttl_seconds。
 	// 新键未配置（<=0）时回退旧键；新键优先。
--- a/backend/internal/config/config_test.go
+++ b/backend/internal/config/config_test.go
@@ -1,6 +1,8 @@
 package config
 import (
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"time"
@@ -223,6 +225,23 @@ func TestLoadSchedulingConfigFromEnv(t *testing.T) {
 	}
 }
 // TestLoadForcedCodexInstructionsTemplate checks that Load keeps the configured
 // template file path and exposes that file's content on the gateway config.
 func TestLoadForcedCodexInstructionsTemplate(t *testing.T) {
 	resetViperWithJWTSecret(t)
 	const templateBody = "server-prefix\n\n{{ .ExistingInstructions }}"
 	dataDir := t.TempDir()
 	tmplFile := filepath.Join(dataDir, "codex-instructions.md.tmpl")
 	require.NoError(t, os.WriteFile(tmplFile, []byte(templateBody), 0o644))
 	// config.yaml is written into the directory that DATA_DIR points at below.
 	yamlDoc := "gateway:\n  forced_codex_instructions_template_file: \"" + tmplFile + "\"\n"
 	require.NoError(t, os.WriteFile(filepath.Join(dataDir, "config.yaml"), []byte(yamlDoc), 0o644))
 	t.Setenv("DATA_DIR", dataDir)
 	loaded, loadErr := Load()
 	require.NoError(t, loadErr)
 	gateway := loaded.Gateway
 	require.Equal(t, tmplFile, gateway.ForcedCodexInstructionsTemplateFile)
 	require.Equal(t, templateBody, gateway.ForcedCodexInstructionsTemplate)
 }
 func TestLoadDefaultSecurityToggles(t *testing.T) {
 	resetViperWithJWTSecret(t)
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -47,6 +47,13 @@ func resolveOpenAIForwardDefaultMappedModel(apiKey *service.APIKey, fallbackMode
 	return strings.TrimSpace(apiKey.Group.DefaultMappedModel)
 }
 // resolveOpenAIMessagesDispatchMappedModel returns the whitespace-trimmed result
 // of the API key group's ResolveMessagesDispatchModel for requestedModel.
 // It returns "" when apiKey or apiKey.Group is nil.
 func resolveOpenAIMessagesDispatchMappedModel(apiKey *service.APIKey, requestedModel string) string {
 	if apiKey != nil && apiKey.Group != nil {
 		mapped := apiKey.Group.ResolveMessagesDispatchModel(requestedModel)
 		return strings.TrimSpace(mapped)
 	}
 	return ""
 }
 // NewOpenAIGatewayHandler creates a new OpenAIGatewayHandler
 func NewOpenAIGatewayHandler(
 	gatewayService *service.OpenAIGatewayService,
@@ -551,6 +558,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 	}
 	reqModel := modelResult.String()
 	routingModel := service.NormalizeOpenAICompatRequestedModel(reqModel)
 	preferredMappedModel := resolveOpenAIMessagesDispatchMappedModel(apiKey, reqModel)
 	reqStream := gjson.GetBytes(body, "stream").Bool()
 	reqLog = reqLog.With(zap.String("model", reqModel), zap.Bool("stream", reqStream))
@@ -609,17 +617,20 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 	failedAccountIDs := make(map[int64]struct{})
 	sameAccountRetryCount := make(map[int64]int)
 	var lastFailoverErr *service.UpstreamFailoverError
 	effectiveMappedModel := preferredMappedModel
 	for {
-		// 清除上一次迭代的降级模型标记，避免残留影响本次迭代
+		currentRoutingModel := routingModel
-		c.Set("openai_messages_fallback_model", "")
+		if effectiveMappedModel != "" {
 			currentRoutingModel = effectiveMappedModel
 		}
 		reqLog.Debug("openai_messages.account_selecting", zap.Int("excluded_account_count", len(failedAccountIDs)))
 		selection, scheduleDecision, err := h.gatewayService.SelectAccountWithScheduler(
 			c.Request.Context(),
 			apiKey.GroupID,
 			"", // no previous_response_id
 			sessionHash,
-			routingModel,
+			currentRoutingModel,
 			failedAccountIDs,
 			service.OpenAIUpstreamTransportAny,
 		)
@@ -628,29 +639,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 				zap.Error(err),
 				zap.Int("excluded_account_count", len(failedAccountIDs)),
 			)
 			// 首次调度失败 + 有默认映射模型 → 用默认模型重试
 			if len(failedAccountIDs) == 0 {
 				defaultModel := ""
 				if apiKey.Group != nil {
 					defaultModel = apiKey.Group.DefaultMappedModel
 				}
 				if defaultModel != "" && defaultModel != routingModel {
 					reqLog.Info("openai_messages.fallback_to_default_model",
 						zap.String("default_mapped_model", defaultModel),
 					)
 					selection, scheduleDecision, err = h.gatewayService.SelectAccountWithScheduler(
 						c.Request.Context(),
 						apiKey.GroupID,
 						"",
 						sessionHash,
 						defaultModel,
 						failedAccountIDs,
 						service.OpenAIUpstreamTransportAny,
 					)
 					if err == nil && selection != nil {
 						c.Set("openai_messages_fallback_model", defaultModel)
 					}
 				}
 				if err != nil {
 					h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
 					return
@@ -682,9 +671,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 		service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
 		forwardStart := time.Now()
-		// Forward 层需要始终拿到 group 默认映射模型，这样未命中账号级映射的
+		defaultMappedModel := strings.TrimSpace(effectiveMappedModel)
 		// Claude 兼容模型才不会在后续 Codex 规范化中意外退化到 gpt-5.1。
 		defaultMappedModel := resolveOpenAIForwardDefaultMappedModel(apiKey, c.GetString("openai_messages_fallback_model"))
 		// 应用渠道模型映射到请求体
 		forwardBody := body
 		if channelMappingMsg.Mapped {
--- a/backend/internal/handler/openai_gateway_handler_test.go
+++ b/backend/internal/handler/openai_gateway_handler_test.go
@@ -360,7 +360,7 @@ func TestResolveOpenAIForwardDefaultMappedModel(t *testing.T) {
 		require.Equal(t, "gpt-5.2", resolveOpenAIForwardDefaultMappedModel(apiKey, " gpt-5.2 "))
 	})
-	t.Run("uses_group_default_on_normal_path", func(t *testing.T) {
+	t.Run("uses_group_default_when_explicit_fallback_absent", func(t *testing.T) {
 		apiKey := &service.APIKey{
 			Group: &service.Group{DefaultMappedModel: "gpt-5.4"},
 		}
@@ -376,6 +376,45 @@ func TestResolveOpenAIForwardDefaultMappedModel(t *testing.T) {
 	})
 }
 // TestResolveOpenAIMessagesDispatchMappedModel exercises
 // resolveOpenAIMessagesDispatchMappedModel for exact overrides, groups without
 // overrides, inputs that must yield "", and groups that only set DefaultMappedModel.
 func TestResolveOpenAIMessagesDispatchMappedModel(t *testing.T) {
 	const (
 		sonnetModel = "claude-sonnet-4-5-20250929"
 		opusModel   = "claude-opus-4-6"
 		haikuModel  = "claude-haiku-4-5-20251001"
 	)
 	t.Run("exact_claude_model_override_wins", func(t *testing.T) {
 		dispatchCfg := service.OpenAIMessagesDispatchModelConfig{
 			SonnetMappedModel:  "gpt-5.2",
 			ExactModelMappings: map[string]string{sonnetModel: "gpt-5.4-mini-high"},
 		}
 		key := &service.APIKey{Group: &service.Group{MessagesDispatchModelConfig: dispatchCfg}}
 		// NOTE(review): the mapping target ends in "-high" but the expectation does not;
 		// presumably the resolver strips that suffix — confirm against ResolveMessagesDispatchModel.
 		require.Equal(t, "gpt-5.4-mini", resolveOpenAIMessagesDispatchMappedModel(key, sonnetModel))
 	})
 	t.Run("uses_family_default_when_no_override", func(t *testing.T) {
 		key := &service.APIKey{Group: &service.Group{}}
 		expectations := []struct{ requested, mapped string }{
 			{opusModel, "gpt-5.4"},
 			{sonnetModel, "gpt-5.3-codex"},
 			{haikuModel, "gpt-5.4-mini"},
 		}
 		for _, want := range expectations {
 			require.Equal(t, want.mapped, resolveOpenAIMessagesDispatchMappedModel(key, want.requested))
 		}
 	})
 	t.Run("returns_empty_for_non_claude_or_missing_group", func(t *testing.T) {
 		withoutGroup := &service.APIKey{}
 		emptyGroup := &service.APIKey{Group: &service.Group{}}
 		require.Empty(t, resolveOpenAIMessagesDispatchMappedModel(nil, sonnetModel))
 		require.Empty(t, resolveOpenAIMessagesDispatchMappedModel(withoutGroup, sonnetModel))
 		require.Empty(t, resolveOpenAIMessagesDispatchMappedModel(emptyGroup, "gpt-5.4"))
 	})
 	t.Run("does_not_fall_back_to_group_default_mapped_model", func(t *testing.T) {
 		// Only DefaultMappedModel is set; the dispatch resolver must not return it for a non-Claude model.
 		key := &service.APIKey{Group: &service.Group{DefaultMappedModel: "gpt-5.4"}}
 		require.Empty(t, resolveOpenAIMessagesDispatchMappedModel(key, "gpt-5.4"))
 		require.Equal(t, "gpt-5.3-codex", resolveOpenAIMessagesDispatchMappedModel(key, sonnetModel))
 	})
 }
 func TestOpenAIResponses_MissingDependencies_ReturnsServiceUnavailable(t *testing.T) {
 	gin.SetMode(gin.TestMode)
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -28,7 +28,7 @@ type AnthropicRequest struct {
 // AnthropicOutputConfig controls output generation parameters.
 type AnthropicOutputConfig struct {
-	Effort string `json:"effort,omitempty"` // "low" | "medium" | "high"
+	Effort string `json:"effort,omitempty"` // "low" | "medium" | "high" | "max"
 }
 // AnthropicThinking configures extended thinking in the Anthropic API.
@@ -167,7 +167,7 @@ type ResponsesRequest struct {
 // ResponsesReasoning configures reasoning effort in the Responses API.
 type ResponsesReasoning struct {
-	Effort  string `json:"effort"`            // "low" | "medium" | "high"
+	Effort  string `json:"effort"`            // "low" | "medium" | "high" | "xhigh"
 	Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed"
 }
@@ -345,7 +345,7 @@ type ChatCompletionsRequest struct {
 	StreamOptions       *ChatStreamOptions `json:"stream_options,omitempty"`
 	Tools               []ChatTool         `json:"tools,omitempty"`
 	ToolChoice          json.RawMessage    `json:"tool_choice,omitempty"`
-	ReasoningEffort     string             `json:"reasoning_effort,omitempty"` // "low" | "medium" | "high"
+	ReasoningEffort     string             `json:"reasoning_effort,omitempty"` // "low" | "medium" | "high" | "xhigh"
 	ServiceTier         string             `json:"service_tier,omitempty"`
 	Stop                json.RawMessage    `json:"stop,omitempty"` // string or []string
--- a/backend/internal/service/openai_codex_instructions_template.go
+++ b/backend/internal/service/openai_codex_instructions_template.go
@@ -0,0 +1,55 @@
 package service
 import (
 	"bytes"
 	"fmt"
 	"strings"
 	"sync"
 	"text/template"
 )
 // forcedCodexInstructionsTemplateData is the value handed to the forced Codex
 // instructions template when it is executed; a template reads a field as
 // {{ .FieldName }}.
 //
 // Fields:
 //   - ExistingInstructions: the instructions text the request already carries
 //     before the template output replaces it.
 //   - OriginalModel, NormalizedModel, BillingModel, UpstreamModel: model
 //     identifiers supplied by the caller. NOTE(review): presumably the requested,
 //     routing-normalized, billing and final upstream model names respectively —
 //     confirm against the call site.
 type forcedCodexInstructionsTemplateData struct {
 	ExistingInstructions string
 	OriginalModel        string
 	NormalizedModel      string
 	BillingModel         string
 	UpstreamModel        string
 }
 // applyForcedCodexInstructionsTemplate renders templateText with data and, when
 // the result is non-empty and differs from the request's current instructions,
 // stores it in reqBody["instructions"].
 //
 // It returns true only when reqBody was modified. A render failure is returned
 // as-is and leaves reqBody untouched.
 func applyForcedCodexInstructionsTemplate(
 	reqBody map[string]any,
 	templateText string,
 	data forcedCodexInstructionsTemplateData,
 ) (bool, error) {
 	forced, renderErr := renderForcedCodexInstructionsTemplate(templateText, data)
 	switch {
 	case renderErr != nil:
 		return false, renderErr
 	case forced == "":
 		// Nothing was rendered, so the request keeps whatever instructions it has.
 		return false, nil
 	}
 	// A non-string or missing value can never equal the non-empty rendered text.
 	if current, isString := reqBody["instructions"].(string); isString && strings.TrimSpace(current) == forced {
 		return false, nil
 	}
 	reqBody["instructions"] = forced
 	return true, nil
 }
 // forcedCodexInstructionsTemplateCache memoizes successfully parsed templates,
 // keyed by their source text (string -> *template.Template), so the request
 // path parses a given template text only once.
 var forcedCodexInstructionsTemplateCache sync.Map
 // renderForcedCodexInstructionsTemplate executes templateText against data and
 // returns the output with leading and trailing whitespace removed.
 //
 // A blank (empty or whitespace-only) template yields "" without being parsed.
 // Parse and execution failures are returned wrapped with context; referencing a
 // field that data does not have is an execution error.
 func renderForcedCodexInstructionsTemplate(
 	templateText string,
 	data forcedCodexInstructionsTemplateData,
 ) (string, error) {
 	// A blank template can only produce whitespace, which trims to "".
 	if strings.TrimSpace(templateText) == "" {
 		return "", nil
 	}
 	tmpl, err := parsedForcedCodexInstructionsTemplate(templateText)
 	if err != nil {
 		return "", err
 	}
 	var buf bytes.Buffer
 	if err := tmpl.Execute(&buf, data); err != nil {
 		return "", fmt.Errorf("render forced codex instructions template: %w", err)
 	}
 	return strings.TrimSpace(buf.String()), nil
 }
 // parsedForcedCodexInstructionsTemplate returns the parsed template for
 // templateText, reusing a cached instance when one exists. Parse failures are
 // not cached, so a broken template reports its error on every call.
 func parsedForcedCodexInstructionsTemplate(templateText string) (*template.Template, error) {
 	if cached, ok := forcedCodexInstructionsTemplateCache.Load(templateText); ok {
 		return cached.(*template.Template), nil
 	}
 	tmpl, err := template.New("forced_codex_instructions").Option("missingkey=zero").Parse(templateText)
 	if err != nil {
 		return nil, fmt.Errorf("parse forced codex instructions template: %w", err)
 	}
 	// LoadOrStore keeps a single winner if two goroutines parse the same text concurrently.
 	actual, _ := forcedCodexInstructionsTemplateCache.LoadOrStore(templateText, tmpl)
 	return actual.(*template.Template), nil
 }
--- a/backend/internal/service/openai_compat_model_test.go
+++ b/backend/internal/service/openai_compat_model_test.go
@@ -6,9 +6,12 @@ import (
 	"io"
 	"net/http"
 	"net/http/httptest"
 	"os"
 	"path/filepath"
 	"strings"
 	"testing"
 	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
 	"github.com/gin-gonic/gin"
 	"github.com/stretchr/testify/require"
@@ -127,3 +130,101 @@ func TestForwardAsAnthropic_NormalizesRoutingAndEffortForGpt54XHigh(t *testing.T
 	t.Logf("upstream body: %s", string(upstream.lastBody))
 	t.Logf("response body: %s", rec.Body.String())
 }
 // TestForwardAsAnthropic_ForcedCodexInstructionsTemplatePrependsRenderedInstructions
 // sends an Anthropic-style request carrying a system prompt through
 // ForwardAsAnthropic with a forced instructions template configured, and checks
 // that the upstream body's instructions are the template prefix followed by the
 // client system text.
 func TestForwardAsAnthropic_ForcedCodexInstructionsTemplatePrependsRenderedInstructions(t *testing.T) {
 	t.Parallel()
 	gin.SetMode(gin.TestMode)
 	const forcedTemplate = "server-prefix\n\n{{ .ExistingInstructions }}"
 	// The same template text is written to disk and placed in the config's cached field.
 	tmplFile := filepath.Join(t.TempDir(), "codex-instructions.md.tmpl")
 	require.NoError(t, os.WriteFile(tmplFile, []byte(forcedTemplate), 0o644))
 	recorder := httptest.NewRecorder()
 	ginCtx, _ := gin.CreateTestContext(recorder)
 	payload := []byte(`{"model":"gpt-5.4","max_tokens":16,"system":"client-system","messages":[{"role":"user","content":"hello"}],"stream":false}`)
 	httpReq := httptest.NewRequest(http.MethodPost, "/v1/messages", bytes.NewReader(payload))
 	httpReq.Header.Set("Content-Type", "application/json")
 	ginCtx.Request = httpReq
 	// Canned SSE stream: one response.completed event followed by the [DONE] marker.
 	const completedEvent = `data: {"type":"response.completed","response":{"id":"resp_1","object":"response","model":"gpt-5.4","status":"completed","output":[{"type":"message","id":"msg_1","role":"assistant","status":"completed","content":[{"type":"output_text","text":"ok"}]}],"usage":{"input_tokens":5,"output_tokens":2,"total_tokens":7}}}`
 	sseStream := completedEvent + "\n\ndata: [DONE]\n"
 	fakeUpstream := &httpUpstreamRecorder{resp: &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{"Content-Type": []string{"text/event-stream"}, "x-request-id": []string{"rid_forced"}},
 		Body:       io.NopCloser(strings.NewReader(sseStream)),
 	}}
 	gatewayCfg := config.GatewayConfig{
 		ForcedCodexInstructionsTemplateFile: tmplFile,
 		ForcedCodexInstructionsTemplate:     forcedTemplate,
 	}
 	gatewaySvc := &OpenAIGatewayService{
 		cfg:          &config.Config{Gateway: gatewayCfg},
 		httpUpstream: fakeUpstream,
 	}
 	oauthAccount := &Account{
 		ID:          1,
 		Name:        "openai-oauth",
 		Platform:    PlatformOpenAI,
 		Type:        AccountTypeOAuth,
 		Concurrency: 1,
 		Credentials: map[string]any{
 			"access_token":       "oauth-token",
 			"chatgpt_account_id": "chatgpt-acc",
 		},
 	}
 	forwarded, forwardErr := gatewaySvc.ForwardAsAnthropic(context.Background(), ginCtx, oauthAccount, payload, "", "gpt-5.1")
 	require.NoError(t, forwardErr)
 	require.NotNil(t, forwarded)
 	sentInstructions := gjson.GetBytes(fakeUpstream.lastBody, "instructions").String()
 	require.Equal(t, "server-prefix\n\nclient-system", sentInstructions)
 }
 // TestForwardAsAnthropic_ForcedCodexInstructionsTemplateUsesCachedTemplateContent
 // configures a template file path that does not exist together with cached
 // template text, and checks that the upstream instructions are rendered from
 // the cached text.
 func TestForwardAsAnthropic_ForcedCodexInstructionsTemplateUsesCachedTemplateContent(t *testing.T) {
 	t.Parallel()
 	gin.SetMode(gin.TestMode)
 	recorder := httptest.NewRecorder()
 	ginCtx, _ := gin.CreateTestContext(recorder)
 	payload := []byte(`{"model":"gpt-5.4","max_tokens":16,"system":"client-system","messages":[{"role":"user","content":"hello"}],"stream":false}`)
 	httpReq := httptest.NewRequest(http.MethodPost, "/v1/messages", bytes.NewReader(payload))
 	httpReq.Header.Set("Content-Type", "application/json")
 	ginCtx.Request = httpReq
 	// Canned SSE stream: one response.completed event followed by the [DONE] marker.
 	const completedEvent = `data: {"type":"response.completed","response":{"id":"resp_1","object":"response","model":"gpt-5.4","status":"completed","output":[{"type":"message","id":"msg_1","role":"assistant","status":"completed","content":[{"type":"output_text","text":"ok"}]}],"usage":{"input_tokens":5,"output_tokens":2,"total_tokens":7}}}`
 	sseStream := completedEvent + "\n\ndata: [DONE]\n"
 	fakeUpstream := &httpUpstreamRecorder{resp: &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{"Content-Type": []string{"text/event-stream"}, "x-request-id": []string{"rid_forced_cached"}},
 		Body:       io.NopCloser(strings.NewReader(sseStream)),
 	}}
 	// The file path is deliberately bogus: only the cached template text should be used.
 	gatewayCfg := config.GatewayConfig{
 		ForcedCodexInstructionsTemplateFile: "/path/that/should/not/be/read.tmpl",
 		ForcedCodexInstructionsTemplate:     "cached-prefix\n\n{{ .ExistingInstructions }}",
 	}
 	gatewaySvc := &OpenAIGatewayService{
 		cfg:          &config.Config{Gateway: gatewayCfg},
 		httpUpstream: fakeUpstream,
 	}
 	oauthAccount := &Account{
 		ID:          1,
 		Name:        "openai-oauth",
 		Platform:    PlatformOpenAI,
 		Type:        AccountTypeOAuth,
 		Concurrency: 1,
 		Credentials: map[string]any{
 			"access_token":       "oauth-token",
 			"chatgpt_account_id": "chatgpt-acc",
 		},
 	}
 	forwarded, forwardErr := gatewaySvc.ForwardAsAnthropic(context.Background(), ginCtx, oauthAccount, payload, "", "gpt-5.1")
 	require.NoError(t, forwardErr)
 	require.NotNil(t, forwarded)
 	sentInstructions := gjson.GetBytes(fakeUpstream.lastBody, "instructions").String()
 	require.Equal(t, "cached-prefix\n\nclient-system", sentInstructions)
 }
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -86,6 +86,24 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 			return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
 		}
 		codexResult := applyCodexOAuthTransform(reqBody, false, false)
 		forcedTemplateText := ""
 		if s.cfg != nil {
 			forcedTemplateText = s.cfg.Gateway.ForcedCodexInstructionsTemplate
 		}
 		templateUpstreamModel := upstreamModel
 		if codexResult.NormalizedModel != "" {
 			templateUpstreamModel = codexResult.NormalizedModel
 		}
 		existingInstructions, _ := reqBody["instructions"].(string)
 		if _, err := applyForcedCodexInstructionsTemplate(reqBody, forcedTemplateText, forcedCodexInstructionsTemplateData{
 			ExistingInstructions: strings.TrimSpace(existingInstructions),
 			OriginalModel:        originalModel,
 			NormalizedModel:      normalizedModel,
 			BillingModel:         billingModel,
 			UpstreamModel:        templateUpstreamModel,
 		}); err != nil {
 			return nil, err
 		}
 		if codexResult.NormalizedModel != "" {
 			upstreamModel = codexResult.NormalizedModel
 		}
--- a/deploy/codex-instructions.md.tmpl
+++ b/deploy/codex-instructions.md.tmpl
@@ -0,0 +1,5 @@
 You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
 {{ if .ExistingInstructions }}
 {{ .ExistingInstructions }}
 {{ end }}
--- a/deploy/config.example.yaml
+++ b/deploy/config.example.yaml
@@ -202,6 +202,32 @@ gateway:
  #
  # 注意：开启后会影响所有客户端的行为（不仅限于 VS Code / Codex CLI），请谨慎开启。
  force_codex_cli: false
  # Optional: template file used to build the final top-level Codex `instructions`.
  # 可选：用于构建最终 Codex 顶层 `instructions` 的模板文件路径。
  #
  # This is applied on the `/v1/messages -> Responses/Codex` conversion path,
  # after Claude `system` has already been normalized into Codex `instructions`.
  # 该模板作用于 `/v1/messages -> Responses/Codex` 转换链路，且发生在 Claude `system`
  # 已经被归一化为 Codex `instructions` 之后。
  #
  # The template can reference:
  # 模板可引用：
  # - {{ .ExistingInstructions }} : converted client instructions/system
  # - {{ .OriginalModel }}        : original requested model
  # - {{ .NormalizedModel }}      : normalized routing model
  # - {{ .BillingModel }}         : billing model
  # - {{ .UpstreamModel }}        : final upstream model
  #
  # If you want to preserve client system prompts, keep {{ .ExistingInstructions }}
  # somewhere in the template. If omitted, the template output fully replaces it.
  # 如需保留客户端 system 提示词，请在模板中显式包含 {{ .ExistingInstructions }}。
  # 若省略，则模板输出会完全覆盖它。
  #
  # Docker users can mount a host file to /app/data/codex-instructions.md.tmpl
  # and point this field there.
  # Docker 用户可将宿主机文件挂载到 /app/data/codex-instructions.md.tmpl，
  # 然后把本字段指向该路径。
  forced_codex_instructions_template_file: ""
  # OpenAI 透传模式是否放行客户端超时头（如 x-stainless-timeout）
  # 默认 false：过滤超时头，降低上游提前断流风险。
  openai_passthrough_allow_timeout_headers: false
@@ -347,12 +373,6 @@ gateway:
    # Enable batch load calculation for scheduling
    # 启用调度批量负载计算
    load_batch_enabled: true
    # Snapshot bucket MGET chunk size
    # 调度快照分桶读取时的 MGET 分块大小
    snapshot_mget_chunk_size: 128
    # Snapshot bucket write chunk size
    # 调度快照重建写入时的分块大小
    snapshot_write_chunk_size: 256
    # Slot cleanup interval (duration)
    # 并发槽位清理周期（时间段）
    slot_cleanup_interval: 30s
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@@ -31,6 +31,10 @@ services:
      # Optional: Mount custom config.yaml (uncomment and create the file first)
      # Copy config.example.yaml to config.yaml, modify it, then uncomment:
      # - ./config.yaml:/app/data/config.yaml
      # Optional: Mount a custom Codex instructions template file, then point
      # gateway.forced_codex_instructions_template_file at /app/data/codex-instructions.md.tmpl
      # in config.yaml.
      # - ./codex-instructions.md.tmpl:/app/data/codex-instructions.md.tmpl:ro
    environment:
      # =======================================================================
      # Auto Setup (REQUIRED for Docker deployment)
@@ -146,7 +150,17 @@ services:
    networks:
      - sub2api-network
    healthcheck:
-      test: ["CMD", "wget", "-q", "-T", "5", "-O", "/dev/null", "http://localhost:8080/health"]
+      test:
        [
          "CMD",
          "wget",
          "-q",
          "-T",
          "5",
          "-O",
          "/dev/null",
          "http://localhost:8080/health",
        ]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -177,11 +191,17 @@ services:
    networks:
      - sub2api-network
    healthcheck:
-      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-sub2api} -d ${POSTGRES_DB:-sub2api}"]
+      test:
        [
          "CMD-SHELL",
          "pg_isready -U ${POSTGRES_USER:-sub2api} -d ${POSTGRES_DB:-sub2api}",
        ]
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 10s
    ports:
      - 5432:5432
    # 注意：不暴露端口到宿主机，应用通过内部网络连接
    # 如需调试，可临时添加：ports: ["127.0.0.1:5433:5432"]
@@ -199,12 +219,12 @@ services:
    volumes:
      - redis_data:/data
    command: >
-        sh -c '
+      sh -c '
-          redis-server
+        redis-server
-          --save 60 1
+        --save 60 1
-          --appendonly yes
+        --appendonly yes
-          --appendfsync everysec
+        --appendfsync everysec
-          ${REDIS_PASSWORD:+--requirepass "$REDIS_PASSWORD"}'
+        ${REDIS_PASSWORD:+--requirepass "$REDIS_PASSWORD"}'
    environment:
      - TZ=${TZ:-Asia/Shanghai}
      # REDISCLI_AUTH is used by redis-cli for authentication (safer than -a flag)
@@ -217,7 +237,8 @@ services:
      timeout: 5s
      retries: 5
      start_period: 5s
-
+    ports:
      - 6379:6379
 # =============================================================================
 # Volumes
 # =============================================================================