feat: 支持后台设置是否启用整流开关

This commit is contained in:
shaw
2026-03-07 21:45:18 +08:00
parent bcb6444f89
commit a3791104f9
18 changed files with 557 additions and 7 deletions

View File

@@ -1348,6 +1348,63 @@ func (h *SettingHandler) TestSoraS3Connection(c *gin.Context) {
response.Success(c, gin.H{"message": "S3 连接成功"})
}
// GetRectifierSettings 获取请求整流器配置
// GET /api/v1/admin/settings/rectifier
func (h *SettingHandler) GetRectifierSettings(c *gin.Context) {
settings, err := h.settingService.GetRectifierSettings(c.Request.Context())
if err != nil {
response.ErrorFrom(c, err)
return
}
response.Success(c, dto.RectifierSettings{
Enabled: settings.Enabled,
ThinkingSignatureEnabled: settings.ThinkingSignatureEnabled,
ThinkingBudgetEnabled: settings.ThinkingBudgetEnabled,
})
}
// UpdateRectifierSettingsRequest 更新整流器配置请求
type UpdateRectifierSettingsRequest struct {
Enabled bool `json:"enabled"`
ThinkingSignatureEnabled bool `json:"thinking_signature_enabled"`
ThinkingBudgetEnabled bool `json:"thinking_budget_enabled"`
}
// UpdateRectifierSettings 更新请求整流器配置
// PUT /api/v1/admin/settings/rectifier
func (h *SettingHandler) UpdateRectifierSettings(c *gin.Context) {
var req UpdateRectifierSettingsRequest
if err := c.ShouldBindJSON(&req); err != nil {
response.BadRequest(c, "Invalid request: "+err.Error())
return
}
settings := &service.RectifierSettings{
Enabled: req.Enabled,
ThinkingSignatureEnabled: req.ThinkingSignatureEnabled,
ThinkingBudgetEnabled: req.ThinkingBudgetEnabled,
}
if err := h.settingService.SetRectifierSettings(c.Request.Context(), settings); err != nil {
response.BadRequest(c, err.Error())
return
}
// 重新获取设置返回
updatedSettings, err := h.settingService.GetRectifierSettings(c.Request.Context())
if err != nil {
response.ErrorFrom(c, err)
return
}
response.Success(c, dto.RectifierSettings{
Enabled: updatedSettings.Enabled,
ThinkingSignatureEnabled: updatedSettings.ThinkingSignatureEnabled,
ThinkingBudgetEnabled: updatedSettings.ThinkingBudgetEnabled,
})
}
// UpdateStreamTimeoutSettingsRequest 更新流超时配置请求
type UpdateStreamTimeoutSettingsRequest struct {
Enabled bool `json:"enabled"`

View File

@@ -161,6 +161,13 @@ type StreamTimeoutSettings struct {
ThresholdWindowMinutes int `json:"threshold_window_minutes"`
}
// RectifierSettings 请求整流器配置 DTO
type RectifierSettings struct {
Enabled bool `json:"enabled"`
ThinkingSignatureEnabled bool `json:"thinking_signature_enabled"`
ThinkingBudgetEnabled bool `json:"thinking_budget_enabled"`
}
// ParseCustomMenuItems parses a JSON string into a slice of CustomMenuItem.
// Returns empty slice on empty/invalid input.
func ParseCustomMenuItems(raw string) []CustomMenuItem {

View File

@@ -155,6 +155,7 @@ func newTestGatewayHandler(t *testing.T, group *service.Group, accounts []*servi
nil, // sessionLimitCache
nil, // rpmCache
nil, // digestStore
nil, // settingService
)
// RunModeSimple跳过计费检查避免引入 repo/cache 依赖。

View File

@@ -2207,7 +2207,7 @@ func (s *stubSoraClientForHandler) GetVideoTask(_ context.Context, _ *service.Ac
func newMinimalGatewayService(accountRepo service.AccountRepository) *service.GatewayService {
return service.NewGatewayService(
accountRepo, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
)
}

View File

@@ -445,6 +445,7 @@ func TestSoraGatewayHandler_ChatCompletions(t *testing.T) {
testutil.StubSessionLimitCache{},
nil, // rpmCache
nil, // digestStore
nil, // settingService
)
soraClient := &stubSoraClient{imageURLs: []string{"https://example.com/a.png"}}

View File

@@ -210,6 +210,7 @@ func TestAPIContracts(t *testing.T) {
"sora_video_price_per_request": null,
"sora_video_price_per_request_hd": null,
"claude_code_only": false,
"allow_messages_dispatch": false,
"fallback_group_id": null,
"fallback_group_id_on_invalid_request": null,
"created_at": "2025-01-02T03:04:05Z",

View File

@@ -392,6 +392,9 @@ func registerSettingsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
// 流超时处理配置
adminSettings.GET("/stream-timeout", h.Admin.Setting.GetStreamTimeoutSettings)
adminSettings.PUT("/stream-timeout", h.Admin.Setting.UpdateStreamTimeoutSettings)
// 请求整流器配置
adminSettings.GET("/rectifier", h.Admin.Setting.GetRectifierSettings)
adminSettings.PUT("/rectifier", h.Admin.Setting.UpdateRectifierSettings)
// Sora S3 存储配置
adminSettings.GET("/sora-s3", h.Admin.Setting.GetSoraS3Settings)
adminSettings.PUT("/sora-s3", h.Admin.Setting.UpdateSoraS3Settings)

View File

@@ -1384,7 +1384,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
// 优先检测 thinking block 的 signature 相关错误400并重试一次
// Antigravity /v1internal 链路在部分场景会对 thought/thinking signature 做严格校验,
// 当历史消息携带的 signature 不合法时会直接 400去除 thinking 后可继续完成请求。
if resp.StatusCode == http.StatusBadRequest && isSignatureRelatedError(respBody) {
if resp.StatusCode == http.StatusBadRequest && isSignatureRelatedError(respBody) && s.settingService.IsSignatureRectifierEnabled(ctx) {
upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
logBody, maxBytes := s.getLogConfig()
@@ -1517,6 +1517,80 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
}
}
// Budget 整流:检测 budget_tokens 约束错误并自动修正重试
if resp.StatusCode == http.StatusBadRequest && respBody != nil && !isSignatureRelatedError(respBody) {
errMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
if isThinkingBudgetConstraintError(errMsg) && s.settingService.IsBudgetRectifierEnabled(ctx) {
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
Kind: "budget_constraint_error",
Message: errMsg,
Detail: s.getUpstreamErrorDetail(respBody),
})
// 修正 claudeReq 的 thinking 参数adaptive 模式不修正)
if claudeReq.Thinking == nil || claudeReq.Thinking.Type != "adaptive" {
retryClaudeReq := claudeReq
retryClaudeReq.Messages = append([]antigravity.ClaudeMessage(nil), claudeReq.Messages...)
// 创建新的 ThinkingConfig 避免修改原始 claudeReq.Thinking 指针
retryClaudeReq.Thinking = &antigravity.ThinkingConfig{
Type: "enabled",
BudgetTokens: BudgetRectifyBudgetTokens,
}
if retryClaudeReq.MaxTokens < BudgetRectifyMinMaxTokens {
retryClaudeReq.MaxTokens = BudgetRectifyMaxTokens
}
logger.LegacyPrintf("service.antigravity_gateway", "Antigravity account %d: detected budget_tokens constraint error, retrying with rectified budget (budget_tokens=%d, max_tokens=%d)", account.ID, BudgetRectifyBudgetTokens, BudgetRectifyMaxTokens)
retryGeminiBody, txErr := antigravity.TransformClaudeToGeminiWithOptions(&retryClaudeReq, projectID, mappedModel, transformOpts)
if txErr == nil {
retryResult, retryErr := s.antigravityRetryLoop(antigravityRetryLoopParams{
ctx: ctx,
prefix: prefix,
account: account,
proxyURL: proxyURL,
accessToken: accessToken,
action: action,
body: retryGeminiBody,
c: c,
httpUpstream: s.httpUpstream,
settingService: s.settingService,
accountRepo: s.accountRepo,
handleError: s.handleUpstreamError,
requestedModel: originalModel,
isStickySession: isStickySession,
groupID: 0,
sessionHash: "",
})
if retryErr == nil {
retryResp := retryResult.resp
if retryResp.StatusCode < 400 {
_ = resp.Body.Close()
resp = retryResp
respBody = nil
} else {
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
_ = retryResp.Body.Close()
respBody = retryBody
resp = &http.Response{
StatusCode: retryResp.StatusCode,
Header: retryResp.Header.Clone(),
Body: io.NopCloser(bytes.NewReader(retryBody)),
}
}
} else {
logger.LegacyPrintf("service.antigravity_gateway", "Antigravity account %d: budget rectifier retry failed: %v", account.ID, retryErr)
}
}
}
}
}
// 处理错误响应(重试后仍失败或不触发重试)
if resp.StatusCode >= 400 {
// 检测 prompt too long 错误,返回特殊错误类型供上层 fallback

View File

@@ -175,6 +175,13 @@ const (
// SettingKeyStreamTimeoutSettings stores JSON config for stream timeout handling.
SettingKeyStreamTimeoutSettings = "stream_timeout_settings"
// =========================
// Request Rectifier (请求整流器)
// =========================
// SettingKeyRectifierSettings stores JSON config for rectifier settings (thinking signature + budget).
SettingKeyRectifierSettings = "rectifier_settings"
// =========================
// Sora S3 存储配置
// =========================

View File

@@ -5,6 +5,7 @@ import (
"encoding/json"
"fmt"
"math"
"strings"
"unsafe"
"github.com/Wei-Shaw/sub2api/internal/domain"
@@ -675,3 +676,90 @@ func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
}
return newBody
}
// =========================
// Thinking Budget Rectifier
// =========================
const (
// BudgetRectifyBudgetTokens is the budget_tokens value to set when rectifying.
BudgetRectifyBudgetTokens = 32000
// BudgetRectifyMaxTokens is the max_tokens value to set when rectifying.
BudgetRectifyMaxTokens = 64000
// BudgetRectifyMinMaxTokens is the minimum max_tokens that must exceed budget_tokens.
BudgetRectifyMinMaxTokens = 32001
)
// isThinkingBudgetConstraintError detects whether an upstream error message indicates
// a budget_tokens constraint violation (e.g. "budget_tokens >= 1024").
// Matches three conditions (all must be true):
// 1. Contains "budget_tokens" or "budget tokens"
// 2. Contains "thinking"
// 3. Contains ">= 1024" or "greater than or equal to 1024" or ("1024" + "input should be")
func isThinkingBudgetConstraintError(errMsg string) bool {
m := strings.ToLower(errMsg)
// Condition 1: budget_tokens or budget tokens
hasBudget := strings.Contains(m, "budget_tokens") || strings.Contains(m, "budget tokens")
if !hasBudget {
return false
}
// Condition 2: thinking
if !strings.Contains(m, "thinking") {
return false
}
// Condition 3: constraint indicator
if strings.Contains(m, ">= 1024") || strings.Contains(m, "greater than or equal to 1024") {
return true
}
if strings.Contains(m, "1024") && strings.Contains(m, "input should be") {
return true
}
return false
}
// RectifyThinkingBudget modifies the request body to fix budget_tokens constraint errors.
// It sets thinking.budget_tokens = 32000, thinking.type = "enabled" (unless adaptive),
// and ensures max_tokens >= 32001.
// Returns (modified body, true) if changes were applied, or (original body, false) if not.
func RectifyThinkingBudget(body []byte) ([]byte, bool) {
// If thinking type is "adaptive", skip rectification entirely
thinkingType := gjson.GetBytes(body, "thinking.type").String()
if thinkingType == "adaptive" {
return body, false
}
modified := body
changed := false
// Set thinking.type = "enabled"
if thinkingType != "enabled" {
if result, err := sjson.SetBytes(modified, "thinking.type", "enabled"); err == nil {
modified = result
changed = true
}
}
// Set thinking.budget_tokens = 32000
currentBudget := gjson.GetBytes(modified, "thinking.budget_tokens").Int()
if currentBudget != BudgetRectifyBudgetTokens {
if result, err := sjson.SetBytes(modified, "thinking.budget_tokens", BudgetRectifyBudgetTokens); err == nil {
modified = result
changed = true
}
}
// Ensure max_tokens >= BudgetRectifyMinMaxTokens
maxTokens := gjson.GetBytes(modified, "max_tokens").Int()
if maxTokens < int64(BudgetRectifyMinMaxTokens) {
if result, err := sjson.SetBytes(modified, "max_tokens", BudgetRectifyMaxTokens); err == nil {
modified = result
changed = true
}
}
return modified, changed
}

View File

@@ -526,6 +526,7 @@ type GatewayService struct {
userGroupRateSF singleflight.Group
modelsListCache *gocache.Cache
modelsListCacheTTL time.Duration
settingService *SettingService
responseHeaderFilter *responseheaders.CompiledHeaderFilter
debugModelRouting atomic.Bool
debugClaudeMimic atomic.Bool
@@ -553,6 +554,7 @@ func NewGatewayService(
sessionLimitCache SessionLimitCache,
rpmCache RPMCache,
digestStore *DigestSessionStore,
settingService *SettingService,
) *GatewayService {
userGroupRateTTL := resolveUserGroupRateCacheTTL(cfg)
modelsListTTL := resolveModelsListCacheTTL(cfg)
@@ -579,6 +581,7 @@ func NewGatewayService(
sessionLimitCache: sessionLimitCache,
rpmCache: rpmCache,
userGroupRateCache: gocache.New(userGroupRateTTL, time.Minute),
settingService: settingService,
modelsListCache: gocache.New(modelsListTTL, time.Minute),
modelsListCacheTTL: modelsListTTL,
responseHeaderFilter: compileResponseHeaderFilter(cfg),
@@ -4069,7 +4072,7 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
if readErr == nil {
_ = resp.Body.Close()
if s.isThinkingBlockSignatureError(respBody) {
if s.isThinkingBlockSignatureError(respBody) && s.settingService.IsSignatureRectifierEnabled(ctx) {
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
@@ -4186,7 +4189,45 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
resp.Body = io.NopCloser(bytes.NewReader(respBody))
break
}
// 不是thinking签名错误恢复响应体
// 不是签名错误(或整流器已关闭),继续检查 budget 约束
errMsg := extractUpstreamErrorMessage(respBody)
if isThinkingBudgetConstraintError(errMsg) && s.settingService.IsBudgetRectifierEnabled(ctx) {
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
Kind: "budget_constraint_error",
Message: errMsg,
Detail: func() string {
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
return truncateString(string(respBody), s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes)
}
return ""
}(),
})
rectifiedBody, applied := RectifyThinkingBudget(body)
if applied && time.Since(retryStart) < maxRetryElapsed {
logger.LegacyPrintf("service.gateway", "Account %d: detected budget_tokens constraint error, retrying with rectified budget (budget_tokens=%d, max_tokens=%d)", account.ID, BudgetRectifyBudgetTokens, BudgetRectifyMaxTokens)
budgetRetryReq, buildErr := s.buildUpstreamRequest(ctx, c, account, rectifiedBody, token, tokenType, reqModel, reqStream, shouldMimicClaudeCode)
if buildErr == nil {
budgetRetryResp, retryErr := s.httpUpstream.DoWithTLS(budgetRetryReq, proxyURL, account.ID, account.Concurrency, account.IsTLSFingerprintEnabled())
if retryErr == nil {
resp = budgetRetryResp
break
}
if budgetRetryResp != nil && budgetRetryResp.Body != nil {
_ = budgetRetryResp.Body.Close()
}
logger.LegacyPrintf("service.gateway", "Account %d: budget rectifier retry failed: %v", account.ID, retryErr)
} else {
logger.LegacyPrintf("service.gateway", "Account %d: budget rectifier retry build failed: %v", account.ID, buildErr)
}
}
}
resp.Body = io.NopCloser(bytes.NewReader(respBody))
}
}
@@ -6928,7 +6969,7 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
}
// 检测 thinking block 签名错误400并重试一次过滤 thinking blocks
if resp.StatusCode == 400 && s.isThinkingBlockSignatureError(respBody) {
if resp.StatusCode == 400 && s.isThinkingBlockSignatureError(respBody) && s.settingService.IsSignatureRectifierEnabled(ctx) {
logger.LegacyPrintf("service.gateway", "Account %d: detected thinking block signature error on count_tokens, retrying with filtered thinking blocks", account.ID)
filteredBody := FilterThinkingBlocksForRetry(body)

View File

@@ -1194,6 +1194,59 @@ func (s *SettingService) GetMinClaudeCodeVersion(ctx context.Context) string {
return ver
}
// GetRectifierSettings 获取请求整流器配置
func (s *SettingService) GetRectifierSettings(ctx context.Context) (*RectifierSettings, error) {
value, err := s.settingRepo.GetValue(ctx, SettingKeyRectifierSettings)
if err != nil {
if errors.Is(err, ErrSettingNotFound) {
return DefaultRectifierSettings(), nil
}
return nil, fmt.Errorf("get rectifier settings: %w", err)
}
if value == "" {
return DefaultRectifierSettings(), nil
}
var settings RectifierSettings
if err := json.Unmarshal([]byte(value), &settings); err != nil {
return DefaultRectifierSettings(), nil
}
return &settings, nil
}
// SetRectifierSettings 设置请求整流器配置
func (s *SettingService) SetRectifierSettings(ctx context.Context, settings *RectifierSettings) error {
if settings == nil {
return fmt.Errorf("settings cannot be nil")
}
data, err := json.Marshal(settings)
if err != nil {
return fmt.Errorf("marshal rectifier settings: %w", err)
}
return s.settingRepo.Set(ctx, SettingKeyRectifierSettings, string(data))
}
// IsSignatureRectifierEnabled 判断签名整流是否启用(总开关 && 签名子开关)
func (s *SettingService) IsSignatureRectifierEnabled(ctx context.Context) bool {
settings, err := s.GetRectifierSettings(ctx)
if err != nil {
return true // fail-open: 查询失败时默认启用
}
return settings.Enabled && settings.ThinkingSignatureEnabled
}
// IsBudgetRectifierEnabled 判断 Budget 整流是否启用(总开关 && Budget 子开关)
func (s *SettingService) IsBudgetRectifierEnabled(ctx context.Context) bool {
settings, err := s.GetRectifierSettings(ctx)
if err != nil {
return true // fail-open: 查询失败时默认启用
}
return settings.Enabled && settings.ThinkingBudgetEnabled
}
// SetStreamTimeoutSettings 设置流超时处理配置
func (s *SettingService) SetStreamTimeoutSettings(ctx context.Context, settings *StreamTimeoutSettings) error {
if settings == nil {

View File

@@ -175,3 +175,19 @@ func DefaultStreamTimeoutSettings() *StreamTimeoutSettings {
ThresholdWindowMinutes: 10,
}
}
// RectifierSettings 请求整流器配置
type RectifierSettings struct {
Enabled bool `json:"enabled"` // 总开关
ThinkingSignatureEnabled bool `json:"thinking_signature_enabled"` // Thinking 签名整流
ThinkingBudgetEnabled bool `json:"thinking_budget_enabled"` // Thinking Budget 整流
}
// DefaultRectifierSettings 返回默认的整流器配置(全部启用)
func DefaultRectifierSettings() *RectifierSettings {
return &RectifierSettings{
Enabled: true,
ThinkingSignatureEnabled: true,
ThinkingBudgetEnabled: true,
}
}