[UPDATE] 增强 Claude Thinking 模式支持与 Opus 4.6 动态预算适配
✨ feat(antigravity): 支持 thinking adaptive 类型并适配 Opus 4.6 动态预算
🧪 test(gateway): 增加 thinking 模式解析与签名块过滤的边界用例测试
This commit is contained in:
@@ -27,8 +27,8 @@ type ClaudeMessage struct {
|
|||||||
|
|
||||||
// ThinkingConfig Thinking 配置
|
// ThinkingConfig Thinking 配置
|
||||||
type ThinkingConfig struct {
|
type ThinkingConfig struct {
|
||||||
Type string `json:"type"` // "enabled" or "disabled"
|
Type string `json:"type"` // "enabled" / "adaptive" / "disabled"
|
||||||
BudgetTokens int `json:"budget_tokens,omitempty"` // thinking budget
|
BudgetTokens int `json:"budget_tokens,omitempty"` // thinking budget(-1 表示动态预算)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ClaudeMetadata 请求元数据
|
// ClaudeMetadata 请求元数据
|
||||||
|
|||||||
@@ -64,6 +64,10 @@ const MaxTokensBudgetPadding = 1000
|
|||||||
// Gemini 2.5 Flash thinking budget 上限
|
// Gemini 2.5 Flash thinking budget 上限
|
||||||
const Gemini25FlashThinkingBudgetLimit = 24576
|
const Gemini25FlashThinkingBudgetLimit = 24576
|
||||||
|
|
||||||
|
// 对于 Antigravity 的 Claude(budget-only)模型,thinking.type=adaptive(即 thinkingLevel="high")的语义最终等价为 thinkingBudget=24576。
|
||||||
|
// 这里复用相同数值以保持行为一致。
|
||||||
|
const ClaudeAdaptiveHighThinkingBudgetTokens = Gemini25FlashThinkingBudgetLimit
|
||||||
|
|
||||||
// ensureMaxTokensGreaterThanBudget 确保 max_tokens > budget_tokens
|
// ensureMaxTokensGreaterThanBudget 确保 max_tokens > budget_tokens
|
||||||
// Claude API 要求启用 thinking 时,max_tokens 必须大于 thinking.budget_tokens
|
// Claude API 要求启用 thinking 时,max_tokens 必须大于 thinking.budget_tokens
|
||||||
// 返回调整后的 maxTokens 和是否进行了调整
|
// 返回调整后的 maxTokens 和是否进行了调整
|
||||||
@@ -96,7 +100,7 @@ func TransformClaudeToGeminiWithOptions(claudeReq *ClaudeRequest, projectID, map
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 检测是否启用 thinking
|
// 检测是否启用 thinking
|
||||||
isThinkingEnabled := claudeReq.Thinking != nil && claudeReq.Thinking.Type == "enabled"
|
isThinkingEnabled := claudeReq.Thinking != nil && (claudeReq.Thinking.Type == "enabled" || claudeReq.Thinking.Type == "adaptive")
|
||||||
|
|
||||||
// 只有 Gemini 模型支持 dummy thought workaround
|
// 只有 Gemini 模型支持 dummy thought workaround
|
||||||
// Claude 模型通过 Vertex/Google API 需要有效的 thought signatures
|
// Claude 模型通过 Vertex/Google API 需要有效的 thought signatures
|
||||||
@@ -198,8 +202,7 @@ type modelInfo struct {
|
|||||||
|
|
||||||
// modelInfoMap 模型前缀 → 模型信息映射
|
// modelInfoMap 模型前缀 → 模型信息映射
|
||||||
// 只有在此映射表中的模型才会注入身份提示词
|
// 只有在此映射表中的模型才会注入身份提示词
|
||||||
// 注意:当前 claude-opus-4-6 会被映射到 claude-opus-4-5-thinking,
|
// 注意:模型映射逻辑在网关层完成;这里仅用于按模型前缀判断是否注入身份提示词。
|
||||||
// 但保留此条目以便后续 Antigravity 上游支持 4.6 时快速切换
|
|
||||||
var modelInfoMap = map[string]modelInfo{
|
var modelInfoMap = map[string]modelInfo{
|
||||||
"claude-opus-4-5": {DisplayName: "Claude Opus 4.5", CanonicalID: "claude-opus-4-5-20250929"},
|
"claude-opus-4-5": {DisplayName: "Claude Opus 4.5", CanonicalID: "claude-opus-4-5-20250929"},
|
||||||
"claude-opus-4-6": {DisplayName: "Claude Opus 4.6", CanonicalID: "claude-opus-4-6"},
|
"claude-opus-4-6": {DisplayName: "Claude Opus 4.6", CanonicalID: "claude-opus-4-6"},
|
||||||
@@ -593,6 +596,10 @@ func maxOutputTokensLimit(model string) int {
|
|||||||
return maxOutputTokensUpperBound
|
return maxOutputTokensUpperBound
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isAntigravityOpus46Model(model string) bool {
|
||||||
|
return strings.HasPrefix(strings.ToLower(model), "claude-opus-4-6")
|
||||||
|
}
|
||||||
|
|
||||||
func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
|
func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
|
||||||
maxLimit := maxOutputTokensLimit(req.Model)
|
maxLimit := maxOutputTokensLimit(req.Model)
|
||||||
config := &GeminiGenerationConfig{
|
config := &GeminiGenerationConfig{
|
||||||
@@ -606,25 +613,37 @@ func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Thinking 配置
|
// Thinking 配置
|
||||||
if req.Thinking != nil && req.Thinking.Type == "enabled" {
|
if req.Thinking != nil && (req.Thinking.Type == "enabled" || req.Thinking.Type == "adaptive") {
|
||||||
config.ThinkingConfig = &GeminiThinkingConfig{
|
config.ThinkingConfig = &GeminiThinkingConfig{
|
||||||
IncludeThoughts: true,
|
IncludeThoughts: true,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// - thinking.type=enabled:budget_tokens>0 用显式预算;否则使用动态预算(-1)
|
||||||
|
// - thinking.type=adaptive:仅在 Antigravity 的 Opus 4.6 上覆写为 high 预算(24576)
|
||||||
|
// (CLIProxyAPI 先翻译为 thinkingLevel="high",再按模型能力转换为预算值)。
|
||||||
|
budget := -1
|
||||||
if req.Thinking.BudgetTokens > 0 {
|
if req.Thinking.BudgetTokens > 0 {
|
||||||
budget := req.Thinking.BudgetTokens
|
budget = req.Thinking.BudgetTokens
|
||||||
|
}
|
||||||
|
if req.Thinking.Type == "adaptive" && isAntigravityOpus46Model(req.Model) {
|
||||||
|
budget = ClaudeAdaptiveHighThinkingBudgetTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
// 正预算需要做上限与 max_tokens 约束;动态预算(-1)直接透传给上游。
|
||||||
|
if budget > 0 {
|
||||||
// gemini-2.5-flash 上限
|
// gemini-2.5-flash 上限
|
||||||
if strings.Contains(req.Model, "gemini-2.5-flash") && budget > Gemini25FlashThinkingBudgetLimit {
|
if strings.Contains(req.Model, "gemini-2.5-flash") && budget > Gemini25FlashThinkingBudgetLimit {
|
||||||
budget = Gemini25FlashThinkingBudgetLimit
|
budget = Gemini25FlashThinkingBudgetLimit
|
||||||
}
|
}
|
||||||
config.ThinkingConfig.ThinkingBudget = budget
|
|
||||||
|
|
||||||
// 自动修正:max_tokens 必须大于 budget_tokens
|
// 自动修正:max_tokens 必须大于 budget_tokens(Claude 上游要求)
|
||||||
if adjusted, ok := ensureMaxTokensGreaterThanBudget(config.MaxOutputTokens, budget); ok {
|
if adjusted, ok := ensureMaxTokensGreaterThanBudget(config.MaxOutputTokens, budget); ok {
|
||||||
log.Printf("[Antigravity] Auto-adjusted max_tokens from %d to %d (must be > budget_tokens=%d)",
|
log.Printf("[Antigravity] Auto-adjusted max_tokens from %d to %d (must be > budget_tokens=%d)",
|
||||||
config.MaxOutputTokens, adjusted, budget)
|
config.MaxOutputTokens, adjusted, budget)
|
||||||
config.MaxOutputTokens = adjusted
|
config.MaxOutputTokens = adjusted
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
config.ThinkingConfig.ThinkingBudget = budget
|
||||||
}
|
}
|
||||||
|
|
||||||
if config.MaxOutputTokens > maxLimit {
|
if config.MaxOutputTokens > maxLimit {
|
||||||
|
|||||||
@@ -259,3 +259,93 @@ func TestBuildTools_CustomTypeTools(t *testing.T) {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildGenerationConfig_ThinkingDynamicBudget(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
model string
|
||||||
|
thinking *ThinkingConfig
|
||||||
|
wantBudget int
|
||||||
|
wantPresent bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "enabled without budget defaults to dynamic (-1)",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "enabled"},
|
||||||
|
wantBudget: -1,
|
||||||
|
wantPresent: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "enabled with budget uses the provided value",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: 1024},
|
||||||
|
wantBudget: 1024,
|
||||||
|
wantPresent: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "enabled with -1 budget uses dynamic (-1)",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: -1},
|
||||||
|
wantBudget: -1,
|
||||||
|
wantPresent: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "adaptive on opus4.6 maps to high budget (24576)",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "adaptive", BudgetTokens: 20000},
|
||||||
|
wantBudget: ClaudeAdaptiveHighThinkingBudgetTokens,
|
||||||
|
wantPresent: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "adaptive on non-opus model keeps default dynamic (-1)",
|
||||||
|
model: "claude-sonnet-4-5-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "adaptive"},
|
||||||
|
wantBudget: -1,
|
||||||
|
wantPresent: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "disabled does not emit thinkingConfig",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: &ThinkingConfig{Type: "disabled", BudgetTokens: 1024},
|
||||||
|
wantBudget: 0,
|
||||||
|
wantPresent: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "nil thinking does not emit thinkingConfig",
|
||||||
|
model: "claude-opus-4-6-thinking",
|
||||||
|
thinking: nil,
|
||||||
|
wantBudget: 0,
|
||||||
|
wantPresent: false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
req := &ClaudeRequest{
|
||||||
|
Model: tt.model,
|
||||||
|
Thinking: tt.thinking,
|
||||||
|
}
|
||||||
|
cfg := buildGenerationConfig(req)
|
||||||
|
if cfg == nil {
|
||||||
|
t.Fatalf("expected non-nil generationConfig")
|
||||||
|
}
|
||||||
|
|
||||||
|
if tt.wantPresent {
|
||||||
|
if cfg.ThinkingConfig == nil {
|
||||||
|
t.Fatalf("expected thinkingConfig to be present")
|
||||||
|
}
|
||||||
|
if !cfg.ThinkingConfig.IncludeThoughts {
|
||||||
|
t.Fatalf("expected includeThoughts=true")
|
||||||
|
}
|
||||||
|
if cfg.ThinkingConfig.ThinkingBudget != tt.wantBudget {
|
||||||
|
t.Fatalf("expected thinkingBudget=%d, got %d", tt.wantBudget, cfg.ThinkingConfig.ThinkingBudget)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.ThinkingConfig != nil {
|
||||||
|
t.Fatalf("expected thinkingConfig to be nil, got %+v", cfg.ThinkingConfig)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -1309,7 +1309,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
|
|||||||
return nil, s.writeClaudeError(c, http.StatusForbidden, "permission_error", fmt.Sprintf("model %s not in whitelist", claudeReq.Model))
|
return nil, s.writeClaudeError(c, http.StatusForbidden, "permission_error", fmt.Sprintf("model %s not in whitelist", claudeReq.Model))
|
||||||
}
|
}
|
||||||
// 应用 thinking 模式自动后缀:如果 thinking 开启且目标是 claude-sonnet-4-5,自动改为 thinking 版本
|
// 应用 thinking 模式自动后缀:如果 thinking 开启且目标是 claude-sonnet-4-5,自动改为 thinking 版本
|
||||||
thinkingEnabled := claudeReq.Thinking != nil && claudeReq.Thinking.Type == "enabled"
|
thinkingEnabled := claudeReq.Thinking != nil && (claudeReq.Thinking.Type == "enabled" || claudeReq.Thinking.Type == "adaptive")
|
||||||
mappedModel = applyThinkingModelSuffix(mappedModel, thinkingEnabled)
|
mappedModel = applyThinkingModelSuffix(mappedModel, thinkingEnabled)
|
||||||
|
|
||||||
// 获取 access_token
|
// 获取 access_token
|
||||||
|
|||||||
@@ -101,9 +101,9 @@ func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// thinking: {type: "enabled"}
|
// thinking: {type: "enabled" | "adaptive"}
|
||||||
if rawThinking, ok := req["thinking"].(map[string]any); ok {
|
if rawThinking, ok := req["thinking"].(map[string]any); ok {
|
||||||
if t, ok := rawThinking["type"].(string); ok && t == "enabled" {
|
if t, ok := rawThinking["type"].(string); ok && (t == "enabled" || t == "adaptive") {
|
||||||
parsed.ThinkingEnabled = true
|
parsed.ThinkingEnabled = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -161,9 +161,9 @@ func parseIntegralNumber(raw any) (int, bool) {
|
|||||||
// Returns filtered body or original body if filtering fails (fail-safe)
|
// Returns filtered body or original body if filtering fails (fail-safe)
|
||||||
// This prevents 400 errors from invalid thinking block signatures
|
// This prevents 400 errors from invalid thinking block signatures
|
||||||
//
|
//
|
||||||
// Strategy:
|
// 策略:
|
||||||
// - When thinking.type != "enabled": Remove all thinking blocks
|
// - 当 thinking.type 不是 "enabled"/"adaptive":移除所有 thinking 相关块
|
||||||
// - When thinking.type == "enabled": Only remove thinking blocks without valid signatures
|
// - 当 thinking.type 是 "enabled"/"adaptive":仅移除缺失/无效 signature 的 thinking 块(避免 400)
|
||||||
// (blocks with missing/empty/dummy signatures that would cause 400 errors)
|
// (blocks with missing/empty/dummy signatures that would cause 400 errors)
|
||||||
func FilterThinkingBlocks(body []byte) []byte {
|
func FilterThinkingBlocks(body []byte) []byte {
|
||||||
return filterThinkingBlocksInternal(body, false)
|
return filterThinkingBlocksInternal(body, false)
|
||||||
@@ -489,9 +489,9 @@ func FilterSignatureSensitiveBlocksForRetry(body []byte) []byte {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// filterThinkingBlocksInternal removes invalid thinking blocks from request
|
// filterThinkingBlocksInternal removes invalid thinking blocks from request
|
||||||
// Strategy:
|
// 策略:
|
||||||
// - When thinking.type != "enabled": Remove all thinking blocks
|
// - 当 thinking.type 不是 "enabled"/"adaptive":移除所有 thinking 相关块
|
||||||
// - When thinking.type == "enabled": Only remove thinking blocks without valid signatures
|
// - 当 thinking.type 是 "enabled"/"adaptive":仅移除缺失/无效 signature 的 thinking 块
|
||||||
func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
|
func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
|
||||||
// Fast path: if body doesn't contain "thinking", skip parsing
|
// Fast path: if body doesn't contain "thinking", skip parsing
|
||||||
if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
|
if !bytes.Contains(body, []byte(`"type":"thinking"`)) &&
|
||||||
@@ -511,7 +511,7 @@ func filterThinkingBlocksInternal(body []byte, _ bool) []byte {
|
|||||||
// Check if thinking is enabled
|
// Check if thinking is enabled
|
||||||
thinkingEnabled := false
|
thinkingEnabled := false
|
||||||
if thinking, ok := req["thinking"].(map[string]any); ok {
|
if thinking, ok := req["thinking"].(map[string]any); ok {
|
||||||
if thinkType, ok := thinking["type"].(string); ok && thinkType == "enabled" {
|
if thinkType, ok := thinking["type"].(string); ok && (thinkType == "enabled" || thinkType == "adaptive") {
|
||||||
thinkingEnabled = true
|
thinkingEnabled = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,6 +29,14 @@ func TestParseGatewayRequest_ThinkingEnabled(t *testing.T) {
|
|||||||
require.True(t, parsed.ThinkingEnabled)
|
require.True(t, parsed.ThinkingEnabled)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseGatewayRequest_ThinkingAdaptiveEnabled(t *testing.T) {
|
||||||
|
body := []byte(`{"model":"claude-sonnet-4-5","thinking":{"type":"adaptive"},"messages":[{"content":"hi"}]}`)
|
||||||
|
parsed, err := ParseGatewayRequest(body, "")
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, "claude-sonnet-4-5", parsed.Model)
|
||||||
|
require.True(t, parsed.ThinkingEnabled)
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseGatewayRequest_MaxTokens(t *testing.T) {
|
func TestParseGatewayRequest_MaxTokens(t *testing.T) {
|
||||||
body := []byte(`{"model":"claude-haiku-4-5","max_tokens":1}`)
|
body := []byte(`{"model":"claude-haiku-4-5","max_tokens":1}`)
|
||||||
parsed, err := ParseGatewayRequest(body, "")
|
parsed, err := ParseGatewayRequest(body, "")
|
||||||
@@ -209,6 +217,16 @@ func TestFilterThinkingBlocks(t *testing.T) {
|
|||||||
input: `{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello"},{"type":"thinking","thinking":"internal","signature":"invalid"},{"type":"text","text":"World"}]}]}`,
|
input: `{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello"},{"type":"thinking","thinking":"internal","signature":"invalid"},{"type":"text","text":"World"}]}]}`,
|
||||||
shouldFilter: true,
|
shouldFilter: true,
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "does not filter signed thinking blocks when thinking adaptive",
|
||||||
|
input: `{"thinking":{"type":"adaptive"},"messages":[{"role":"assistant","content":[{"type":"thinking","thinking":"ok","signature":"sig_real_123"},{"type":"text","text":"B"}]}]}`,
|
||||||
|
shouldFilter: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "filters unsigned thinking blocks when thinking adaptive",
|
||||||
|
input: `{"thinking":{"type":"adaptive"},"messages":[{"role":"assistant","content":[{"type":"thinking","thinking":"internal","signature":""},{"type":"text","text":"B"}]}]}`,
|
||||||
|
shouldFilter: true,
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "handles no thinking blocks",
|
name: "handles no thinking blocks",
|
||||||
input: `{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello"}]}]}`,
|
input: `{"model":"claude-3-5-sonnet-20241022","messages":[{"role":"user","content":[{"type":"text","text":"Hello"}]}]}`,
|
||||||
|
|||||||
@@ -3631,7 +3631,8 @@ func requestNeedsBetaFeatures(body []byte) bool {
|
|||||||
if tools.Exists() && tools.IsArray() && len(tools.Array()) > 0 {
|
if tools.Exists() && tools.IsArray() && len(tools.Array()) > 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if strings.EqualFold(gjson.GetBytes(body, "thinking.type").String(), "enabled") {
|
thinkingType := gjson.GetBytes(body, "thinking.type").String()
|
||||||
|
if strings.EqualFold(thinkingType, "enabled") || strings.EqualFold(thinkingType, "adaptive") {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|||||||
Reference in New Issue
Block a user