Merge branch 'Wei-Shaw:main' into main
This commit is contained in:
@@ -27,7 +27,7 @@ type ClaudeMessage struct {
|
||||
|
||||
// ThinkingConfig Thinking 配置
|
||||
type ThinkingConfig struct {
|
||||
Type string `json:"type"` // "enabled" or "disabled"
|
||||
Type string `json:"type"` // "enabled" / "adaptive" / "disabled"
|
||||
BudgetTokens int `json:"budget_tokens,omitempty"` // thinking budget
|
||||
}
|
||||
|
||||
|
||||
@@ -155,6 +155,7 @@ type GeminiUsageMetadata struct {
|
||||
CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"`
|
||||
CachedContentTokenCount int `json:"cachedContentTokenCount,omitempty"`
|
||||
TotalTokenCount int `json:"totalTokenCount,omitempty"`
|
||||
ThoughtsTokenCount int `json:"thoughtsTokenCount,omitempty"` // thinking tokens(按输出价格计费)
|
||||
}
|
||||
|
||||
// GeminiGroundingMetadata Gemini grounding 元数据(Web Search)
|
||||
|
||||
@@ -64,6 +64,10 @@ const MaxTokensBudgetPadding = 1000
|
||||
// Gemini 2.5 Flash thinking budget 上限
|
||||
const Gemini25FlashThinkingBudgetLimit = 24576
|
||||
|
||||
// 对于 Antigravity 的 Claude(budget-only)模型,该语义最终等价为 thinkingBudget=24576。
|
||||
// 这里复用相同数值以保持行为一致。
|
||||
const ClaudeAdaptiveHighThinkingBudgetTokens = Gemini25FlashThinkingBudgetLimit
|
||||
|
||||
// ensureMaxTokensGreaterThanBudget 确保 max_tokens > budget_tokens
|
||||
// Claude API 要求启用 thinking 时,max_tokens 必须大于 thinking.budget_tokens
|
||||
// 返回调整后的 maxTokens 和是否进行了调整
|
||||
@@ -96,7 +100,7 @@ func TransformClaudeToGeminiWithOptions(claudeReq *ClaudeRequest, projectID, map
|
||||
}
|
||||
|
||||
// 检测是否启用 thinking
|
||||
isThinkingEnabled := claudeReq.Thinking != nil && claudeReq.Thinking.Type == "enabled"
|
||||
isThinkingEnabled := claudeReq.Thinking != nil && (claudeReq.Thinking.Type == "enabled" || claudeReq.Thinking.Type == "adaptive")
|
||||
|
||||
// 只有 Gemini 模型支持 dummy thought workaround
|
||||
// Claude 模型通过 Vertex/Google API 需要有效的 thought signatures
|
||||
@@ -198,8 +202,7 @@ type modelInfo struct {
|
||||
|
||||
// modelInfoMap 模型前缀 → 模型信息映射
|
||||
// 只有在此映射表中的模型才会注入身份提示词
|
||||
// 注意:当前 claude-opus-4-6 会被映射到 claude-opus-4-5-thinking,
|
||||
// 但保留此条目以便后续 Antigravity 上游支持 4.6 时快速切换
|
||||
// 注意:模型映射逻辑在网关层完成;这里仅用于按模型前缀判断是否注入身份提示词。
|
||||
var modelInfoMap = map[string]modelInfo{
|
||||
"claude-opus-4-5": {DisplayName: "Claude Opus 4.5", CanonicalID: "claude-opus-4-5-20250929"},
|
||||
"claude-opus-4-6": {DisplayName: "Claude Opus 4.6", CanonicalID: "claude-opus-4-6"},
|
||||
@@ -593,6 +596,10 @@ func maxOutputTokensLimit(model string) int {
|
||||
return maxOutputTokensUpperBound
|
||||
}
|
||||
|
||||
func isAntigravityOpus46Model(model string) bool {
|
||||
return strings.HasPrefix(strings.ToLower(model), "claude-opus-4-6")
|
||||
}
|
||||
|
||||
func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
|
||||
maxLimit := maxOutputTokensLimit(req.Model)
|
||||
config := &GeminiGenerationConfig{
|
||||
@@ -606,25 +613,36 @@ func buildGenerationConfig(req *ClaudeRequest) *GeminiGenerationConfig {
|
||||
}
|
||||
|
||||
// Thinking 配置
|
||||
if req.Thinking != nil && req.Thinking.Type == "enabled" {
|
||||
if req.Thinking != nil && (req.Thinking.Type == "enabled" || req.Thinking.Type == "adaptive") {
|
||||
config.ThinkingConfig = &GeminiThinkingConfig{
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
|
||||
// - thinking.type=enabled:budget_tokens>0 用显式预算
|
||||
// - thinking.type=adaptive:仅在 Antigravity 的 Opus 4.6 上覆写为 (24576)
|
||||
budget := -1
|
||||
if req.Thinking.BudgetTokens > 0 {
|
||||
budget := req.Thinking.BudgetTokens
|
||||
budget = req.Thinking.BudgetTokens
|
||||
}
|
||||
if req.Thinking.Type == "adaptive" && isAntigravityOpus46Model(req.Model) {
|
||||
budget = ClaudeAdaptiveHighThinkingBudgetTokens
|
||||
}
|
||||
|
||||
// 正预算需要做上限与 max_tokens 约束;动态预算(-1)直接透传给上游。
|
||||
if budget > 0 {
|
||||
// gemini-2.5-flash 上限
|
||||
if strings.Contains(req.Model, "gemini-2.5-flash") && budget > Gemini25FlashThinkingBudgetLimit {
|
||||
budget = Gemini25FlashThinkingBudgetLimit
|
||||
}
|
||||
config.ThinkingConfig.ThinkingBudget = budget
|
||||
|
||||
// 自动修正:max_tokens 必须大于 budget_tokens
|
||||
// 自动修正:max_tokens 必须大于 budget_tokens(Claude 上游要求)
|
||||
if adjusted, ok := ensureMaxTokensGreaterThanBudget(config.MaxOutputTokens, budget); ok {
|
||||
log.Printf("[Antigravity] Auto-adjusted max_tokens from %d to %d (must be > budget_tokens=%d)",
|
||||
config.MaxOutputTokens, adjusted, budget)
|
||||
config.MaxOutputTokens = adjusted
|
||||
}
|
||||
}
|
||||
config.ThinkingConfig.ThinkingBudget = budget
|
||||
}
|
||||
|
||||
if config.MaxOutputTokens > maxLimit {
|
||||
|
||||
@@ -259,3 +259,93 @@ func TestBuildTools_CustomTypeTools(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildGenerationConfig_ThinkingDynamicBudget(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
thinking *ThinkingConfig
|
||||
wantBudget int
|
||||
wantPresent bool
|
||||
}{
|
||||
{
|
||||
name: "enabled without budget defaults to dynamic (-1)",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: &ThinkingConfig{Type: "enabled"},
|
||||
wantBudget: -1,
|
||||
wantPresent: true,
|
||||
},
|
||||
{
|
||||
name: "enabled with budget uses the provided value",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: 1024},
|
||||
wantBudget: 1024,
|
||||
wantPresent: true,
|
||||
},
|
||||
{
|
||||
name: "enabled with -1 budget uses dynamic (-1)",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: &ThinkingConfig{Type: "enabled", BudgetTokens: -1},
|
||||
wantBudget: -1,
|
||||
wantPresent: true,
|
||||
},
|
||||
{
|
||||
name: "adaptive on opus4.6 maps to high budget (24576)",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: &ThinkingConfig{Type: "adaptive", BudgetTokens: 20000},
|
||||
wantBudget: ClaudeAdaptiveHighThinkingBudgetTokens,
|
||||
wantPresent: true,
|
||||
},
|
||||
{
|
||||
name: "adaptive on non-opus model keeps default dynamic (-1)",
|
||||
model: "claude-sonnet-4-5-thinking",
|
||||
thinking: &ThinkingConfig{Type: "adaptive"},
|
||||
wantBudget: -1,
|
||||
wantPresent: true,
|
||||
},
|
||||
{
|
||||
name: "disabled does not emit thinkingConfig",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: &ThinkingConfig{Type: "disabled", BudgetTokens: 1024},
|
||||
wantBudget: 0,
|
||||
wantPresent: false,
|
||||
},
|
||||
{
|
||||
name: "nil thinking does not emit thinkingConfig",
|
||||
model: "claude-opus-4-6-thinking",
|
||||
thinking: nil,
|
||||
wantBudget: 0,
|
||||
wantPresent: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
req := &ClaudeRequest{
|
||||
Model: tt.model,
|
||||
Thinking: tt.thinking,
|
||||
}
|
||||
cfg := buildGenerationConfig(req)
|
||||
if cfg == nil {
|
||||
t.Fatalf("expected non-nil generationConfig")
|
||||
}
|
||||
|
||||
if tt.wantPresent {
|
||||
if cfg.ThinkingConfig == nil {
|
||||
t.Fatalf("expected thinkingConfig to be present")
|
||||
}
|
||||
if !cfg.ThinkingConfig.IncludeThoughts {
|
||||
t.Fatalf("expected includeThoughts=true")
|
||||
}
|
||||
if cfg.ThinkingConfig.ThinkingBudget != tt.wantBudget {
|
||||
t.Fatalf("expected thinkingBudget=%d, got %d", tt.wantBudget, cfg.ThinkingConfig.ThinkingBudget)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
if cfg.ThinkingConfig != nil {
|
||||
t.Fatalf("expected thinkingConfig to be nil, got %+v", cfg.ThinkingConfig)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,7 +280,7 @@ func (p *NonStreamingProcessor) buildResponse(geminiResp *GeminiResponse, respon
|
||||
if geminiResp.UsageMetadata != nil {
|
||||
cached := geminiResp.UsageMetadata.CachedContentTokenCount
|
||||
usage.InputTokens = geminiResp.UsageMetadata.PromptTokenCount - cached
|
||||
usage.OutputTokens = geminiResp.UsageMetadata.CandidatesTokenCount
|
||||
usage.OutputTokens = geminiResp.UsageMetadata.CandidatesTokenCount + geminiResp.UsageMetadata.ThoughtsTokenCount
|
||||
usage.CacheReadInputTokens = cached
|
||||
}
|
||||
|
||||
|
||||
@@ -85,7 +85,7 @@ func (p *StreamingProcessor) ProcessLine(line string) []byte {
|
||||
if geminiResp.UsageMetadata != nil {
|
||||
cached := geminiResp.UsageMetadata.CachedContentTokenCount
|
||||
p.inputTokens = geminiResp.UsageMetadata.PromptTokenCount - cached
|
||||
p.outputTokens = geminiResp.UsageMetadata.CandidatesTokenCount
|
||||
p.outputTokens = geminiResp.UsageMetadata.CandidatesTokenCount + geminiResp.UsageMetadata.ThoughtsTokenCount
|
||||
p.cacheReadTokens = cached
|
||||
}
|
||||
|
||||
@@ -146,7 +146,7 @@ func (p *StreamingProcessor) emitMessageStart(v1Resp *V1InternalResponse) []byte
|
||||
if v1Resp.Response.UsageMetadata != nil {
|
||||
cached := v1Resp.Response.UsageMetadata.CachedContentTokenCount
|
||||
usage.InputTokens = v1Resp.Response.UsageMetadata.PromptTokenCount - cached
|
||||
usage.OutputTokens = v1Resp.Response.UsageMetadata.CandidatesTokenCount
|
||||
usage.OutputTokens = v1Resp.Response.UsageMetadata.CandidatesTokenCount + v1Resp.Response.UsageMetadata.ThoughtsTokenCount
|
||||
usage.CacheReadInputTokens = cached
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user