feat(antigravity): 增强网关功能和 thinking 块处理

主要改进:
- 优化 thinking blocks 过滤策略,支持 Auto 模式降级
- 将无效 thinking block 内容转为普通 text
- 保留单个空白 text block,不过滤
- 重构配额刷新机制,统一与 Claude 一致
- 支持 cachedContentTokenCount 映射到 cache_read_input_tokens
- Haiku 模型映射到 Sonnet
- 添加 /antigravity/v1/models 端点支持
- countTokens 端点直接返回空值
This commit is contained in:
ianshaw
2026-01-03 06:29:02 -08:00
parent df1ef3deb6
commit 26438f7232
15 changed files with 463 additions and 358 deletions

View File

@@ -29,8 +29,9 @@ type StreamingProcessor struct {
originalModel string
// 累计 usage
inputTokens int
outputTokens int
inputTokens int
outputTokens int
cacheReadTokens int
}
// NewStreamingProcessor 创建流式响应处理器
@@ -76,9 +77,17 @@ func (p *StreamingProcessor) ProcessLine(line string) []byte {
}
// 更新 usage
// 注意Gemini 的 promptTokenCount 包含 cachedContentTokenCount
// 但 Claude 的 input_tokens 不包含 cache_read_input_tokens需要减去
if geminiResp.UsageMetadata != nil {
p.inputTokens = geminiResp.UsageMetadata.PromptTokenCount
cached := geminiResp.UsageMetadata.CachedContentTokenCount
prompt := geminiResp.UsageMetadata.PromptTokenCount
if cached > prompt {
cached = prompt
}
p.inputTokens = prompt - cached
p.outputTokens = geminiResp.UsageMetadata.CandidatesTokenCount
p.cacheReadTokens = cached
}
// 处理 parts
@@ -108,8 +117,9 @@ func (p *StreamingProcessor) Finish() ([]byte, *ClaudeUsage) {
}
usage := &ClaudeUsage{
InputTokens: p.inputTokens,
OutputTokens: p.outputTokens,
InputTokens: p.inputTokens,
OutputTokens: p.outputTokens,
CacheReadInputTokens: p.cacheReadTokens,
}
return result.Bytes(), usage
@@ -123,8 +133,14 @@ func (p *StreamingProcessor) emitMessageStart(v1Resp *V1InternalResponse) []byte
usage := ClaudeUsage{}
if v1Resp.Response.UsageMetadata != nil {
usage.InputTokens = v1Resp.Response.UsageMetadata.PromptTokenCount
cached := v1Resp.Response.UsageMetadata.CachedContentTokenCount
prompt := v1Resp.Response.UsageMetadata.PromptTokenCount
if cached > prompt {
cached = prompt
}
usage.InputTokens = prompt - cached
usage.OutputTokens = v1Resp.Response.UsageMetadata.CandidatesTokenCount
usage.CacheReadInputTokens = cached
}
responseID := v1Resp.ResponseID
@@ -418,8 +434,9 @@ func (p *StreamingProcessor) emitFinish(finishReason string) []byte {
}
usage := ClaudeUsage{
InputTokens: p.inputTokens,
OutputTokens: p.outputTokens,
InputTokens: p.inputTokens,
OutputTokens: p.outputTokens,
CacheReadInputTokens: p.cacheReadTokens,
}
deltaEvent := map[string]any{