fix(antigravity): bill with mapped model and use final model key for rate limiting

- Use mapped model (billingModel) instead of original request model for billing
- Use resolveFinalAntigravityModelKey for 429 rate limit model key,
  ensuring rate limit records match the actual upstream model
- Add regression tests for both fixes
This commit is contained in:
erio
2026-02-24 18:08:19 +08:00
parent 09166a52f8
commit 4573868c08
3 changed files with 172 additions and 9 deletions

View File

@@ -87,7 +87,6 @@ var (
)
const (
antigravityBillingModelEnv = "GATEWAY_ANTIGRAVITY_BILL_WITH_MAPPED_MODEL"
antigravityForwardBaseURLEnv = "GATEWAY_ANTIGRAVITY_FORWARD_BASE_URL"
antigravityFallbackSecondsEnv = "GATEWAY_ANTIGRAVITY_FALLBACK_COOLDOWN_SECONDS"
)
@@ -1309,6 +1308,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
// 应用 thinking 模式自动后缀:如果 thinking 开启且目标是 claude-sonnet-4-5自动改为 thinking 版本
thinkingEnabled := claudeReq.Thinking != nil && (claudeReq.Thinking.Type == "enabled" || claudeReq.Thinking.Type == "adaptive")
mappedModel = applyThinkingModelSuffix(mappedModel, thinkingEnabled)
billingModel := mappedModel
// 获取 access_token
if s.tokenProvider == nil {
@@ -1622,7 +1622,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
return &ForwardResult{
RequestID: requestID,
Usage: *usage,
Model: originalModel, // 使用原始模型用于计费和日志
Model: billingModel, // 使用映射模型用于计费和日志
Stream: claudeReq.Stream,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
@@ -1976,6 +1976,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
if mappedModel == "" {
return nil, s.writeGoogleError(c, http.StatusForbidden, fmt.Sprintf("model %s not in whitelist", originalModel))
}
billingModel := mappedModel
// 获取 access_token
if s.tokenProvider == nil {
@@ -2205,7 +2206,7 @@ handleSuccess:
return &ForwardResult{
RequestID: requestID,
Usage: *usage,
Model: originalModel,
Model: billingModel,
Stream: stream,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
@@ -2650,7 +2651,16 @@ func (s *AntigravityGatewayService) handleUpstreamError(
defaultDur := s.getDefaultRateLimitDuration()
// 尝试解析模型 key 并设置模型级限流
modelKey := resolveAntigravityModelKey(requestedModel)
//
// 注意requestedModel 可能是"映射前"的请求模型名(例如 claude-opus-4-6
// 调度与限流判定使用的是 Antigravity 最终模型名(包含映射与 thinking 后缀)。
// 因此这里必须写入最终模型 key确保后续调度能正确避开已限流模型。
modelKey := resolveFinalAntigravityModelKey(ctx, account, requestedModel)
if strings.TrimSpace(modelKey) == "" {
// 极少数情况下无法映射(理论上不应发生:能转发成功说明映射已通过),
// 保持旧行为作为兜底,避免完全丢失模型级限流记录。
modelKey = resolveAntigravityModelKey(requestedModel)
}
if modelKey != "" {
ra := s.resolveResetTime(resetAt, defaultDur)
if err := s.accountRepo.SetModelRateLimit(ctx, account.ID, modelKey, ra); err != nil {
@@ -3889,7 +3899,6 @@ func (s *AntigravityGatewayService) ForwardUpstream(ctx context.Context, c *gin.
return nil, fmt.Errorf("missing model")
}
originalModel := claudeReq.Model
billingModel := originalModel
// 构建上游请求 URL
upstreamURL := baseURL + "/v1/messages"
@@ -3942,7 +3951,7 @@ func (s *AntigravityGatewayService) ForwardUpstream(ctx context.Context, c *gin.
_, _ = c.Writer.Write(respBody)
return &ForwardResult{
Model: billingModel,
Model: originalModel,
}, nil
}
@@ -3983,7 +3992,7 @@ func (s *AntigravityGatewayService) ForwardUpstream(ctx context.Context, c *gin.
logger.LegacyPrintf("service.antigravity_gateway", "%s status=success duration_ms=%d", prefix, duration.Milliseconds())
return &ForwardResult{
Model: billingModel,
Model: originalModel,
Stream: claudeReq.Stream,
Duration: duration,
FirstTokenMs: firstTokenMs,