feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops
Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification
This commit is contained in:
@@ -200,7 +200,7 @@ func (s *GeminiMessagesCompatService) tryStickySessionHit(
|
||||
|
||||
// 检查账号是否需要清理粘性会话
|
||||
// Check if sticky session should be cleared
|
||||
if shouldClearStickySession(account) {
|
||||
if shouldClearStickySession(account, requestedModel) {
|
||||
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), cacheKey)
|
||||
return nil
|
||||
}
|
||||
@@ -230,7 +230,7 @@ func (s *GeminiMessagesCompatService) isAccountUsableForRequest(
|
||||
) bool {
|
||||
// 检查模型调度能力
|
||||
// Check model scheduling capability
|
||||
if !account.IsSchedulableForModel(requestedModel) {
|
||||
if !account.IsSchedulableForModelWithContext(ctx, requestedModel) {
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -1498,28 +1498,6 @@ func (s *GeminiMessagesCompatService) writeGeminiMappedError(c *gin.Context, acc
|
||||
log.Printf("[Gemini] upstream error %d: %s", upstreamStatus, truncateForLog(body, s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes))
|
||||
}
|
||||
|
||||
if status, errType, errMsg, matched := applyErrorPassthroughRule(
|
||||
c,
|
||||
PlatformGemini,
|
||||
upstreamStatus,
|
||||
body,
|
||||
http.StatusBadGateway,
|
||||
"upstream_error",
|
||||
"Upstream request failed",
|
||||
); matched {
|
||||
c.JSON(status, gin.H{
|
||||
"type": "error",
|
||||
"error": gin.H{"type": errType, "message": errMsg},
|
||||
})
|
||||
if upstreamMsg == "" {
|
||||
upstreamMsg = errMsg
|
||||
}
|
||||
if upstreamMsg == "" {
|
||||
return fmt.Errorf("upstream error: %d (passthrough rule matched)", upstreamStatus)
|
||||
}
|
||||
return fmt.Errorf("upstream error: %d (passthrough rule matched) message=%s", upstreamStatus, upstreamMsg)
|
||||
}
|
||||
|
||||
var statusCode int
|
||||
var errType, errMsg string
|
||||
|
||||
@@ -2658,7 +2636,9 @@ func ParseGeminiRateLimitResetTime(body []byte) *int64 {
|
||||
if meta, ok := dm["metadata"].(map[string]any); ok {
|
||||
if v, ok := meta["quotaResetDelay"].(string); ok {
|
||||
if dur, err := time.ParseDuration(v); err == nil {
|
||||
ts := time.Now().Unix() + int64(dur.Seconds())
|
||||
// Use ceil to avoid undercounting fractional seconds (e.g. 10.1s should not become 10s),
|
||||
// which can affect scheduling decisions around thresholds (like 10s).
|
||||
ts := time.Now().Unix() + int64(math.Ceil(dur.Seconds()))
|
||||
return &ts
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user