Key changes: - Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching - Unified rate limiting: scope-level → model-level with Redis snapshot sync - Load-balanced scheduling by call count with smart retry mechanism - Force cache billing support - Model identity injection in prompts with leak prevention - Thinking mode auto-handling (max_tokens/budget_tokens fix) - Frontend: whitelist mode toggle, model mapping validation, status indicators - Gemini session fallback with Redis Trie O(L) matching - Ops: enhanced concurrency monitoring, account availability, retry logic - Migration scripts: 049-051 for model mapping unification
104 lines
2.8 KiB
Go
104 lines
2.8 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
|
||
)
|
||
|
||
// modelRateLimitsKey is the key in Account.Extra under which the rate-limit
// entries are stored; modelRateLimitResetAt looks entries up beneath it.
const modelRateLimitsKey = "model_rate_limits"
|
||
|
||
// isRateLimitActiveForKey 检查指定 key 的限流是否生效
|
||
func (a *Account) isRateLimitActiveForKey(key string) bool {
|
||
resetAt := a.modelRateLimitResetAt(key)
|
||
return resetAt != nil && time.Now().Before(*resetAt)
|
||
}
|
||
|
||
// getRateLimitRemainingForKey 获取指定 key 的限流剩余时间,0 表示未限流或已过期
|
||
func (a *Account) getRateLimitRemainingForKey(key string) time.Duration {
|
||
resetAt := a.modelRateLimitResetAt(key)
|
||
if resetAt == nil {
|
||
return 0
|
||
}
|
||
remaining := time.Until(*resetAt)
|
||
if remaining > 0 {
|
||
return remaining
|
||
}
|
||
return 0
|
||
}
|
||
|
||
func (a *Account) isModelRateLimitedWithContext(ctx context.Context, requestedModel string) bool {
|
||
if a == nil {
|
||
return false
|
||
}
|
||
|
||
modelKey := a.GetMappedModel(requestedModel)
|
||
if a.Platform == PlatformAntigravity {
|
||
modelKey = resolveFinalAntigravityModelKey(ctx, a, requestedModel)
|
||
}
|
||
modelKey = strings.TrimSpace(modelKey)
|
||
if modelKey == "" {
|
||
return false
|
||
}
|
||
return a.isRateLimitActiveForKey(modelKey)
|
||
}
|
||
|
||
// GetModelRateLimitRemainingTime 获取模型限流剩余时间
|
||
// 返回 0 表示未限流或已过期
|
||
func (a *Account) GetModelRateLimitRemainingTime(requestedModel string) time.Duration {
|
||
return a.GetModelRateLimitRemainingTimeWithContext(context.Background(), requestedModel)
|
||
}
|
||
|
||
func (a *Account) GetModelRateLimitRemainingTimeWithContext(ctx context.Context, requestedModel string) time.Duration {
|
||
if a == nil {
|
||
return 0
|
||
}
|
||
|
||
modelKey := a.GetMappedModel(requestedModel)
|
||
if a.Platform == PlatformAntigravity {
|
||
modelKey = resolveFinalAntigravityModelKey(ctx, a, requestedModel)
|
||
}
|
||
modelKey = strings.TrimSpace(modelKey)
|
||
if modelKey == "" {
|
||
return 0
|
||
}
|
||
return a.getRateLimitRemainingForKey(modelKey)
|
||
}
|
||
|
||
func resolveFinalAntigravityModelKey(ctx context.Context, account *Account, requestedModel string) string {
|
||
modelKey := mapAntigravityModel(account, requestedModel)
|
||
if modelKey == "" {
|
||
return ""
|
||
}
|
||
// thinking 会影响 Antigravity 最终模型名(例如 claude-sonnet-4-5 -> claude-sonnet-4-5-thinking)
|
||
if enabled, ok := ctx.Value(ctxkey.ThinkingEnabled).(bool); ok {
|
||
modelKey = applyThinkingModelSuffix(modelKey, enabled)
|
||
}
|
||
return modelKey
|
||
}
|
||
|
||
func (a *Account) modelRateLimitResetAt(scope string) *time.Time {
|
||
if a == nil || a.Extra == nil || scope == "" {
|
||
return nil
|
||
}
|
||
rawLimits, ok := a.Extra[modelRateLimitsKey].(map[string]any)
|
||
if !ok {
|
||
return nil
|
||
}
|
||
rawLimit, ok := rawLimits[scope].(map[string]any)
|
||
if !ok {
|
||
return nil
|
||
}
|
||
resetAtRaw, ok := rawLimit["rate_limit_reset_at"].(string)
|
||
if !ok || strings.TrimSpace(resetAtRaw) == "" {
|
||
return nil
|
||
}
|
||
resetAt, err := time.Parse(time.RFC3339, resetAtRaw)
|
||
if err != nil {
|
||
return nil
|
||
}
|
||
return &resetAt
|
||
}
|