This commit is contained in:
kyx236
2026-03-04 20:25:39 +08:00
738 changed files with 138970 additions and 39525 deletions

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
)
// RateLimitService 处理限流和过载状态管理
@@ -33,6 +34,10 @@ type geminiUsageCacheEntry struct {
totals GeminiUsageTotals
}
// geminiUsageTotalsBatchProvider is an optional capability of the usage
// repository: fetching Gemini usage totals for several accounts in one call.
// getGeminiUsageTotalsBatch type-asserts the repository against this interface
// and falls back to one GetModelStatsWithFilters query per account when the
// repository does not implement it.
type geminiUsageTotalsBatchProvider interface {
// GetGeminiUsageTotalsBatch returns usage totals keyed by account ID for
// the given accounts within the [startTime, endTime] window.
// NOTE(review): whether the window bounds are inclusive is decided by the
// implementation — confirm there.
GetGeminiUsageTotalsBatch(ctx context.Context, accountIDs []int64, startTime, endTime time.Time) (map[int64]GeminiUsageTotals, error)
}
// geminiPrecheckCacheTTL is one minute.
// NOTE(review): presumably the lifetime of the cached Gemini usage totals
// consulted by the quota precheck — confirm at its use site.
const geminiPrecheckCacheTTL = time.Minute
// NewRateLimitService 创建RateLimitService实例
@@ -141,13 +146,29 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
} else {
slog.Info("oauth_401_force_refresh_set", "account_id", account.ID, "platform", account.Platform)
}
// 3. 临时不可调度,替代 SetError(保持 status=active 让刷新服务能拾取)
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "OAuth 401: " + upstreamMsg
}
cooldownMinutes := s.cfg.RateLimit.OAuth401CooldownMinutes
if cooldownMinutes <= 0 {
cooldownMinutes = 10
}
until := time.Now().Add(time.Duration(cooldownMinutes) * time.Minute)
if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, msg); err != nil {
slog.Warn("oauth_401_set_temp_unschedulable_failed", "account_id", account.ID, "error", err)
}
shouldDisable = true
} else {
// 非 OAuth 账号(APIKey):保持原有 SetError 行为
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "Authentication failed (401): " + upstreamMsg
}
s.handleAuthError(ctx, account, msg)
shouldDisable = true
}
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "Authentication failed (401): " + upstreamMsg
}
s.handleAuthError(ctx, account, msg)
shouldDisable = true
case 402:
// 支付要求:余额不足或计费问题,停止调度
msg := "Payment required (402): insufficient balance or billing issue"
@@ -162,6 +183,17 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
if upstreamMsg != "" {
msg = "Access forbidden (403): " + upstreamMsg
}
logger.LegacyPrintf(
"service.ratelimit",
"[HandleUpstreamErrorRaw] account_id=%d platform=%s type=%s status=403 request_id=%s cf_ray=%s upstream_msg=%s raw_body=%s",
account.ID,
account.Platform,
account.Type,
strings.TrimSpace(headers.Get("x-request-id")),
strings.TrimSpace(headers.Get("cf-ray")),
upstreamMsg,
truncateForLog(responseBody, 1024),
)
s.handleAuthError(ctx, account, msg)
shouldDisable = true
case 429:
@@ -225,7 +257,7 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
start := geminiDailyWindowStart(now)
totals, ok := s.getGeminiUsageTotals(account.ID, start, now)
if !ok {
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil)
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil, nil)
if err != nil {
return true, err
}
@@ -272,7 +304,7 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
if limit > 0 {
start := now.Truncate(time.Minute)
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil)
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil, nil)
if err != nil {
return true, err
}
@@ -302,6 +334,218 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
return true, nil
}
// PreCheckUsageBatch runs the Gemini quota precheck for a whole set of
// accounts at once.
//
// The returned map holds one entry per non-nil account, keyed by account ID.
// A value of true means the account may be used; false means its daily or
// per-minute request quota for the requested model class is already used up
// and the account should be skipped. Accounts that are not on the Gemini
// platform, or for which the quota service reports no quota, always stay true.
//
// When an error is returned, the map still contains the verdicts computed up
// to that point (accounts not yet rejected remain true).
func (s *RateLimitService) PreCheckUsageBatch(ctx context.Context, accounts []*Account, requestedModel string) (map[int64]bool, error) {
	// Every non-nil account starts out as allowed.
	allowed := make(map[int64]bool, len(accounts))
	for _, acc := range accounts {
		if acc != nil {
			allowed[acc.ID] = true
		}
	}

	// Nothing to check without accounts, a model name, or the required services.
	if len(accounts) == 0 || requestedModel == "" {
		return allowed, nil
	}
	if s.usageRepo == nil || s.geminiQuotaService == nil {
		return allowed, nil
	}

	class := geminiModelClassFromName(requestedModel)
	now := time.Now()
	dayStart := geminiDailyWindowStart(now)
	minStart := now.Truncate(time.Minute)

	// candidate is a Gemini account with a known quota, with both limits
	// resolved once for the requested model class.
	type candidate struct {
		id          int64
		quota       GeminiQuota
		dailyLimit  int64
		minuteLimit int64
	}

	candidates := make([]candidate, 0, len(accounts))
	for _, acc := range accounts {
		if acc == nil || acc.Platform != PlatformGemini {
			continue
		}
		quota, found := s.geminiQuotaService.QuotaForAccount(ctx, acc)
		if !found {
			continue
		}
		candidates = append(candidates, candidate{
			id:          acc.ID,
			quota:       quota,
			dailyLimit:  geminiDailyLimit(quota, class),
			minuteLimit: geminiMinuteLimit(quota, class),
		})
	}
	if len(candidates) == 0 {
		return allowed, nil
	}

	// Step 1: daily window. Use cached totals where available and fetch the
	// remaining accounts in a single batch, storing the result in the cache.
	dailyTotals := make(map[int64]GeminiUsageTotals, len(candidates))
	uncached := make([]int64, 0, len(candidates))
	for _, c := range candidates {
		if c.dailyLimit <= 0 {
			continue
		}
		totals, hit := s.getGeminiUsageTotals(c.id, dayStart, now)
		if !hit {
			uncached = append(uncached, c.id)
			continue
		}
		dailyTotals[c.id] = totals
	}

	if len(uncached) > 0 {
		fetched, err := s.getGeminiUsageTotalsBatch(ctx, uncached, dayStart, now)
		if err != nil {
			return allowed, err
		}
		for _, id := range uncached {
			totals := fetched[id]
			dailyTotals[id] = totals
			s.setGeminiUsageTotals(id, dayStart, now, totals)
		}
	}

	for _, c := range candidates {
		if c.dailyLimit <= 0 {
			continue
		}
		used := geminiUsedRequests(c.quota, class, dailyTotals[c.id], true)
		if used < c.dailyLimit {
			continue
		}
		resetAt := geminiDailyResetTime(now)
		slog.Info("gemini_precheck_daily_quota_reached_batch", "account_id", c.id, "used", used, "limit", c.dailyLimit, "reset_at", resetAt)
		allowed[c.id] = false
	}

	// Step 2: current-minute window, only for accounts that passed the daily
	// check and have a minute limit. These totals are always read in a batch.
	minuteIDs := make([]int64, 0, len(candidates))
	for _, c := range candidates {
		if allowed[c.id] && c.minuteLimit > 0 {
			minuteIDs = append(minuteIDs, c.id)
		}
	}
	if len(minuteIDs) == 0 {
		return allowed, nil
	}

	minuteTotals, err := s.getGeminiUsageTotalsBatch(ctx, minuteIDs, minStart, now)
	if err != nil {
		return allowed, err
	}

	for _, c := range candidates {
		if !allowed[c.id] || c.minuteLimit <= 0 {
			continue
		}
		used := geminiUsedRequests(c.quota, class, minuteTotals[c.id], false)
		if used < c.minuteLimit {
			continue
		}
		resetAt := minStart.Add(time.Minute)
		slog.Info("gemini_precheck_minute_quota_reached_batch", "account_id", c.id, "used", used, "limit", c.minuteLimit, "reset_at", resetAt)
		allowed[c.id] = false
	}

	return allowed, nil
}
// getGeminiUsageTotalsBatch loads Gemini usage totals for the given accounts
// within [start, end].
//
// Non-positive and duplicate IDs are dropped. The returned map has an entry
// for every remaining ID; an account the repository reports nothing for gets
// the zero GeminiUsageTotals. If the usage repository implements
// geminiUsageTotalsBatchProvider a single batch call is made; otherwise one
// GetModelStatsWithFilters query is issued per account and aggregated with
// geminiAggregateUsage. On any repository error the result is nil and the
// error is returned unchanged.
func (s *RateLimitService) getGeminiUsageTotalsBatch(ctx context.Context, accountIDs []int64, start, end time.Time) (map[int64]GeminiUsageTotals, error) {
	totals := make(map[int64]GeminiUsageTotals, len(accountIDs))

	// Keep each valid ID once, in first-seen order.
	unique := make([]int64, 0, len(accountIDs))
	visited := make(map[int64]struct{}, len(accountIDs))
	for _, id := range accountIDs {
		if id <= 0 {
			continue
		}
		if _, dup := visited[id]; !dup {
			visited[id] = struct{}{}
			unique = append(unique, id)
		}
	}
	if len(unique) == 0 {
		return totals, nil
	}

	batch, supported := s.usageRepo.(geminiUsageTotalsBatchProvider)
	if !supported {
		// Fallback: query and aggregate each account separately.
		for _, id := range unique {
			stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, end, 0, 0, id, 0, nil, nil, nil)
			if err != nil {
				return nil, err
			}
			totals[id] = geminiAggregateUsage(stats)
		}
		return totals, nil
	}

	fetched, err := batch.GetGeminiUsageTotalsBatch(ctx, unique, start, end)
	if err != nil {
		return nil, err
	}
	for _, id := range unique {
		totals[id] = fetched[id]
	}
	return totals, nil
}
// geminiDailyLimit returns the requests-per-day limit that applies to the
// given model class: the shared limit when one is configured (> 0), otherwise
// the Flash limit for Flash models and the Pro limit for everything else.
func geminiDailyLimit(quota GeminiQuota, modelClass geminiModelClass) int64 {
	if shared := quota.SharedRPD; shared > 0 {
		return shared
	}
	if modelClass == geminiModelFlash {
		return quota.FlashRPD
	}
	return quota.ProRPD
}
// geminiMinuteLimit returns the requests-per-minute limit that applies to the
// given model class: the shared limit when one is configured (> 0), otherwise
// the Flash limit for Flash models and the Pro limit for everything else.
func geminiMinuteLimit(quota GeminiQuota, modelClass geminiModelClass) int64 {
	if shared := quota.SharedRPM; shared > 0 {
		return shared
	}
	if modelClass == geminiModelFlash {
		return quota.FlashRPM
	}
	return quota.ProRPM
}
// geminiUsedRequests returns the request count to compare against the limit
// for the given model class. When the relevant shared quota is configured
// (SharedRPD for the daily check, SharedRPM for the minute check) Pro and
// Flash requests are summed; otherwise only the requests of the matching
// class are counted, with non-Flash classes counted as Pro.
func geminiUsedRequests(quota GeminiQuota, modelClass geminiModelClass, totals GeminiUsageTotals, daily bool) int64 {
	// Select which shared quota governs this window.
	shared := quota.SharedRPM > 0
	if daily {
		shared = quota.SharedRPD > 0
	}

	switch {
	case shared:
		return totals.ProRequests + totals.FlashRequests
	case modelClass == geminiModelFlash:
		return totals.FlashRequests
	default:
		return totals.ProRequests
	}
}
func (s *RateLimitService) getGeminiUsageTotals(accountID int64, windowStart, now time.Time) (GeminiUsageTotals, bool) {
s.usageCacheMu.RLock()
defer s.usageCacheMu.RUnlock()
@@ -381,10 +625,31 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head
}
}
// 2. 尝试从响应头解析重置时间Anthropic
// 2. Anthropic 平台:尝试解析 per-window 头5h / 7d选择实际触发的窗口
if result := calculateAnthropic429ResetTime(headers); result != nil {
if err := s.accountRepo.SetRateLimited(ctx, account.ID, result.resetAt); err != nil {
slog.Warn("rate_limit_set_failed", "account_id", account.ID, "error", err)
return
}
// 更新 session window优先使用 5h-reset 头精确计算,否则从 resetAt 反推
windowEnd := result.resetAt
if result.fiveHourReset != nil {
windowEnd = *result.fiveHourReset
}
windowStart := windowEnd.Add(-5 * time.Hour)
if err := s.accountRepo.UpdateSessionWindow(ctx, account.ID, &windowStart, &windowEnd, "rejected"); err != nil {
slog.Warn("rate_limit_update_session_window_failed", "account_id", account.ID, "error", err)
}
slog.Info("anthropic_account_rate_limited", "account_id", account.ID, "reset_at", result.resetAt, "reset_in", time.Until(result.resetAt).Truncate(time.Second))
return
}
// 3. 尝试从响应头解析重置时间Anthropic 聚合头,向后兼容)
resetTimestamp := headers.Get("anthropic-ratelimit-unified-reset")
// 3. 如果响应头没有尝试从响应体解析OpenAI usage_limit_reached, Gemini
// 4. 如果响应头没有尝试从响应体解析OpenAI usage_limit_reached, Gemini
if resetTimestamp == "" {
switch account.Platform {
case PlatformOpenAI:
@@ -411,7 +676,17 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head
}
}
// 没有重置时间使用默认5分钟
// Anthropic 平台:没有限流重置时间的 429 可能是非真实限流(如 Extra usage required),
// 不标记账号限流状态,直接透传错误给客户端
if account.Platform == PlatformAnthropic {
slog.Warn("rate_limit_429_no_reset_time_skipped",
"account_id", account.ID,
"platform", account.Platform,
"reason", "no rate limit reset time in headers, likely not a real rate limit")
return
}
// 其他平台没有重置时间使用默认5分钟
resetAt := time.Now().Add(5 * time.Minute)
slog.Warn("rate_limit_no_reset_time", "account_id", account.ID, "platform", account.Platform, "using_default", "5m")
if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil {
@@ -497,6 +772,112 @@ func (s *RateLimitService) calculateOpenAI429ResetTime(headers http.Header) *tim
return nil
}
// anthropic429Result holds the rate-limit information parsed from the
// per-window headers of an Anthropic 429 response. It is produced by
// calculateAnthropic429ResetTime and consumed by handle429.
type anthropic429Result struct {
resetAt time.Time // Reset time of the window chosen as the cause of the 429; handle429 passes it to SetRateLimited.
fiveHourReset *time.Time // Parsed 5h-window reset time, nil if the header is missing or unparsable; when set, handle429 uses it as the session window end instead of resetAt.
}
// calculateAnthropic429ResetTime parses Anthropic's per-window rate-limit headers
// to determine which window (5h or 7d) actually triggered the 429 and when the
// account becomes usable again.
//
// Headers used:
//   - anthropic-ratelimit-unified-5h-utilization / anthropic-ratelimit-unified-5h-surpassed-threshold
//   - anthropic-ratelimit-unified-5h-reset
//   - anthropic-ratelimit-unified-7d-utilization / anthropic-ratelimit-unified-7d-surpassed-threshold
//   - anthropic-ratelimit-unified-7d-reset
//
// Selection rules:
//   - both windows exceeded: the later of the two parsed reset times, because
//     the account stays limited until both windows have reset;
//   - exactly one window exceeded: that window's reset time;
//   - neither clearly exceeded: the sooner reset as a best guess.
//
// Returns nil when both per-window reset headers are absent, or when the reset
// time of the selected window could not be parsed as a Unix timestamp in
// seconds. In either case the caller should fall back to the aggregated
// anthropic-ratelimit-unified-reset header.
func calculateAnthropic429ResetTime(headers http.Header) *anthropic429Result {
	reset5hStr := headers.Get("anthropic-ratelimit-unified-5h-reset")
	reset7dStr := headers.Get("anthropic-ratelimit-unified-7d-reset")
	if reset5hStr == "" && reset7dStr == "" {
		return nil
	}

	// parseReset converts a Unix-seconds header value; nil means missing/invalid.
	parseReset := func(raw string) *time.Time {
		ts, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			return nil
		}
		t := time.Unix(ts, 0)
		return &t
	}
	reset5h := parseReset(reset5hStr)
	reset7d := parseReset(reset7dStr)

	is5hExceeded := isAnthropicWindowExceeded(headers, "5h")
	is7dExceeded := isAnthropicWindowExceeded(headers, "7d")

	slog.Info("anthropic_429_window_analysis",
		"is_5h_exceeded", is5hExceeded,
		"is_7d_exceeded", is7dExceeded,
		"reset_5h", reset5hStr,
		"reset_7d", reset7dStr,
	)

	// Select the correct reset time based on which window(s) are exceeded.
	var chosen *time.Time
	switch {
	case is5hExceeded && is7dExceeded:
		// Both exceeded: the 7d window may reset BEFORE the 5h window (e.g. a
		// weekly window about to roll over), so compare the two instead of
		// assuming 7d is the longer cooldown. Falls back to whichever parsed.
		chosen = reset7d
		if chosen == nil || (reset5h != nil && reset5h.After(*chosen)) {
			chosen = reset5h
		}
	case is5hExceeded:
		chosen = reset5h
	case is7dExceeded:
		chosen = reset7d
	default:
		// Neither flag clearly exceeded — pick the sooner reset as best guess.
		chosen = pickSooner(reset5h, reset7d)
	}

	if chosen == nil {
		return nil
	}
	return &anthropic429Result{resetAt: *chosen, fiveHourReset: reset5h}
}
// isAnthropicWindowExceeded reports whether the given Anthropic rate-limit
// window (e.g. "5h" or "7d") is exhausted according to the response headers.
//
// The window counts as exceeded when its surpassed-threshold header equals
// "true" (case-insensitive) or, failing that, when its utilization header
// parses as a float of at least 1.0. A tolerance of 1e-9 is applied so values
// such as 0.9999999999 are treated as full. Missing or unparsable headers
// yield false.
func isAnthropicWindowExceeded(headers http.Header, window string) bool {
	base := "anthropic-ratelimit-unified-" + window + "-"

	// The explicit flag is the strongest signal.
	if strings.EqualFold(headers.Get(base+"surpassed-threshold"), "true") {
		return true
	}

	// Otherwise infer from the utilization ratio.
	raw := headers.Get(base + "utilization")
	if raw == "" {
		return false
	}
	ratio, err := strconv.ParseFloat(raw, 64)
	return err == nil && ratio >= 1.0-1e-9
}
// pickSooner returns the pointer to the earlier of two times.
// A nil argument is ignored in favour of the other one; if both are nil the
// result is nil. When the two times are equal, b is returned.
func pickSooner(a, b *time.Time) *time.Time {
	if a == nil {
		return b
	}
	if b == nil || a.Before(*b) {
		return a
	}
	return b
}
// parseOpenAIRateLimitResetTime 解析 OpenAI 格式的 429 响应,返回重置时间的 Unix 时间戳
// OpenAI 的 usage_limit_reached 错误格式:
//
@@ -611,7 +992,19 @@ func (s *RateLimitService) ClearRateLimit(ctx context.Context, accountID int64)
if err := s.accountRepo.ClearAntigravityQuotaScopes(ctx, accountID); err != nil {
return err
}
return s.accountRepo.ClearModelRateLimits(ctx, accountID)
if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil {
return err
}
// 清除限流时一并清理临时不可调度状态,避免周限/窗口重置后仍被本地临时状态阻断。
if err := s.accountRepo.ClearTempUnschedulable(ctx, accountID); err != nil {
return err
}
if s.tempUnschedCache != nil {
if err := s.tempUnschedCache.DeleteTempUnsched(ctx, accountID); err != nil {
slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err)
}
}
return nil
}
func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID int64) error {