Merge branch 'main' of https://github.com/james-6-23/sub2api
This commit is contained in:
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/config"
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
|
||||
)
|
||||
|
||||
// RateLimitService 处理限流和过载状态管理
|
||||
@@ -33,6 +34,10 @@ type geminiUsageCacheEntry struct {
|
||||
totals GeminiUsageTotals
|
||||
}
|
||||
|
||||
type geminiUsageTotalsBatchProvider interface {
|
||||
GetGeminiUsageTotalsBatch(ctx context.Context, accountIDs []int64, startTime, endTime time.Time) (map[int64]GeminiUsageTotals, error)
|
||||
}
|
||||
|
||||
// geminiPrecheckCacheTTL is a one-minute duration.
// NOTE(review): presumably the lifetime of cached Gemini usage totals used by
// the quota precheck — its use is not visible in this hunk, confirm against the
// cache helpers.
const geminiPrecheckCacheTTL = time.Minute
|
||||
|
||||
// NewRateLimitService 创建RateLimitService实例
|
||||
@@ -141,13 +146,29 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
|
||||
} else {
|
||||
slog.Info("oauth_401_force_refresh_set", "account_id", account.ID, "platform", account.Platform)
|
||||
}
|
||||
// 3. 临时不可调度,替代 SetError(保持 status=active 让刷新服务能拾取)
|
||||
msg := "Authentication failed (401): invalid or expired credentials"
|
||||
if upstreamMsg != "" {
|
||||
msg = "OAuth 401: " + upstreamMsg
|
||||
}
|
||||
cooldownMinutes := s.cfg.RateLimit.OAuth401CooldownMinutes
|
||||
if cooldownMinutes <= 0 {
|
||||
cooldownMinutes = 10
|
||||
}
|
||||
until := time.Now().Add(time.Duration(cooldownMinutes) * time.Minute)
|
||||
if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, msg); err != nil {
|
||||
slog.Warn("oauth_401_set_temp_unschedulable_failed", "account_id", account.ID, "error", err)
|
||||
}
|
||||
shouldDisable = true
|
||||
} else {
|
||||
// 非 OAuth 账号(APIKey):保持原有 SetError 行为
|
||||
msg := "Authentication failed (401): invalid or expired credentials"
|
||||
if upstreamMsg != "" {
|
||||
msg = "Authentication failed (401): " + upstreamMsg
|
||||
}
|
||||
s.handleAuthError(ctx, account, msg)
|
||||
shouldDisable = true
|
||||
}
|
||||
msg := "Authentication failed (401): invalid or expired credentials"
|
||||
if upstreamMsg != "" {
|
||||
msg = "Authentication failed (401): " + upstreamMsg
|
||||
}
|
||||
s.handleAuthError(ctx, account, msg)
|
||||
shouldDisable = true
|
||||
case 402:
|
||||
// 支付要求:余额不足或计费问题,停止调度
|
||||
msg := "Payment required (402): insufficient balance or billing issue"
|
||||
@@ -162,6 +183,17 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
|
||||
if upstreamMsg != "" {
|
||||
msg = "Access forbidden (403): " + upstreamMsg
|
||||
}
|
||||
logger.LegacyPrintf(
|
||||
"service.ratelimit",
|
||||
"[HandleUpstreamErrorRaw] account_id=%d platform=%s type=%s status=403 request_id=%s cf_ray=%s upstream_msg=%s raw_body=%s",
|
||||
account.ID,
|
||||
account.Platform,
|
||||
account.Type,
|
||||
strings.TrimSpace(headers.Get("x-request-id")),
|
||||
strings.TrimSpace(headers.Get("cf-ray")),
|
||||
upstreamMsg,
|
||||
truncateForLog(responseBody, 1024),
|
||||
)
|
||||
s.handleAuthError(ctx, account, msg)
|
||||
shouldDisable = true
|
||||
case 429:
|
||||
@@ -225,7 +257,7 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
|
||||
start := geminiDailyWindowStart(now)
|
||||
totals, ok := s.getGeminiUsageTotals(account.ID, start, now)
|
||||
if !ok {
|
||||
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil)
|
||||
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil, nil)
|
||||
if err != nil {
|
||||
return true, err
|
||||
}
|
||||
@@ -272,7 +304,7 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
|
||||
|
||||
if limit > 0 {
|
||||
start := now.Truncate(time.Minute)
|
||||
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil)
|
||||
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID, 0, nil, nil, nil)
|
||||
if err != nil {
|
||||
return true, err
|
||||
}
|
||||
@@ -302,6 +334,218 @@ func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account,
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// PreCheckUsageBatch performs quota precheck for multiple accounts in one request.
|
||||
// Returned map value=false means the account should be skipped.
|
||||
func (s *RateLimitService) PreCheckUsageBatch(ctx context.Context, accounts []*Account, requestedModel string) (map[int64]bool, error) {
|
||||
result := make(map[int64]bool, len(accounts))
|
||||
for _, account := range accounts {
|
||||
if account == nil {
|
||||
continue
|
||||
}
|
||||
result[account.ID] = true
|
||||
}
|
||||
|
||||
if len(accounts) == 0 || requestedModel == "" {
|
||||
return result, nil
|
||||
}
|
||||
if s.usageRepo == nil || s.geminiQuotaService == nil {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
modelClass := geminiModelClassFromName(requestedModel)
|
||||
now := time.Now()
|
||||
dailyStart := geminiDailyWindowStart(now)
|
||||
minuteStart := now.Truncate(time.Minute)
|
||||
|
||||
type quotaAccount struct {
|
||||
account *Account
|
||||
quota GeminiQuota
|
||||
}
|
||||
quotaAccounts := make([]quotaAccount, 0, len(accounts))
|
||||
for _, account := range accounts {
|
||||
if account == nil || account.Platform != PlatformGemini {
|
||||
continue
|
||||
}
|
||||
quota, ok := s.geminiQuotaService.QuotaForAccount(ctx, account)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
quotaAccounts = append(quotaAccounts, quotaAccount{
|
||||
account: account,
|
||||
quota: quota,
|
||||
})
|
||||
}
|
||||
if len(quotaAccounts) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// 1) Daily precheck (cached + batch DB fallback)
|
||||
dailyTotalsByID := make(map[int64]GeminiUsageTotals, len(quotaAccounts))
|
||||
dailyMissIDs := make([]int64, 0, len(quotaAccounts))
|
||||
for _, item := range quotaAccounts {
|
||||
limit := geminiDailyLimit(item.quota, modelClass)
|
||||
if limit <= 0 {
|
||||
continue
|
||||
}
|
||||
accountID := item.account.ID
|
||||
if totals, ok := s.getGeminiUsageTotals(accountID, dailyStart, now); ok {
|
||||
dailyTotalsByID[accountID] = totals
|
||||
continue
|
||||
}
|
||||
dailyMissIDs = append(dailyMissIDs, accountID)
|
||||
}
|
||||
if len(dailyMissIDs) > 0 {
|
||||
totalsBatch, err := s.getGeminiUsageTotalsBatch(ctx, dailyMissIDs, dailyStart, now)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
for _, accountID := range dailyMissIDs {
|
||||
totals := totalsBatch[accountID]
|
||||
dailyTotalsByID[accountID] = totals
|
||||
s.setGeminiUsageTotals(accountID, dailyStart, now, totals)
|
||||
}
|
||||
}
|
||||
for _, item := range quotaAccounts {
|
||||
limit := geminiDailyLimit(item.quota, modelClass)
|
||||
if limit <= 0 {
|
||||
continue
|
||||
}
|
||||
accountID := item.account.ID
|
||||
used := geminiUsedRequests(item.quota, modelClass, dailyTotalsByID[accountID], true)
|
||||
if used >= limit {
|
||||
resetAt := geminiDailyResetTime(now)
|
||||
slog.Info("gemini_precheck_daily_quota_reached_batch", "account_id", accountID, "used", used, "limit", limit, "reset_at", resetAt)
|
||||
result[accountID] = false
|
||||
}
|
||||
}
|
||||
|
||||
// 2) Minute precheck (batch DB)
|
||||
minuteIDs := make([]int64, 0, len(quotaAccounts))
|
||||
for _, item := range quotaAccounts {
|
||||
accountID := item.account.ID
|
||||
if !result[accountID] {
|
||||
continue
|
||||
}
|
||||
if geminiMinuteLimit(item.quota, modelClass) <= 0 {
|
||||
continue
|
||||
}
|
||||
minuteIDs = append(minuteIDs, accountID)
|
||||
}
|
||||
if len(minuteIDs) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
minuteTotalsByID, err := s.getGeminiUsageTotalsBatch(ctx, minuteIDs, minuteStart, now)
|
||||
if err != nil {
|
||||
return result, err
|
||||
}
|
||||
for _, item := range quotaAccounts {
|
||||
accountID := item.account.ID
|
||||
if !result[accountID] {
|
||||
continue
|
||||
}
|
||||
|
||||
limit := geminiMinuteLimit(item.quota, modelClass)
|
||||
if limit <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
used := geminiUsedRequests(item.quota, modelClass, minuteTotalsByID[accountID], false)
|
||||
if used >= limit {
|
||||
resetAt := minuteStart.Add(time.Minute)
|
||||
slog.Info("gemini_precheck_minute_quota_reached_batch", "account_id", accountID, "used", used, "limit", limit, "reset_at", resetAt)
|
||||
result[accountID] = false
|
||||
}
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (s *RateLimitService) getGeminiUsageTotalsBatch(ctx context.Context, accountIDs []int64, start, end time.Time) (map[int64]GeminiUsageTotals, error) {
|
||||
result := make(map[int64]GeminiUsageTotals, len(accountIDs))
|
||||
if len(accountIDs) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
ids := make([]int64, 0, len(accountIDs))
|
||||
seen := make(map[int64]struct{}, len(accountIDs))
|
||||
for _, accountID := range accountIDs {
|
||||
if accountID <= 0 {
|
||||
continue
|
||||
}
|
||||
if _, ok := seen[accountID]; ok {
|
||||
continue
|
||||
}
|
||||
seen[accountID] = struct{}{}
|
||||
ids = append(ids, accountID)
|
||||
}
|
||||
if len(ids) == 0 {
|
||||
return result, nil
|
||||
}
|
||||
|
||||
if batchReader, ok := s.usageRepo.(geminiUsageTotalsBatchProvider); ok {
|
||||
stats, err := batchReader.GetGeminiUsageTotalsBatch(ctx, ids, start, end)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for _, accountID := range ids {
|
||||
result[accountID] = stats[accountID]
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
for _, accountID := range ids {
|
||||
stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, end, 0, 0, accountID, 0, nil, nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result[accountID] = geminiAggregateUsage(stats)
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func geminiDailyLimit(quota GeminiQuota, modelClass geminiModelClass) int64 {
|
||||
if quota.SharedRPD > 0 {
|
||||
return quota.SharedRPD
|
||||
}
|
||||
switch modelClass {
|
||||
case geminiModelFlash:
|
||||
return quota.FlashRPD
|
||||
default:
|
||||
return quota.ProRPD
|
||||
}
|
||||
}
|
||||
|
||||
func geminiMinuteLimit(quota GeminiQuota, modelClass geminiModelClass) int64 {
|
||||
if quota.SharedRPM > 0 {
|
||||
return quota.SharedRPM
|
||||
}
|
||||
switch modelClass {
|
||||
case geminiModelFlash:
|
||||
return quota.FlashRPM
|
||||
default:
|
||||
return quota.ProRPM
|
||||
}
|
||||
}
|
||||
|
||||
func geminiUsedRequests(quota GeminiQuota, modelClass geminiModelClass, totals GeminiUsageTotals, daily bool) int64 {
|
||||
if daily {
|
||||
if quota.SharedRPD > 0 {
|
||||
return totals.ProRequests + totals.FlashRequests
|
||||
}
|
||||
} else {
|
||||
if quota.SharedRPM > 0 {
|
||||
return totals.ProRequests + totals.FlashRequests
|
||||
}
|
||||
}
|
||||
switch modelClass {
|
||||
case geminiModelFlash:
|
||||
return totals.FlashRequests
|
||||
default:
|
||||
return totals.ProRequests
|
||||
}
|
||||
}
|
||||
|
||||
func (s *RateLimitService) getGeminiUsageTotals(accountID int64, windowStart, now time.Time) (GeminiUsageTotals, bool) {
|
||||
s.usageCacheMu.RLock()
|
||||
defer s.usageCacheMu.RUnlock()
|
||||
@@ -381,10 +625,31 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head
|
||||
}
|
||||
}
|
||||
|
||||
// 2. 尝试从响应头解析重置时间(Anthropic)
|
||||
// 2. Anthropic 平台:尝试解析 per-window 头(5h / 7d),选择实际触发的窗口
|
||||
if result := calculateAnthropic429ResetTime(headers); result != nil {
|
||||
if err := s.accountRepo.SetRateLimited(ctx, account.ID, result.resetAt); err != nil {
|
||||
slog.Warn("rate_limit_set_failed", "account_id", account.ID, "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// 更新 session window:优先使用 5h-reset 头精确计算,否则从 resetAt 反推
|
||||
windowEnd := result.resetAt
|
||||
if result.fiveHourReset != nil {
|
||||
windowEnd = *result.fiveHourReset
|
||||
}
|
||||
windowStart := windowEnd.Add(-5 * time.Hour)
|
||||
if err := s.accountRepo.UpdateSessionWindow(ctx, account.ID, &windowStart, &windowEnd, "rejected"); err != nil {
|
||||
slog.Warn("rate_limit_update_session_window_failed", "account_id", account.ID, "error", err)
|
||||
}
|
||||
|
||||
slog.Info("anthropic_account_rate_limited", "account_id", account.ID, "reset_at", result.resetAt, "reset_in", time.Until(result.resetAt).Truncate(time.Second))
|
||||
return
|
||||
}
|
||||
|
||||
// 3. 尝试从响应头解析重置时间(Anthropic 聚合头,向后兼容)
|
||||
resetTimestamp := headers.Get("anthropic-ratelimit-unified-reset")
|
||||
|
||||
// 3. 如果响应头没有,尝试从响应体解析(OpenAI usage_limit_reached, Gemini)
|
||||
// 4. 如果响应头没有,尝试从响应体解析(OpenAI usage_limit_reached, Gemini)
|
||||
if resetTimestamp == "" {
|
||||
switch account.Platform {
|
||||
case PlatformOpenAI:
|
||||
@@ -411,7 +676,17 @@ func (s *RateLimitService) handle429(ctx context.Context, account *Account, head
|
||||
}
|
||||
}
|
||||
|
||||
// 没有重置时间,使用默认5分钟
|
||||
// Anthropic 平台:没有限流重置时间的 429 可能是非真实限流(如 Extra usage required),
|
||||
// 不标记账号限流状态,直接透传错误给客户端
|
||||
if account.Platform == PlatformAnthropic {
|
||||
slog.Warn("rate_limit_429_no_reset_time_skipped",
|
||||
"account_id", account.ID,
|
||||
"platform", account.Platform,
|
||||
"reason", "no rate limit reset time in headers, likely not a real rate limit")
|
||||
return
|
||||
}
|
||||
|
||||
// 其他平台:没有重置时间,使用默认5分钟
|
||||
resetAt := time.Now().Add(5 * time.Minute)
|
||||
slog.Warn("rate_limit_no_reset_time", "account_id", account.ID, "platform", account.Platform, "using_default", "5m")
|
||||
if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil {
|
||||
@@ -497,6 +772,112 @@ func (s *RateLimitService) calculateOpenAI429ResetTime(headers http.Header) *tim
|
||||
return nil
|
||||
}
|
||||
|
||||
// anthropic429Result carries the rate-limit information parsed from an
// Anthropic 429 response.
type anthropic429Result struct {
	// resetAt is the reset time to use for SetRateLimited.
	resetAt time.Time
	// fiveHourReset is the 5h window reset timestamp, used for the session
	// window calculation; nil when it is not available.
	fiveHourReset *time.Time
}
|
||||
|
||||
// calculateAnthropic429ResetTime parses Anthropic's per-window rate-limit headers
|
||||
// to determine which window (5h or 7d) actually triggered the 429.
|
||||
//
|
||||
// Headers used:
|
||||
// - anthropic-ratelimit-unified-5h-utilization / anthropic-ratelimit-unified-5h-surpassed-threshold
|
||||
// - anthropic-ratelimit-unified-5h-reset
|
||||
// - anthropic-ratelimit-unified-7d-utilization / anthropic-ratelimit-unified-7d-surpassed-threshold
|
||||
// - anthropic-ratelimit-unified-7d-reset
|
||||
//
|
||||
// Returns nil when the per-window headers are absent (caller should fall back to
|
||||
// the aggregated anthropic-ratelimit-unified-reset header).
|
||||
func calculateAnthropic429ResetTime(headers http.Header) *anthropic429Result {
|
||||
reset5hStr := headers.Get("anthropic-ratelimit-unified-5h-reset")
|
||||
reset7dStr := headers.Get("anthropic-ratelimit-unified-7d-reset")
|
||||
|
||||
if reset5hStr == "" && reset7dStr == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
var reset5h, reset7d *time.Time
|
||||
if ts, err := strconv.ParseInt(reset5hStr, 10, 64); err == nil {
|
||||
t := time.Unix(ts, 0)
|
||||
reset5h = &t
|
||||
}
|
||||
if ts, err := strconv.ParseInt(reset7dStr, 10, 64); err == nil {
|
||||
t := time.Unix(ts, 0)
|
||||
reset7d = &t
|
||||
}
|
||||
|
||||
is5hExceeded := isAnthropicWindowExceeded(headers, "5h")
|
||||
is7dExceeded := isAnthropicWindowExceeded(headers, "7d")
|
||||
|
||||
slog.Info("anthropic_429_window_analysis",
|
||||
"is_5h_exceeded", is5hExceeded,
|
||||
"is_7d_exceeded", is7dExceeded,
|
||||
"reset_5h", reset5hStr,
|
||||
"reset_7d", reset7dStr,
|
||||
)
|
||||
|
||||
// Select the correct reset time based on which window(s) are exceeded.
|
||||
var chosen *time.Time
|
||||
switch {
|
||||
case is5hExceeded && is7dExceeded:
|
||||
// Both exceeded → prefer 7d (longer cooldown), fall back to 5h
|
||||
chosen = reset7d
|
||||
if chosen == nil {
|
||||
chosen = reset5h
|
||||
}
|
||||
case is5hExceeded:
|
||||
chosen = reset5h
|
||||
case is7dExceeded:
|
||||
chosen = reset7d
|
||||
default:
|
||||
// Neither flag clearly exceeded — pick the sooner reset as best guess
|
||||
chosen = pickSooner(reset5h, reset7d)
|
||||
}
|
||||
|
||||
if chosen == nil {
|
||||
return nil
|
||||
}
|
||||
return &anthropic429Result{resetAt: *chosen, fiveHourReset: reset5h}
|
||||
}
|
||||
|
||||
// isAnthropicWindowExceeded reports whether the Anthropic rate-limit window
// named by window (e.g. "5h" or "7d") has been exceeded.
//
// The window counts as exceeded when its surpassed-threshold header equals
// "true" (case-insensitively), or failing that, when its utilization header
// parses as a number of at least 1.0. Missing or unparsable headers yield
// false.
func isAnthropicWindowExceeded(headers http.Header, window string) bool {
	base := "anthropic-ratelimit-unified-" + window + "-"

	// The explicit flag is the strongest signal, so it is consulted first.
	if strings.EqualFold(headers.Get(base+"surpassed-threshold"), "true") {
		return true
	}

	raw := headers.Get(base + "utilization")
	if raw == "" {
		return false
	}
	ratio, err := strconv.ParseFloat(raw, 64)
	if err != nil {
		return false
	}

	// A tiny tolerance lets values such as 0.9999999999 count as fully used.
	const epsilon = 1e-9
	return ratio >= 1.0-epsilon
}
|
||||
|
||||
// pickSooner returns the earlier of two optional times.
// When exactly one argument is non-nil that one is returned; when both are nil
// the result is nil. If the two times are equal, b is returned.
func pickSooner(a, b *time.Time) *time.Time {
	if a == nil {
		return b
	}
	if b == nil {
		return a
	}
	if a.Before(*b) {
		return a
	}
	return b
}
|
||||
|
||||
// parseOpenAIRateLimitResetTime 解析 OpenAI 格式的 429 响应,返回重置时间的 Unix 时间戳
|
||||
// OpenAI 的 usage_limit_reached 错误格式:
|
||||
//
|
||||
@@ -611,7 +992,19 @@ func (s *RateLimitService) ClearRateLimit(ctx context.Context, accountID int64)
|
||||
if err := s.accountRepo.ClearAntigravityQuotaScopes(ctx, accountID); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.accountRepo.ClearModelRateLimits(ctx, accountID)
|
||||
if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil {
|
||||
return err
|
||||
}
|
||||
// 清除限流时一并清理临时不可调度状态,避免周限/窗口重置后仍被本地临时状态阻断。
|
||||
if err := s.accountRepo.ClearTempUnschedulable(ctx, accountID); err != nil {
|
||||
return err
|
||||
}
|
||||
if s.tempUnschedCache != nil {
|
||||
if err := s.tempUnschedCache.DeleteTempUnsched(ctx, accountID); err != nil {
|
||||
slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID int64) error {
|
||||
|
||||
Reference in New Issue
Block a user