fix: OAuth 401 不再永久锁死账号,改用临时不可调度实现自动恢复

OAuth 账号收到 401 时,原逻辑同时设置 expires_at=now() 和 SetError(),
但刷新服务只查询 status=active 的账号,导致 error 状态的账号永远无法
被刷新服务拾取,expires_at=now() 实际上是死代码。

修复:
- OAuth 401 使用 SetTempUnschedulable 替代 SetError,保持 status=active
- 新增 oauth_401_cooldown_minutes 配置项(默认 10 分钟)
- 刷新成功后同步清除 DB 和 Redis 中的临时不可调度状态
- 不可重试错误检查(invalid_grant 等)从 Antigravity 推广到所有平台
- 可重试错误耗尽后不再标记 error,下个刷新周期继续重试

恢复流程:
OAuth 401 → temp_unschedulable + expires_at=now → 刷新服务拾取
  → 成功: 清除 temp_unschedulable → 自动恢复
  → invalid_grant: SetError → 永久禁用
  → 网络错误: 仅记日志 → 下周期重试
This commit is contained in:
zqq61
2026-03-02 22:54:38 +08:00
parent f7fa71bc28
commit ec6bcfeb83
7 changed files with 175 additions and 49 deletions

View File

@@ -146,13 +146,29 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
} else {
slog.Info("oauth_401_force_refresh_set", "account_id", account.ID, "platform", account.Platform)
}
// 3. 临时不可调度,替代 SetError保持 status=active 让刷新服务能拾取)
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "OAuth 401: " + upstreamMsg
}
cooldownMinutes := s.cfg.RateLimit.OAuth401CooldownMinutes
if cooldownMinutes <= 0 {
cooldownMinutes = 10
}
until := time.Now().Add(time.Duration(cooldownMinutes) * time.Minute)
if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, msg); err != nil {
slog.Warn("oauth_401_set_temp_unschedulable_failed", "account_id", account.ID, "error", err)
}
shouldDisable = true
} else {
// 非 OAuth 账号APIKey保持原有 SetError 行为
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "Authentication failed (401): " + upstreamMsg
}
s.handleAuthError(ctx, account, msg)
shouldDisable = true
}
msg := "Authentication failed (401): invalid or expired credentials"
if upstreamMsg != "" {
msg = "Authentication failed (401): " + upstreamMsg
}
s.handleAuthError(ctx, account, msg)
shouldDisable = true
case 402:
// 支付要求:余额不足或计费问题,停止调度
msg := "Payment required (402): insufficient balance or billing issue"