fix(antigravity): fast-fail on proxy unavailable, temp-unschedule account
## Problem When a proxy is unreachable, token refresh retries up to 4 times with 30s timeout each, causing requests to hang for ~2 minutes before failing with a generic 502 error. The failed account is not marked, so subsequent requests keep hitting it. ## Changes ### Proxy connection fast-fail - Set TCP dial timeout to 5s and TLS handshake timeout to 5s on antigravity client, so proxy connectivity issues fail within 5s instead of 30s - Reduce overall HTTP client timeout from 30s to 10s - Export `IsConnectionError` for service-layer use - Detect proxy connection errors in `RefreshToken` and return immediately with "proxy unavailable" error (no retries) ### Token refresh temp-unschedulable - Add 8s context timeout for token refresh on request path - Mark account as temp-unschedulable for 10min when refresh fails (both background `TokenRefreshService` and request-path `GetAccessToken`) - Sync temp-unschedulable state to Redis cache for immediate scheduler effect - Inject `TempUnschedCache` into `AntigravityTokenProvider` ### Account failover - Return `UpstreamFailoverError` on `GetAccessToken` failure in `Forward`/`ForwardGemini` to trigger handler-level account switch instead of returning 502 directly ### Proxy probe alignment - Apply same 5s dial/TLS timeout to shared `httpclient` pool - Reduce proxy probe timeout from 30s to 10s
This commit is contained in:
@@ -12,6 +12,9 @@ import (
|
||||
"github.com/Wei-Shaw/sub2api/internal/config"
|
||||
)
|
||||
|
||||
// tokenRefreshTempUnschedDuration token 刷新重试耗尽后临时不可调度的持续时间
|
||||
const tokenRefreshTempUnschedDuration = 10 * time.Minute
|
||||
|
||||
// TokenRefreshService OAuth token自动刷新服务
|
||||
// 定期检查并刷新即将过期的token
|
||||
type TokenRefreshService struct {
|
||||
@@ -317,7 +320,7 @@ func (s *TokenRefreshService) refreshWithRetry(ctx context.Context, account *Acc
|
||||
}
|
||||
}
|
||||
|
||||
// 可重试错误耗尽:仅记录日志,不标记 error(可能是临时网络问题,下个周期继续重试)
|
||||
// 可重试错误耗尽:临时标记账号不可调度,避免请求路径反复命中已知失败的账号
|
||||
slog.Warn("token_refresh.retry_exhausted",
|
||||
"account_id", account.ID,
|
||||
"platform", account.Platform,
|
||||
@@ -325,6 +328,21 @@ func (s *TokenRefreshService) refreshWithRetry(ctx context.Context, account *Acc
|
||||
"error", lastErr,
|
||||
)
|
||||
|
||||
// 设置临时不可调度 10 分钟(不标记 error,保持 status=active 让下个刷新周期能继续尝试)
|
||||
until := time.Now().Add(tokenRefreshTempUnschedDuration)
|
||||
reason := fmt.Sprintf("token refresh retry exhausted: %v", lastErr)
|
||||
if setErr := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, reason); setErr != nil {
|
||||
slog.Warn("token_refresh.set_temp_unschedulable_failed",
|
||||
"account_id", account.ID,
|
||||
"error", setErr,
|
||||
)
|
||||
} else {
|
||||
slog.Info("token_refresh.temp_unschedulable_set",
|
||||
"account_id", account.ID,
|
||||
"until", until.Format(time.RFC3339),
|
||||
)
|
||||
}
|
||||
|
||||
return lastErr
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user