fix(antigravity): fast-fail on proxy unavailable, temp-unschedule account

## Problem

When a proxy is unreachable, token refresh retries up to 4 times with a
30s timeout each, causing requests to hang for ~2 minutes before
failing with a generic 502 error. The failed account is not marked as
unschedulable, so subsequent requests keep hitting it.

## Changes

### Proxy connection fast-fail
- Set TCP dial timeout to 5s and TLS handshake timeout to 5s on the
  antigravity client, so each phase of a failing proxy connection
  gives up within 5s instead of waiting out the 30s client timeout
- Reduce overall HTTP client timeout from 30s to 10s
- Export `IsConnectionError` for service-layer use
- Detect proxy connection errors in `RefreshToken` and return
  immediately with "proxy unavailable" error (no retries)

### Token refresh temp-unschedulable
- Add 8s context timeout for token refresh on request path
- Mark account as temp-unschedulable for 10min when refresh fails
  (both background `TokenRefreshService` and request-path
  `GetAccessToken`)
- Sync temp-unschedulable state to Redis cache for immediate
  scheduler effect
- Inject `TempUnschedCache` into `AntigravityTokenProvider`

### Account failover
- Return `UpstreamFailoverError` on `GetAccessToken` failure in
  `Forward`/`ForwardGemini` to trigger handler-level account switch
  instead of returning 502 directly

### Proxy probe alignment
- Apply same 5s dial/TLS timeout to shared `httpclient` pool
- Reduce proxy probe timeout from 30s to 10s
This commit is contained in:
erio
2026-03-19 23:48:37 +08:00
parent 0236b97d49
commit 528ff5d28c
10 changed files with 125 additions and 20 deletions

View File

@@ -228,9 +228,18 @@ type Client struct {
httpClient *http.Client
}
const (
// proxyDialTimeout bounds the TCP connection attempt made by the dialer, so
// an unreachable proxy fails fast instead of hanging.
// NOTE(review): the original note says this also covers the proxy
// handshake — confirm against the proxy transport configuration.
proxyDialTimeout = 5 * time.Second
// proxyTLSHandshakeTimeout bounds the TLS handshake on the proxied transport.
proxyTLSHandshakeTimeout = 5 * time.Second
// clientTimeout is the overall per-request timeout (connecting, sending,
// waiting for the response, and reading the body).
clientTimeout = 10 * time.Second
)
func NewClient(proxyURL string) (*Client, error) {
client := &http.Client{
Timeout: 30 * time.Second,
Timeout: clientTimeout,
}
_, parsed, err := proxyurl.Parse(proxyURL)
@@ -238,7 +247,12 @@ func NewClient(proxyURL string) (*Client, error) {
return nil, err
}
if parsed != nil {
transport := &http.Transport{}
transport := &http.Transport{
DialContext: (&net.Dialer{
Timeout: proxyDialTimeout,
}).DialContext,
TLSHandshakeTimeout: proxyTLSHandshakeTimeout,
}
if err := proxyutil.ConfigureTransportProxy(transport, parsed); err != nil {
return nil, fmt.Errorf("configure proxy: %w", err)
}
@@ -250,8 +264,8 @@ func NewClient(proxyURL string) (*Client, error) {
}, nil
}
// isConnectionError 判断是否为连接错误(网络超时、DNS 失败、连接拒绝)
func isConnectionError(err error) bool {
// IsConnectionError 判断是否为连接错误(网络超时、DNS 失败、连接拒绝)
func IsConnectionError(err error) bool {
if err == nil {
return false
}
@@ -276,7 +290,7 @@ func isConnectionError(err error) bool {
// shouldFallbackToNextURL 判断是否应切换到下一个 URL
// 与 Antigravity-Manager 保持一致:连接错误、429、408、404、5xx 触发 URL 降级
func shouldFallbackToNextURL(err error, statusCode int) bool {
if isConnectionError(err) {
if IsConnectionError(err) {
return true
}
return statusCode == http.StatusTooManyRequests ||

View File

@@ -274,8 +274,8 @@ func TestNewClient_无代理(t *testing.T) {
if client.httpClient == nil {
t.Fatal("httpClient 为 nil")
}
if client.httpClient.Timeout != 30*time.Second {
t.Errorf("Timeout 不匹配: got %v, want 30s", client.httpClient.Timeout)
if client.httpClient.Timeout != clientTimeout {
t.Errorf("Timeout 不匹配: got %v, want %v", client.httpClient.Timeout, clientTimeout)
}
// 无代理时 Transport 应为 nil(使用默认)
if client.httpClient.Transport != nil {
@@ -322,11 +322,11 @@ func TestNewClient_无效代理URL(t *testing.T) {
}
// ---------------------------------------------------------------------------
// isConnectionError
// IsConnectionError
// ---------------------------------------------------------------------------
func TestIsConnectionError_nil(t *testing.T) {
if isConnectionError(nil) {
if IsConnectionError(nil) {
t.Error("nil 错误不应判定为连接错误")
}
}
@@ -338,7 +338,7 @@ func TestIsConnectionError_超时错误(t *testing.T) {
Net: "tcp",
Err: &timeoutError{},
}
if !isConnectionError(err) {
if !IsConnectionError(err) {
t.Error("超时错误应判定为连接错误")
}
}
@@ -356,7 +356,7 @@ func TestIsConnectionError_netOpError(t *testing.T) {
Net: "tcp",
Err: fmt.Errorf("connection refused"),
}
if !isConnectionError(err) {
if !IsConnectionError(err) {
t.Error("net.OpError 应判定为连接错误")
}
}
@@ -367,14 +367,14 @@ func TestIsConnectionError_urlError(t *testing.T) {
URL: "https://example.com",
Err: fmt.Errorf("some error"),
}
if !isConnectionError(err) {
if !IsConnectionError(err) {
t.Error("url.Error 应判定为连接错误")
}
}
func TestIsConnectionError_普通错误(t *testing.T) {
err := fmt.Errorf("some random error")
if isConnectionError(err) {
if IsConnectionError(err) {
t.Error("普通错误不应判定为连接错误")
}
}
@@ -386,7 +386,7 @@ func TestIsConnectionError_包装的netOpError(t *testing.T) {
Err: fmt.Errorf("connection refused"),
}
err := fmt.Errorf("wrapping: %w", inner)
if !isConnectionError(err) {
if !IsConnectionError(err) {
t.Error("被包装的 net.OpError 应判定为连接错误")
}
}