Merge remote-tracking branch 'upstream/main'

# Conflicts: # frontend/src/components/account/CreateAccountModal.vue
2026-01-01 16:15:16 +08:00
parent fb86002ef9 4f13c8de0d
commit 7331220e06
215 changed files with 22998 additions and 1641 deletions
--- a/backend/internal/handler/admin/setting_handler.go
+++ b/backend/internal/handler/admin/setting_handler.go
@@ -10,15 +10,17 @@ import (

 // SettingHandler 系统设置处理器
 type SettingHandler struct {
-	settingService *service.SettingService
-	emailService   *service.EmailService
+	settingService   *service.SettingService
+	emailService     *service.EmailService
+	turnstileService *service.TurnstileService
 }

 // NewSettingHandler 创建系统设置处理器
-func NewSettingHandler(settingService *service.SettingService, emailService *service.EmailService) *SettingHandler {
+func NewSettingHandler(settingService *service.SettingService, emailService *service.EmailService, turnstileService *service.TurnstileService) *SettingHandler {
 	return &SettingHandler{
-		settingService: settingService,
-		emailService:   emailService,
+		settingService:   settingService,
+		emailService:     emailService,
+		turnstileService: turnstileService,
 	}
 }

@@ -108,6 +110,36 @@ func (h *SettingHandler) UpdateSettings(c *gin.Context) {
 		req.SmtpPort = 587
 	}

+	// Turnstile 参数验证
+	if req.TurnstileEnabled {
+		// 检查必填字段
+		if req.TurnstileSiteKey == "" {
+			response.BadRequest(c, "Turnstile Site Key is required when enabled")
+			return
+		}
+		if req.TurnstileSecretKey == "" {
+			response.BadRequest(c, "Turnstile Secret Key is required when enabled")
+			return
+		}
+
+		// 获取当前设置，检查参数是否有变化
+		currentSettings, err := h.settingService.GetAllSettings(c.Request.Context())
+		if err != nil {
+			response.ErrorFrom(c, err)
+			return
+		}
+
+		// 当 site_key 或 secret_key 任一变化时验证（避免配置错误导致无法登录）
+		siteKeyChanged := currentSettings.TurnstileSiteKey != req.TurnstileSiteKey
+		secretKeyChanged := currentSettings.TurnstileSecretKey != req.TurnstileSecretKey
+		if siteKeyChanged || secretKeyChanged {
+			if err := h.turnstileService.ValidateSecretKey(c.Request.Context(), req.TurnstileSecretKey); err != nil {
+				response.ErrorFrom(c, err)
+				return
+			}
+		}
+	}
+
 	settings := &service.SystemSettings{
 		RegistrationEnabled: req.RegistrationEnabled,
 		EmailVerifyEnabled:  req.EmailVerifyEnabled,
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -67,6 +67,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	// 读取请求体
 	body, err := io.ReadAll(c.Request.Body)
 	if err != nil {
+		if maxErr, ok := extractMaxBytesError(err); ok {
+			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
+			return
+		}
 		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 		return
 	}
@@ -76,15 +80,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		return
 	}

-	// 解析请求获取模型名和stream
-	var req struct {
-		Model  string `json:"model"`
-		Stream bool   `json:"stream"`
-	}
-	if err := json.Unmarshal(body, &req); err != nil {
+	parsedReq, err := service.ParseGatewayRequest(body)
+	if err != nil {
 		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 		return
 	}
+	reqModel := parsedReq.Model
+	reqStream := parsedReq.Stream
+
+	// 验证 model 必填
+	if reqModel == "" {
+		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
+		return
+	}

 	// Track if we've started streaming (for error handling)
 	streamStarted := false
@@ -106,7 +114,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	defer h.concurrencyHelper.DecrementWaitCount(c.Request.Context(), subject.UserID)

 	// 1. 首先获取用户并发槽位
-	userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, req.Stream, &streamStarted)
+	userReleaseFunc, err := h.concurrencyHelper.AcquireUserSlotWithWait(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted)
 	if err != nil {
 		log.Printf("User concurrency acquire failed: %v", err)
 		h.handleConcurrencyError(c, err, "user", streamStarted)
@@ -124,7 +132,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	}

 	// 计算粘性会话hash
-	sessionHash := h.gatewayService.GenerateSessionHash(body)
+	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

 	// 获取平台：优先使用强制平台（/antigravity 路由，中间件已设置 request.Context），否则使用分组平台
 	platform := ""
@@ -133,6 +141,10 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	} else if apiKey.Group != nil {
 		platform = apiKey.Group.Platform
 	}
+	sessionKey := sessionHash
+	if platform == service.PlatformGemini && sessionHash != "" {
+		sessionKey = "gemini:" + sessionHash
+	}

 	if platform == service.PlatformGemini {
 		const maxAccountSwitches = 3
@@ -141,7 +153,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		lastFailoverStatus := 0

 		for {
-			account, err := h.geminiCompatService.SelectAccountForModelWithExclusions(c.Request.Context(), apiKey.GroupID, sessionHash, req.Model, failedAccountIDs)
+			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs)
 			if err != nil {
 				if len(failedAccountIDs) == 0 {
 					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
@@ -150,35 +162,77 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 				h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
 				return
 			}
+			account := selection.Account

 			// 检查预热请求拦截（在账号选择后、转发前检查）
 			if account.IsInterceptWarmupEnabled() && isWarmupRequest(body) {
-				if req.Stream {
-					sendMockWarmupStream(c, req.Model)
+				if selection.Acquired && selection.ReleaseFunc != nil {
+					selection.ReleaseFunc()
+				}
+				if reqStream {
+					sendMockWarmupStream(c, reqModel)
 				} else {
-					sendMockWarmupResponse(c, req.Model)
+					sendMockWarmupResponse(c, reqModel)
 				}
 				return
 			}

 			// 3. 获取账号并发槽位
-			accountReleaseFunc, err := h.concurrencyHelper.AcquireAccountSlotWithWait(c, account.ID, account.Concurrency, req.Stream, &streamStarted)
-			if err != nil {
-				log.Printf("Account concurrency acquire failed: %v", err)
-				h.handleConcurrencyError(c, err, "account", streamStarted)
-				return
+			accountReleaseFunc := selection.ReleaseFunc
+			var accountWaitRelease func()
+			if !selection.Acquired {
+				if selection.WaitPlan == nil {
+					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
+					return
+				}
+				canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+				if err != nil {
+					log.Printf("Increment account wait count failed: %v", err)
+				} else if !canWait {
+					log.Printf("Account wait queue full: account=%d", account.ID)
+					h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
+					return
+				} else {
+					// Only set release function if increment succeeded
+					accountWaitRelease = func() {
+						h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+					}
+				}
+
+				accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
+					c,
+					account.ID,
+					selection.WaitPlan.MaxConcurrency,
+					selection.WaitPlan.Timeout,
+					reqStream,
+					&streamStarted,
+				)
+				if err != nil {
+					if accountWaitRelease != nil {
+						accountWaitRelease()
+					}
+					log.Printf("Account concurrency acquire failed: %v", err)
+					h.handleConcurrencyError(c, err, "account", streamStarted)
+					return
+				}
+				if err := h.gatewayService.BindStickySession(c.Request.Context(), sessionKey, account.ID); err != nil {
+					log.Printf("Bind sticky session failed: %v", err)
+				}
 			}

 			// 转发请求 - 根据账号平台分流
 			var result *service.ForwardResult
 			if account.Platform == service.PlatformAntigravity {
-				result, err = h.antigravityGatewayService.ForwardGemini(c.Request.Context(), c, account, req.Model, "generateContent", req.Stream, body)
+				result, err = h.antigravityGatewayService.ForwardGemini(c.Request.Context(), c, account, reqModel, "generateContent", reqStream, body)
 			} else {
 				result, err = h.geminiCompatService.Forward(c.Request.Context(), c, account, body)
 			}
 			if accountReleaseFunc != nil {
 				accountReleaseFunc()
 			}
+			if accountWaitRelease != nil {
+				accountWaitRelease()
+			}
 			if err != nil {
 				var failoverErr *service.UpstreamFailoverError
 				if errors.As(err, &failoverErr) {
@@ -223,7 +277,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {

 	for {
 		// 选择支持该模型的账号
-		account, err := h.gatewayService.SelectAccountForModelWithExclusions(c.Request.Context(), apiKey.GroupID, sessionHash, req.Model, failedAccountIDs)
+		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs)
 		if err != nil {
 			if len(failedAccountIDs) == 0 {
 				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
@@ -232,23 +286,62 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 			h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
 			return
 		}
+		account := selection.Account

 		// 检查预热请求拦截（在账号选择后、转发前检查）
 		if account.IsInterceptWarmupEnabled() && isWarmupRequest(body) {
-			if req.Stream {
-				sendMockWarmupStream(c, req.Model)
+			if selection.Acquired && selection.ReleaseFunc != nil {
+				selection.ReleaseFunc()
+			}
+			if reqStream {
+				sendMockWarmupStream(c, reqModel)
 			} else {
-				sendMockWarmupResponse(c, req.Model)
+				sendMockWarmupResponse(c, reqModel)
 			}
 			return
 		}

 		// 3. 获取账号并发槽位
-		accountReleaseFunc, err := h.concurrencyHelper.AcquireAccountSlotWithWait(c, account.ID, account.Concurrency, req.Stream, &streamStarted)
-		if err != nil {
-			log.Printf("Account concurrency acquire failed: %v", err)
-			h.handleConcurrencyError(c, err, "account", streamStarted)
-			return
+		accountReleaseFunc := selection.ReleaseFunc
+		var accountWaitRelease func()
+		if !selection.Acquired {
+			if selection.WaitPlan == nil {
+				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
+				return
+			}
+			canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+			if err != nil {
+				log.Printf("Increment account wait count failed: %v", err)
+			} else if !canWait {
+				log.Printf("Account wait queue full: account=%d", account.ID)
+				h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
+				return
+			} else {
+				// Only set release function if increment succeeded
+				accountWaitRelease = func() {
+					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+				}
+			}
+
+			accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
+				c,
+				account.ID,
+				selection.WaitPlan.MaxConcurrency,
+				selection.WaitPlan.Timeout,
+				reqStream,
+				&streamStarted,
+			)
+			if err != nil {
+				if accountWaitRelease != nil {
+					accountWaitRelease()
+				}
+				log.Printf("Account concurrency acquire failed: %v", err)
+				h.handleConcurrencyError(c, err, "account", streamStarted)
+				return
+			}
+			if err := h.gatewayService.BindStickySession(c.Request.Context(), sessionKey, account.ID); err != nil {
+				log.Printf("Bind sticky session failed: %v", err)
+			}
 		}

 		// 转发请求 - 根据账号平台分流
@@ -256,11 +349,14 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		if account.Platform == service.PlatformAntigravity {
 			result, err = h.antigravityGatewayService.Forward(c.Request.Context(), c, account, body)
 		} else {
-			result, err = h.gatewayService.Forward(c.Request.Context(), c, account, body)
+			result, err = h.gatewayService.Forward(c.Request.Context(), c, account, parsedReq)
 		}
 		if accountReleaseFunc != nil {
 			accountReleaseFunc()
 		}
+		if accountWaitRelease != nil {
+			accountWaitRelease()
+		}
 		if err != nil {
 			var failoverErr *service.UpstreamFailoverError
 			if errors.As(err, &failoverErr) {
@@ -525,6 +621,10 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) {
 	// 读取请求体
 	body, err := io.ReadAll(c.Request.Body)
 	if err != nil {
+		if maxErr, ok := extractMaxBytesError(err); ok {
+			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
+			return
+		}
 		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 		return
 	}
@@ -534,15 +634,18 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) {
 		return
 	}

-	// 解析请求获取模型名
-	var req struct {
-		Model string `json:"model"`
-	}
-	if err := json.Unmarshal(body, &req); err != nil {
+	parsedReq, err := service.ParseGatewayRequest(body)
+	if err != nil {
 		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
 		return
 	}

+	// 验证 model 必填
+	if parsedReq.Model == "" {
+		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
+		return
+	}
+
 	// 获取订阅信息（可能为nil）
 	subscription, _ := middleware2.GetSubscriptionFromContext(c)

@@ -554,17 +657,17 @@ func (h *GatewayHandler) CountTokens(c *gin.Context) {
 	}

 	// 计算粘性会话 hash
-	sessionHash := h.gatewayService.GenerateSessionHash(body)
+	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)

 	// 选择支持该模型的账号
-	account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, req.Model)
+	account, err := h.gatewayService.SelectAccountForModel(c.Request.Context(), apiKey.GroupID, sessionHash, parsedReq.Model)
 	if err != nil {
 		h.errorResponse(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error())
 		return
 	}

 	// 转发请求（不记录使用量）
-	if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, body); err != nil {
+	if err := h.gatewayService.ForwardCountTokens(c.Request.Context(), c, account, parsedReq); err != nil {
 		log.Printf("Forward count_tokens request failed: %v", err)
 		// 错误响应已在 ForwardCountTokens 中处理
 		return
--- a/backend/internal/handler/gateway_helper.go
+++ b/backend/internal/handler/gateway_helper.go
@@ -3,6 +3,7 @@ package handler
 import (
 	"context"
 	"fmt"
+	"math/rand"
 	"net/http"
 	"time"

@@ -11,11 +12,28 @@ import (
 	"github.com/gin-gonic/gin"
 )

+// 并发槽位等待相关常量
+//
+// 性能优化说明：
+// 原实现使用固定间隔（100ms）轮询并发槽位，存在以下问题：
+// 1. 高并发时频繁轮询增加 Redis 压力
+// 2. 固定间隔可能导致多个请求同时重试（惊群效应）
+//
+// 新实现使用指数退避 + 抖动算法：
+// 1. 初始退避 100ms，每次乘以 1.5，最大 2s
+// 2. 添加 ±20% 的随机抖动，分散重试时间点
+// 3. 减少 Redis 压力，避免惊群效应
 const (
-	// maxConcurrencyWait is the maximum time to wait for a concurrency slot
+	// maxConcurrencyWait 等待并发槽位的最大时间
 	maxConcurrencyWait = 30 * time.Second
-	// pingInterval is the interval for sending ping events during slot wait
+	// pingInterval 流式响应等待时发送 ping 的间隔
 	pingInterval = 15 * time.Second
+	// initialBackoff 初始退避时间
+	initialBackoff = 100 * time.Millisecond
+	// backoffMultiplier 退避时间乘数（指数退避）
+	backoffMultiplier = 1.5
+	// maxBackoff 最大退避时间
+	maxBackoff = 2 * time.Second
 )

 // SSEPingFormat defines the format of SSE ping events for different platforms
@@ -65,6 +83,16 @@ func (h *ConcurrencyHelper) DecrementWaitCount(ctx context.Context, userID int64
 	h.concurrencyService.DecrementWaitCount(ctx, userID)
 }

+// IncrementAccountWaitCount increments the wait count for an account
+func (h *ConcurrencyHelper) IncrementAccountWaitCount(ctx context.Context, accountID int64, maxWait int) (bool, error) {
+	return h.concurrencyService.IncrementAccountWaitCount(ctx, accountID, maxWait)
+}
+
+// DecrementAccountWaitCount decrements the wait count for an account
+func (h *ConcurrencyHelper) DecrementAccountWaitCount(ctx context.Context, accountID int64) {
+	h.concurrencyService.DecrementAccountWaitCount(ctx, accountID)
+}
+
 // AcquireUserSlotWithWait acquires a user concurrency slot, waiting if necessary.
 // For streaming requests, sends ping events during the wait.
 // streamStarted is updated if streaming response has begun.
@@ -108,7 +136,12 @@ func (h *ConcurrencyHelper) AcquireAccountSlotWithWait(c *gin.Context, accountID
 // waitForSlotWithPing waits for a concurrency slot, sending ping events for streaming requests.
 // streamStarted pointer is updated when streaming begins (for proper error handling by caller).
 func (h *ConcurrencyHelper) waitForSlotWithPing(c *gin.Context, slotType string, id int64, maxConcurrency int, isStream bool, streamStarted *bool) (func(), error) {
-	ctx, cancel := context.WithTimeout(c.Request.Context(), maxConcurrencyWait)
+	return h.waitForSlotWithPingTimeout(c, slotType, id, maxConcurrency, maxConcurrencyWait, isStream, streamStarted)
+}
+
+// waitForSlotWithPingTimeout waits for a concurrency slot with a custom timeout.
+func (h *ConcurrencyHelper) waitForSlotWithPingTimeout(c *gin.Context, slotType string, id int64, maxConcurrency int, timeout time.Duration, isStream bool, streamStarted *bool) (func(), error) {
+	ctx, cancel := context.WithTimeout(c.Request.Context(), timeout)
 	defer cancel()

 	// Determine if ping is needed (streaming + ping format defined)
@@ -131,8 +164,10 @@ func (h *ConcurrencyHelper) waitForSlotWithPing(c *gin.Context, slotType string,
 		pingCh = pingTicker.C
 	}

-	pollTicker := time.NewTicker(100 * time.Millisecond)
-	defer pollTicker.Stop()
+	backoff := initialBackoff
+	timer := time.NewTimer(backoff)
+	defer timer.Stop()
+	rng := rand.New(rand.NewSource(time.Now().UnixNano()))

 	for {
 		select {
@@ -156,7 +191,7 @@ func (h *ConcurrencyHelper) waitForSlotWithPing(c *gin.Context, slotType string,
 			}
 			flusher.Flush()

-		case <-pollTicker.C:
+		case <-timer.C:
 			// Try to acquire slot
 			var result *service.AcquireResult
 			var err error
@@ -174,6 +209,40 @@ func (h *ConcurrencyHelper) waitForSlotWithPing(c *gin.Context, slotType string,
 			if result.Acquired {
 				return result.ReleaseFunc, nil
 			}
+			backoff = nextBackoff(backoff, rng)
+			timer.Reset(backoff)
 		}
 	}
 }
+
+// AcquireAccountSlotWithWaitTimeout acquires an account slot with a custom timeout (keeps SSE ping).
+func (h *ConcurrencyHelper) AcquireAccountSlotWithWaitTimeout(c *gin.Context, accountID int64, maxConcurrency int, timeout time.Duration, isStream bool, streamStarted *bool) (func(), error) {
+	return h.waitForSlotWithPingTimeout(c, "account", accountID, maxConcurrency, timeout, isStream, streamStarted)
+}
+
+// nextBackoff 计算下一次退避时间
+// 性能优化：使用指数退避 + 随机抖动，避免惊群效应
+// current: 当前退避时间
+// rng: 随机数生成器（可为 nil，此时不添加抖动）
+// 返回值：下一次退避时间（100ms ~ 2s 之间）
+func nextBackoff(current time.Duration, rng *rand.Rand) time.Duration {
+	// 指数退避：当前时间 * 1.5
+	next := time.Duration(float64(current) * backoffMultiplier)
+	if next > maxBackoff {
+		next = maxBackoff
+	}
+	if rng == nil {
+		return next
+	}
+	// 添加 ±20% 的随机抖动（jitter 范围 0.8 ~ 1.2）
+	// 抖动可以分散多个请求的重试时间点，避免同时冲击 Redis
+	jitter := 0.8 + rng.Float64()*0.4
+	jittered := time.Duration(float64(next) * jitter)
+	if jittered < initialBackoff {
+		return initialBackoff
+	}
+	if jittered > maxBackoff {
+		return maxBackoff
+	}
+	return jittered
+}
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -148,6 +148,10 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {

 	body, err := io.ReadAll(c.Request.Body)
 	if err != nil {
+		if maxErr, ok := extractMaxBytesError(err); ok {
+			googleError(c, http.StatusRequestEntityTooLarge, buildBodyTooLargeMessage(maxErr.Limit))
+			return
+		}
 		googleError(c, http.StatusBadRequest, "Failed to read request body")
 		return
 	}
@@ -191,14 +195,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 	}

 	// 3) select account (sticky session based on request body)
-	sessionHash := h.gatewayService.GenerateSessionHash(body)
+	parsedReq, _ := service.ParseGatewayRequest(body)
+	sessionHash := h.gatewayService.GenerateSessionHash(parsedReq)
+	sessionKey := sessionHash
+	if sessionHash != "" {
+		sessionKey = "gemini:" + sessionHash
+	}
 	const maxAccountSwitches = 3
 	switchCount := 0
 	failedAccountIDs := make(map[int64]struct{})
 	lastFailoverStatus := 0

 	for {
-		account, err := h.geminiCompatService.SelectAccountForModelWithExclusions(c.Request.Context(), apiKey.GroupID, sessionHash, modelName, failedAccountIDs)
+		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs)
 		if err != nil {
 			if len(failedAccountIDs) == 0 {
 				googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
@@ -207,12 +216,48 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 			handleGeminiFailoverExhausted(c, lastFailoverStatus)
 			return
 		}
+		account := selection.Account

 		// 4) account concurrency slot
-		accountReleaseFunc, err := geminiConcurrency.AcquireAccountSlotWithWait(c, account.ID, account.Concurrency, stream, &streamStarted)
-		if err != nil {
-			googleError(c, http.StatusTooManyRequests, err.Error())
-			return
+		accountReleaseFunc := selection.ReleaseFunc
+		var accountWaitRelease func()
+		if !selection.Acquired {
+			if selection.WaitPlan == nil {
+				googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts")
+				return
+			}
+			canWait, err := geminiConcurrency.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+			if err != nil {
+				log.Printf("Increment account wait count failed: %v", err)
+			} else if !canWait {
+				log.Printf("Account wait queue full: account=%d", account.ID)
+				googleError(c, http.StatusTooManyRequests, "Too many pending requests, please retry later")
+				return
+			} else {
+				// Only set release function if increment succeeded
+				accountWaitRelease = func() {
+					geminiConcurrency.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+				}
+			}
+
+			accountReleaseFunc, err = geminiConcurrency.AcquireAccountSlotWithWaitTimeout(
+				c,
+				account.ID,
+				selection.WaitPlan.MaxConcurrency,
+				selection.WaitPlan.Timeout,
+				stream,
+				&streamStarted,
+			)
+			if err != nil {
+				if accountWaitRelease != nil {
+					accountWaitRelease()
+				}
+				googleError(c, http.StatusTooManyRequests, err.Error())
+				return
+			}
+			if err := h.gatewayService.BindStickySession(c.Request.Context(), sessionKey, account.ID); err != nil {
+				log.Printf("Bind sticky session failed: %v", err)
+			}
 		}

 		// 5) forward (根据平台分流)
@@ -225,6 +270,9 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 		if accountReleaseFunc != nil {
 			accountReleaseFunc()
 		}
+		if accountWaitRelease != nil {
+			accountWaitRelease()
+		}
 		if err != nil {
 			var failoverErr *service.UpstreamFailoverError
 			if errors.As(err, &failoverErr) {
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -56,6 +56,10 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 	// Read request body
 	body, err := io.ReadAll(c.Request.Body)
 	if err != nil {
+		if maxErr, ok := extractMaxBytesError(err); ok {
+			h.errorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
+			return
+		}
 		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
 		return
 	}
@@ -76,6 +80,12 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 	reqModel, _ := reqBody["model"].(string)
 	reqStream, _ := reqBody["stream"].(bool)

+	// 验证 model 必填
+	if reqModel == "" {
+		h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
+		return
+	}
+
 	// For non-Codex CLI requests, set default instructions
 	userAgent := c.GetHeader("User-Agent")
 	if !openai.IsCodexCLIRequest(userAgent) {
@@ -136,7 +146,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 	for {
 		// Select account supporting the requested model
 		log.Printf("[OpenAI Handler] Selecting account: groupID=%v model=%s", apiKey.GroupID, reqModel)
-		account, err := h.gatewayService.SelectAccountForModelWithExclusions(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, failedAccountIDs)
+		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionHash, reqModel, failedAccountIDs)
 		if err != nil {
 			log.Printf("[OpenAI Handler] SelectAccount failed: %v", err)
 			if len(failedAccountIDs) == 0 {
@@ -146,14 +156,50 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 			h.handleFailoverExhausted(c, lastFailoverStatus, streamStarted)
 			return
 		}
+		account := selection.Account
 		log.Printf("[OpenAI Handler] Selected account: id=%d name=%s", account.ID, account.Name)

 		// 3. Acquire account concurrency slot
-		accountReleaseFunc, err := h.concurrencyHelper.AcquireAccountSlotWithWait(c, account.ID, account.Concurrency, reqStream, &streamStarted)
-		if err != nil {
-			log.Printf("Account concurrency acquire failed: %v", err)
-			h.handleConcurrencyError(c, err, "account", streamStarted)
-			return
+		accountReleaseFunc := selection.ReleaseFunc
+		var accountWaitRelease func()
+		if !selection.Acquired {
+			if selection.WaitPlan == nil {
+				h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
+				return
+			}
+			canWait, err := h.concurrencyHelper.IncrementAccountWaitCount(c.Request.Context(), account.ID, selection.WaitPlan.MaxWaiting)
+			if err != nil {
+				log.Printf("Increment account wait count failed: %v", err)
+			} else if !canWait {
+				log.Printf("Account wait queue full: account=%d", account.ID)
+				h.handleStreamingAwareError(c, http.StatusTooManyRequests, "rate_limit_error", "Too many pending requests, please retry later", streamStarted)
+				return
+			} else {
+				// Only set release function if increment succeeded
+				accountWaitRelease = func() {
+					h.concurrencyHelper.DecrementAccountWaitCount(c.Request.Context(), account.ID)
+				}
+			}
+
+			accountReleaseFunc, err = h.concurrencyHelper.AcquireAccountSlotWithWaitTimeout(
+				c,
+				account.ID,
+				selection.WaitPlan.MaxConcurrency,
+				selection.WaitPlan.Timeout,
+				reqStream,
+				&streamStarted,
+			)
+			if err != nil {
+				if accountWaitRelease != nil {
+					accountWaitRelease()
+				}
+				log.Printf("Account concurrency acquire failed: %v", err)
+				h.handleConcurrencyError(c, err, "account", streamStarted)
+				return
+			}
+			if err := h.gatewayService.BindStickySession(c.Request.Context(), sessionHash, account.ID); err != nil {
+				log.Printf("Bind sticky session failed: %v", err)
+			}
 		}

 		// Forward request
@@ -161,6 +207,9 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
 		if accountReleaseFunc != nil {
 			accountReleaseFunc()
 		}
+		if accountWaitRelease != nil {
+			accountWaitRelease()
+		}
 		if err != nil {
 			var failoverErr *service.UpstreamFailoverError
 			if errors.As(err, &failoverErr) {
--- a/backend/internal/handler/request_body_limit.go
+++ b/backend/internal/handler/request_body_limit.go
@@ -0,0 +1,27 @@
+package handler
+
+import (
+	"errors"
+	"fmt"
+	"net/http"
+)
+
+func extractMaxBytesError(err error) (*http.MaxBytesError, bool) {
+	var maxErr *http.MaxBytesError
+	if errors.As(err, &maxErr) {
+		return maxErr, true
+	}
+	return nil, false
+}
+
+func formatBodyLimit(limit int64) string {
+	const mb = 1024 * 1024
+	if limit >= mb {
+		return fmt.Sprintf("%dMB", limit/mb)
+	}
+	return fmt.Sprintf("%dB", limit)
+}
+
+func buildBodyTooLargeMessage(limit int64) string {
+	return fmt.Sprintf("Request body too large, limit is %s", formatBodyLimit(limit))
+}
--- a/backend/internal/handler/request_body_limit_test.go
+++ b/backend/internal/handler/request_body_limit_test.go
@@ -0,0 +1,45 @@
+package handler
+
+import (
+	"bytes"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/Wei-Shaw/sub2api/internal/server/middleware"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+func TestRequestBodyLimitTooLarge(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	limit := int64(16)
+	router := gin.New()
+	router.Use(middleware.RequestBodyLimit(limit))
+	router.POST("/test", func(c *gin.Context) {
+		_, err := io.ReadAll(c.Request.Body)
+		if err != nil {
+			if maxErr, ok := extractMaxBytesError(err); ok {
+				c.JSON(http.StatusRequestEntityTooLarge, gin.H{
+					"error": buildBodyTooLargeMessage(maxErr.Limit),
+				})
+				return
+			}
+			c.JSON(http.StatusBadRequest, gin.H{
+				"error": "read_failed",
+			})
+			return
+		}
+		c.JSON(http.StatusOK, gin.H{"ok": true})
+	})
+
+	payload := bytes.Repeat([]byte("a"), int(limit+1))
+	req := httptest.NewRequest(http.MethodPost, "/test", bytes.NewReader(payload))
+	recorder := httptest.NewRecorder()
+	router.ServeHTTP(recorder, req)
+
+	require.Equal(t, http.StatusRequestEntityTooLarge, recorder.Code)
+	require.Contains(t, recorder.Body.String(), buildBodyTooLargeMessage(limit))
+}