feat(gateway): 双模式用户消息队列 — 串行队列 + 软性限速

新增 UMQ (User Message Queue) 双模式支持:
- serialize: 账号级分布式串行锁 + RPM 自适应延迟(严格限流)
- throttle: 仅 RPM 自适应前置延迟,不阻塞并发(软性限速)

后端:
- config: 新增 Mode 字段,保留 Enabled 向后兼容
- service: 新增 UserMessageQueueService(Lua 锁/延迟算法/清理 worker)
- repository: 新增 UserMsgQueueCache(Redis Lua acquire/release/force-release)
- handler: 新增 UserMsgQueueHelper(SSE ping + 等待循环 + throttle)
- gateway: 按 mode 分支集成 serialize/throttle 逻辑
- lint: 修复 gofmt rewrite rules、errcheck 类型断言、staticcheck QF1012

前端:
- 三态选择器 UI(关闭/软性限速/串行队列)替代 toggle 开关
- BulkEdit 支持 null 语义(不修改)
- i18n 中英文文案

通过 6 轮专家评审(42 次 review)、golangci-lint、单元测试、集成测试。
This commit is contained in:
QTom
2026-03-03 01:02:39 +08:00
parent 7abec1888f
commit a9285b8a94
21 changed files with 1099 additions and 15 deletions

View File

@@ -216,6 +216,10 @@ func AccountFromServiceShallow(a *service.Account) *Account {
buffer := a.GetRPMStickyBuffer()
out.RPMStickyBuffer = &buffer
}
// 用户消息队列模式
if mode := a.GetUserMsgQueueMode(); mode != "" {
out.UserMsgQueueMode = &mode
}
// TLS指纹伪装开关
if a.IsTLSFingerprintEnabled() {
enabled := true

View File

@@ -155,9 +155,10 @@ type Account struct {
// RPM 限制(仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑
BaseRPM *int `json:"base_rpm,omitempty"`
RPMStrategy *string `json:"rpm_strategy,omitempty"`
RPMStickyBuffer *int `json:"rpm_sticky_buffer,omitempty"`
BaseRPM *int `json:"base_rpm,omitempty"`
RPMStrategy *string `json:"rpm_strategy,omitempty"`
RPMStickyBuffer *int `json:"rpm_sticky_buffer,omitempty"`
UserMsgQueueMode *string `json:"user_msg_queue_mode,omitempty"`
// TLS指纹伪装(仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑

View File

@@ -45,6 +45,7 @@ type GatewayHandler struct {
usageRecordWorkerPool *service.UsageRecordWorkerPool
errorPassthroughService *service.ErrorPassthroughService
concurrencyHelper *ConcurrencyHelper
userMsgQueueHelper *UserMsgQueueHelper
maxAccountSwitches int
maxAccountSwitchesGemini int
cfg *config.Config
@@ -63,6 +64,7 @@ func NewGatewayHandler(
apiKeyService *service.APIKeyService,
usageRecordWorkerPool *service.UsageRecordWorkerPool,
errorPassthroughService *service.ErrorPassthroughService,
userMsgQueueService *service.UserMessageQueueService,
cfg *config.Config,
settingService *service.SettingService,
) *GatewayHandler {
@@ -78,6 +80,13 @@ func NewGatewayHandler(
maxAccountSwitchesGemini = cfg.Gateway.MaxAccountSwitchesGemini
}
}
// 初始化用户消息串行队列 helper
var umqHelper *UserMsgQueueHelper
if userMsgQueueService != nil && cfg != nil {
umqHelper = NewUserMsgQueueHelper(userMsgQueueService, SSEPingFormatClaude, pingInterval)
}
return &GatewayHandler{
gatewayService: gatewayService,
geminiCompatService: geminiCompatService,
@@ -89,6 +98,7 @@ func NewGatewayHandler(
usageRecordWorkerPool: usageRecordWorkerPool,
errorPassthroughService: errorPassthroughService,
concurrencyHelper: NewConcurrencyHelper(concurrencyService, SSEPingFormatClaude, pingInterval),
userMsgQueueHelper: umqHelper,
maxAccountSwitches: maxAccountSwitches,
maxAccountSwitchesGemini: maxAccountSwitchesGemini,
cfg: cfg,
@@ -566,6 +576,58 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
// 账号槽位/等待计数需要在超时或断开时安全回收
accountReleaseFunc = wrapReleaseOnDone(c.Request.Context(), accountReleaseFunc)
// ===== 用户消息串行队列 START =====
var queueRelease func()
umqMode := h.getUserMsgQueueMode(account, parsedReq)
switch umqMode {
case config.UMQModeSerialize:
// 串行模式:获取锁 + RPM 延迟 + 释放(当前行为不变)
baseRPM := account.GetBaseRPM()
release, qErr := h.userMsgQueueHelper.AcquireWithWait(
c, account.ID, baseRPM, reqStream, &streamStarted,
h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
reqLog,
)
if qErr != nil {
// fail-open: 记录 warn,不阻止请求
reqLog.Warn("gateway.umq_acquire_failed",
zap.Int64("account_id", account.ID),
zap.Error(qErr),
)
} else {
queueRelease = release
}
case config.UMQModeThrottle:
// 软性限速:仅施加 RPM 自适应延迟,不阻塞并发
baseRPM := account.GetBaseRPM()
if tErr := h.userMsgQueueHelper.ThrottleWithPing(
c, account.ID, baseRPM, reqStream, &streamStarted,
h.cfg.Gateway.UserMessageQueue.WaitTimeout(),
reqLog,
); tErr != nil {
reqLog.Warn("gateway.umq_throttle_failed",
zap.Int64("account_id", account.ID),
zap.Error(tErr),
)
}
default:
if umqMode != "" {
reqLog.Warn("gateway.umq_unknown_mode",
zap.String("mode", umqMode),
zap.Int64("account_id", account.ID),
)
}
}
// 用 wrapReleaseOnDone 确保 context 取消时自动释放(仅 serialize 模式有 queueRelease)
queueRelease = wrapReleaseOnDone(c.Request.Context(), queueRelease)
// 注入回调到 ParsedRequest,使用外层 wrapper 以便提前清理 AfterFunc
parsedReq.OnUpstreamAccepted = queueRelease
// ===== 用户消息串行队列 END =====
// 转发请求 - 根据账号平台分流
var result *service.ForwardResult
requestCtx := c.Request.Context()
@@ -577,6 +639,14 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
} else {
result, err = h.gatewayService.Forward(requestCtx, c, account, parsedReq)
}
// 兜底释放串行锁(正常情况已通过回调提前释放)
if queueRelease != nil {
queueRelease()
}
// 清理回调引用,防止 failover 重试时旧回调被错误调用
parsedReq.OnUpstreamAccepted = nil
if accountReleaseFunc != nil {
accountReleaseFunc()
}
@@ -1431,3 +1501,24 @@ func (h *GatewayHandler) submitUsageRecordTask(task service.UsageRecordTask) {
}()
task(ctx)
}
// getUserMsgQueueMode resolves the user-message-queue (UMQ) mode that applies
// to the current request.
//
// It returns "" (queue disabled) when any of the following holds:
//   - the handler has no UMQ helper configured,
//   - the account is not an Anthropic OAuth/SetupToken account,
//   - the parsed request is not a real user message.
//
// Otherwise the account-level mode wins; if the account does not set one, the
// effective mode from the global gateway configuration is used. The mode
// string is returned as-is, without validation.
func (h *GatewayHandler) getUserMsgQueueMode(account *service.Account, parsed *service.ParsedRequest) string {
	// Short-circuit evaluation keeps the checks in their original order, so
	// account/parsed are never inspected when the helper is absent.
	eligible := h.userMsgQueueHelper != nil &&
		account.IsAnthropicOAuthOrSetupToken() &&
		service.IsRealUserMessage(parsed)
	if !eligible {
		return ""
	}

	// Account-level override takes precedence over the global setting.
	if accountMode := account.GetUserMsgQueueMode(); accountMode != "" {
		return accountMode
	}
	return h.cfg.Gateway.UserMessageQueue.GetEffectiveMode()
}

View File

@@ -0,0 +1,237 @@
package handler
import (
"context"
"fmt"
"net/http"
"sync"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/gin-gonic/gin"
"go.uber.org/zap"
)
// UserMsgQueueHelper is the handler-layer helper for the user message queue.
// It follows the same backoff + SSE ping pattern as ConcurrencyHelper.
type UserMsgQueueHelper struct {
	// queueService performs the actual lock acquire/release and the RPM
	// delay calculation/enforcement.
	queueService *service.UserMessageQueueService
	// pingFormat is the payload written verbatim to the response as a
	// keep-alive ping; an empty value disables pings.
	pingFormat SSEPingFormat
	// pingInterval is the period of the ping ticker while a streaming
	// request is waiting.
	pingInterval time.Duration
}
// NewUserMsgQueueHelper builds a UserMsgQueueHelper around queueService.
//
// pingFormat is the keep-alive payload sent to streaming clients while they
// wait. A non-positive pingInterval is replaced by defaultPingInterval.
func NewUserMsgQueueHelper(
	queueService *service.UserMessageQueueService,
	pingFormat SSEPingFormat,
	pingInterval time.Duration,
) *UserMsgQueueHelper {
	helper := &UserMsgQueueHelper{
		queueService: queueService,
		pingFormat:   pingFormat,
		pingInterval: defaultPingInterval,
	}
	// Only a strictly positive interval overrides the default.
	if pingInterval > 0 {
		helper.pingInterval = pingInterval
	}
	return helper
}
// AcquireWithWait obtains the per-account serial lock, waiting up to timeout
// (bounded additionally by the request context). While a streaming request
// waits, SSE pings are sent via waitForLockWithPing.
//
// Flow:
//  1. Try to acquire the lock immediately; an acquire error is returned as-is
//     (per the original note, fail-open is handled in the service layer).
//  2. If the lock is held by someone else, fall back to polling in
//     waitForLockWithPing.
//  3. Once acquired, apply the RPM-aware delay. If the delay fails because the
//     context ended, the lock is released and the context error is returned.
//     Any other delay error is logged and ignored so the request proceeds
//     with the lock held.
//
// The returned releaseFunc comes from makeReleaseFunc and is guarded by
// sync.Once, so it performs the release at most once. It is nil whenever err
// is non-nil.
func (h *UserMsgQueueHelper) AcquireWithWait(
	c *gin.Context,
	accountID int64,
	baseRPM int,
	isStream bool,
	streamStarted *bool,
	timeout time.Duration,
	reqLog *zap.Logger,
) (releaseFunc func(), err error) {
	ctx, cancel := context.WithTimeout(c.Request.Context(), timeout)
	defer cancel()

	// Fast path: try to take the lock without waiting.
	result, err := h.queueService.TryAcquire(ctx, accountID)
	if err != nil {
		return nil, err
	}
	if !result.Acquired {
		// Lock is busy: poll with backoff, pinging streaming clients.
		return h.waitForLockWithPing(c, ctx, accountID, baseRPM, isStream, streamStarted, reqLog)
	}

	// Building the release closure has no side effect; it is used both for the
	// cancel path below and as the value handed back to the caller.
	release := h.makeReleaseFunc(accountID, result.RequestID, reqLog)

	if delayErr := h.queueService.EnforceDelay(ctx, accountID, baseRPM); delayErr != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			// The wait was cut short by cancellation/timeout: give the lock
			// back right away. makeReleaseFunc uses a fresh background context
			// and logs a failed release instead of dropping it silently.
			release()
			return nil, ctxErr
		}
		// Not a context failure: keep the lock and continue, but leave a trace
		// of the error rather than swallowing it.
		reqLog.Warn("gateway.umq_enforce_delay_failed",
			zap.Int64("account_id", accountID),
			zap.Error(delayErr),
		)
	}

	reqLog.Debug("gateway.umq_lock_acquired", zap.Int64("account_id", accountID))
	return release, nil
}
// waitForLockWithPing polls for the per-account serial lock until it is
// acquired or ctx is done. Retries are spaced by a timer that starts at
// initialBackoff and is advanced with nextBackoff after each failed attempt.
//
// For streaming requests with a non-empty ping format, a ping is written and
// flushed every pingInterval. The first ping also sets the SSE response
// headers and flips *streamStarted to true so the caller knows the response
// has begun.
//
// Returns:
//   - (release, nil) once the lock is held and the RPM-aware delay has been
//     applied; release is once-only (see makeReleaseFunc).
//   - (nil, err) when ctx ends while waiting (the error wraps ctx.Err(), so
//     errors.Is can tell a deadline from a client cancellation), when a ping
//     write fails, when TryAcquire fails, or when ctx ends during the delay
//     (in which case the lock is released first).
func (h *UserMsgQueueHelper) waitForLockWithPing(
	c *gin.Context,
	ctx context.Context,
	accountID int64,
	baseRPM int,
	isStream bool,
	streamStarted *bool,
	reqLog *zap.Logger,
) (func(), error) {
	// Pings are only possible for streaming requests whose writer can flush.
	needPing := isStream && h.pingFormat != ""
	var flusher http.Flusher
	if needPing {
		var ok bool
		if flusher, ok = c.Writer.(http.Flusher); !ok {
			needPing = false
		}
	}

	// A nil channel never fires, which disables the ping case in the select.
	var pingCh <-chan time.Time
	if needPing {
		pingTicker := time.NewTicker(h.pingInterval)
		defer pingTicker.Stop()
		pingCh = pingTicker.C
	}

	backoff := initialBackoff
	timer := time.NewTimer(backoff)
	defer timer.Stop()

	for {
		select {
		case <-ctx.Done():
			// Keep the original message prefix but attach the cause so callers
			// can distinguish deadline expiry from cancellation.
			return nil, fmt.Errorf("umq wait timeout for account %d: %w", accountID, ctx.Err())

		case <-pingCh:
			if !*streamStarted {
				// First byte of the response: switch to SSE mode.
				c.Header("Content-Type", "text/event-stream")
				c.Header("Cache-Control", "no-cache")
				c.Header("Connection", "keep-alive")
				c.Header("X-Accel-Buffering", "no")
				*streamStarted = true
			}
			if _, err := fmt.Fprint(c.Writer, string(h.pingFormat)); err != nil {
				return nil, err
			}
			flusher.Flush()

		case <-timer.C:
			result, err := h.queueService.TryAcquire(ctx, accountID)
			if err != nil {
				return nil, err
			}
			if !result.Acquired {
				// Still busy: schedule the next attempt.
				backoff = nextBackoff(backoff)
				timer.Reset(backoff)
				continue
			}

			// Building the release closure has no side effect; it serves both
			// the cancel path below and the value returned to the caller.
			release := h.makeReleaseFunc(accountID, result.RequestID, reqLog)

			if delayErr := h.queueService.EnforceDelay(ctx, accountID, baseRPM); delayErr != nil {
				if ctxErr := ctx.Err(); ctxErr != nil {
					// Context ended during the delay: hand the lock back now.
					// makeReleaseFunc releases on a fresh background context
					// and logs a failed release instead of discarding it.
					release()
					return nil, ctxErr
				}
				// Not a context failure: proceed with the lock held, but do
				// not drop the error without a trace.
				reqLog.Warn("gateway.umq_enforce_delay_failed",
					zap.Int64("account_id", accountID),
					zap.Error(delayErr),
				)
			}

			reqLog.Debug("gateway.umq_lock_acquired", zap.Int64("account_id", accountID))
			return release, nil
		}
	}
}
// makeReleaseFunc returns a closure that releases the serial lock identified
// by (accountID, requestID). The closure is guarded by sync.Once, so calling
// it repeatedly performs the release only on the first call.
//
// The release runs on a fresh background context with a 5-second timeout, so
// it is independent of the request context. A failed release is logged at
// Warn level; a successful one at Debug level.
func (h *UserMsgQueueHelper) makeReleaseFunc(accountID int64, requestID string, reqLog *zap.Logger) func() {
	var once sync.Once

	doRelease := func() {
		releaseCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()

		err := h.queueService.Release(releaseCtx, accountID, requestID)
		if err == nil {
			reqLog.Debug("gateway.umq_lock_released", zap.Int64("account_id", accountID))
			return
		}
		reqLog.Warn("gateway.umq_release_failed",
			zap.Int64("account_id", accountID),
			zap.Error(err),
		)
	}

	return func() { once.Do(doRelease) }
}
// ThrottleWithPing implements the soft-throttle mode: it sleeps for the
// RPM-aware delay computed by the queue service and then returns, without
// taking the serial lock.
//
// The wait is bounded by timeout and by the request context. For streaming
// requests with a non-empty ping format, a ping is written and flushed every
// pingInterval; the first ping also sets the SSE response headers and flips
// *streamStarted to true.
//
// Returns nil when no delay is needed or the delay elapsed, the context error
// when the context ends first, or the write error when a ping cannot be
// written.
func (h *UserMsgQueueHelper) ThrottleWithPing(
	c *gin.Context,
	accountID int64,
	baseRPM int,
	isStream bool,
	streamStarted *bool,
	timeout time.Duration,
	reqLog *zap.Logger,
) error {
	waitCtx, stop := context.WithTimeout(c.Request.Context(), timeout)
	defer stop()

	wait := h.queueService.CalculateRPMAwareDelay(waitCtx, accountID, baseRPM)
	if wait <= 0 {
		return nil
	}
	reqLog.Debug("gateway.umq_throttle_delay",
		zap.Int64("account_id", accountID),
		zap.Duration("delay", wait),
	)

	// Pings require a streaming request, a ping payload and a flushable
	// writer. When any is missing, pings stays nil and its select case never
	// fires.
	var (
		flusher http.Flusher
		pings   <-chan time.Time
	)
	if isStream && h.pingFormat != "" {
		if f, ok := c.Writer.(http.Flusher); ok {
			flusher = f
			ticker := time.NewTicker(h.pingInterval)
			defer ticker.Stop()
			pings = ticker.C
		}
	}

	// sendPing writes one keep-alive, switching the response to SSE on first use.
	sendPing := func() error {
		if !*streamStarted {
			c.Header("Content-Type", "text/event-stream")
			c.Header("Cache-Control", "no-cache")
			c.Header("Connection", "keep-alive")
			c.Header("X-Accel-Buffering", "no")
			*streamStarted = true
		}
		if _, err := fmt.Fprint(c.Writer, string(h.pingFormat)); err != nil {
			return err
		}
		flusher.Flush()
		return nil
	}

	delayTimer := time.NewTimer(wait)
	defer delayTimer.Stop()

	for {
		select {
		case <-delayTimer.C:
			// Delay fully served; the request may be forwarded.
			return nil
		case <-waitCtx.Done():
			return waitCtx.Err()
		case <-pings:
			if err := sendPing(); err != nil {
				return err
			}
		}
	}
}