Merge pull request #679 from DaydreamCoding/feat/account-rpm-limit

feat: 添加账号级别 RPM(每分钟请求数)限流功能
This commit is contained in:
Wesley Liddick
2026-02-28 22:37:10 +08:00
committed by GitHub
27 changed files with 1174 additions and 31 deletions

View File

@@ -138,7 +138,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
accountTestService := service.NewAccountTestService(accountRepository, geminiTokenProvider, antigravityGatewayService, httpUpstream, configConfig)
crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
sessionLimitCache := repository.ProvideSessionLimitCache(redisClient, configConfig)
accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService, sessionLimitCache, compositeTokenCacheInvalidator)
rpmCache := repository.NewRPMCache(redisClient)
accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService, sessionLimitCache, rpmCache, compositeTokenCacheInvalidator)
adminAnnouncementHandler := admin.NewAnnouncementHandler(announcementService)
dataManagementService := service.NewDataManagementService()
dataManagementHandler := admin.NewDataManagementHandler(dataManagementService)
@@ -160,7 +161,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
claudeTokenProvider := service.NewClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService)
digestSessionStore := service.NewDigestSessionStore()
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, digestSessionStore)
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, userGroupRateRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache, rpmCache, digestSessionStore)
openAITokenProvider := service.NewOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService)
openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider)
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)

View File

@@ -64,6 +64,7 @@ func setupAccountDataRouter() (*gin.Engine, *stubAdminService) {
nil,
nil,
nil,
nil,
)
router.GET("/api/v1/admin/accounts/data", h.ExportData)

View File

@@ -53,6 +53,7 @@ type AccountHandler struct {
concurrencyService *service.ConcurrencyService
crsSyncService *service.CRSSyncService
sessionLimitCache service.SessionLimitCache
rpmCache service.RPMCache
tokenCacheInvalidator service.TokenCacheInvalidator
}
@@ -69,6 +70,7 @@ func NewAccountHandler(
concurrencyService *service.ConcurrencyService,
crsSyncService *service.CRSSyncService,
sessionLimitCache service.SessionLimitCache,
rpmCache service.RPMCache,
tokenCacheInvalidator service.TokenCacheInvalidator,
) *AccountHandler {
return &AccountHandler{
@@ -83,6 +85,7 @@ func NewAccountHandler(
concurrencyService: concurrencyService,
crsSyncService: crsSyncService,
sessionLimitCache: sessionLimitCache,
rpmCache: rpmCache,
tokenCacheInvalidator: tokenCacheInvalidator,
}
}
@@ -154,6 +157,7 @@ type AccountWithConcurrency struct {
// 以下字段仅对 Anthropic OAuth/SetupToken 账号有效,且仅在启用相应功能时返回
CurrentWindowCost *float64 `json:"current_window_cost,omitempty"` // 当前窗口费用
ActiveSessions *int `json:"active_sessions,omitempty"` // 当前活跃会话数
CurrentRPM *int `json:"current_rpm,omitempty"` // 当前分钟 RPM 计数
}
func (h *AccountHandler) buildAccountResponseWithRuntime(ctx context.Context, account *service.Account) AccountWithConcurrency {
@@ -189,6 +193,12 @@ func (h *AccountHandler) buildAccountResponseWithRuntime(ctx context.Context, ac
}
}
}
if h.rpmCache != nil && account.GetBaseRPM() > 0 {
if rpm, err := h.rpmCache.GetRPM(ctx, account.ID); err == nil {
item.CurrentRPM = &rpm
}
}
}
return item
@@ -231,9 +241,10 @@ func (h *AccountHandler) List(c *gin.Context) {
concurrencyCounts = make(map[int64]int)
}
// 识别需要查询窗口费用会话数的账号Anthropic OAuth/SetupToken 且启用了相应功能)
// 识别需要查询窗口费用会话数和 RPM 的账号Anthropic OAuth/SetupToken 且启用了相应功能)
windowCostAccountIDs := make([]int64, 0)
sessionLimitAccountIDs := make([]int64, 0)
rpmAccountIDs := make([]int64, 0)
sessionIdleTimeouts := make(map[int64]time.Duration) // 各账号的会话空闲超时配置
for i := range accounts {
acc := &accounts[i]
@@ -245,12 +256,24 @@ func (h *AccountHandler) List(c *gin.Context) {
sessionLimitAccountIDs = append(sessionLimitAccountIDs, acc.ID)
sessionIdleTimeouts[acc.ID] = time.Duration(acc.GetSessionIdleTimeoutMinutes()) * time.Minute
}
if acc.GetBaseRPM() > 0 {
rpmAccountIDs = append(rpmAccountIDs, acc.ID)
}
}
}
// 并行获取窗口费用活跃会话数
// 并行获取窗口费用活跃会话数和 RPM 计数
var windowCosts map[int64]float64
var activeSessions map[int64]int
var rpmCounts map[int64]int
// 获取 RPM 计数(批量查询)
if len(rpmAccountIDs) > 0 && h.rpmCache != nil {
rpmCounts, _ = h.rpmCache.GetRPMBatch(c.Request.Context(), rpmAccountIDs)
if rpmCounts == nil {
rpmCounts = make(map[int64]int)
}
}
// 获取活跃会话数(批量查询,传入各账号的 idleTimeout 配置)
if len(sessionLimitAccountIDs) > 0 && h.sessionLimitCache != nil {
@@ -311,6 +334,13 @@ func (h *AccountHandler) List(c *gin.Context) {
}
}
// 添加 RPM 计数(仅当启用时)
if rpmCounts != nil {
if rpm, ok := rpmCounts[acc.ID]; ok {
item.CurrentRPM = &rpm
}
}
result[i] = item
}
@@ -453,6 +483,8 @@ func (h *AccountHandler) Create(c *gin.Context) {
response.BadRequest(c, "rate_multiplier must be >= 0")
return
}
// base_rpm 输入校验:负值归零,超过 10000 截断
sanitizeExtraBaseRPM(req.Extra)
// 确定是否跳过混合渠道检查
skipCheck := req.ConfirmMixedChannelRisk != nil && *req.ConfirmMixedChannelRisk
@@ -522,6 +554,8 @@ func (h *AccountHandler) Update(c *gin.Context) {
response.BadRequest(c, "rate_multiplier must be >= 0")
return
}
// base_rpm 输入校验:负值归零,超过 10000 截断
sanitizeExtraBaseRPM(req.Extra)
// 确定是否跳过混合渠道检查
skipCheck := req.ConfirmMixedChannelRisk != nil && *req.ConfirmMixedChannelRisk
@@ -904,6 +938,9 @@ func (h *AccountHandler) BatchCreate(c *gin.Context) {
continue
}
// base_rpm 输入校验:负值归零,超过 10000 截断
sanitizeExtraBaseRPM(item.Extra)
skipCheck := item.ConfirmMixedChannelRisk != nil && *item.ConfirmMixedChannelRisk
account, err := h.adminService.CreateAccount(ctx, &service.CreateAccountInput{
@@ -1048,6 +1085,8 @@ func (h *AccountHandler) BulkUpdate(c *gin.Context) {
response.BadRequest(c, "rate_multiplier must be >= 0")
return
}
// base_rpm 输入校验:负值归零,超过 10000 截断
sanitizeExtraBaseRPM(req.Extra)
// 确定是否跳过混合渠道检查
skipCheck := req.ConfirmMixedChannelRisk != nil && *req.ConfirmMixedChannelRisk
@@ -1706,3 +1745,22 @@ func (h *AccountHandler) BatchRefreshTier(c *gin.Context) {
func (h *AccountHandler) GetAntigravityDefaultModelMapping(c *gin.Context) {
response.Success(c, domain.DefaultAntigravityModelMapping)
}
// sanitizeExtraBaseRPM 对 extra map 中的 base_rpm 值进行范围校验和归一化。
// 负值归零,超过 10000 截断为 10000。extra 为 nil 或不含 base_rpm 时无操作。
func sanitizeExtraBaseRPM(extra map[string]any) {
if extra == nil {
return
}
raw, ok := extra["base_rpm"]
if !ok {
return
}
v := service.ParseExtraInt(raw)
if v < 0 {
v = 0
} else if v > 10000 {
v = 10000
}
extra["base_rpm"] = v
}

View File

@@ -15,7 +15,7 @@ import (
func setupAccountMixedChannelRouter(adminSvc *stubAdminService) *gin.Engine {
gin.SetMode(gin.TestMode)
router := gin.New()
accountHandler := NewAccountHandler(adminSvc, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
accountHandler := NewAccountHandler(adminSvc, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
router.POST("/api/v1/admin/accounts/check-mixed-channel", accountHandler.CheckMixedChannel)
router.POST("/api/v1/admin/accounts", accountHandler.Create)
router.PUT("/api/v1/admin/accounts/:id", accountHandler.Update)

View File

@@ -28,6 +28,7 @@ func TestAccountHandler_Create_AnthropicAPIKeyPassthroughExtraForwarded(t *testi
nil,
nil,
nil,
nil,
)
router := gin.New()

View File

@@ -36,7 +36,7 @@ func (f *failingAdminService) UpdateAccount(ctx context.Context, id int64, input
func setupAccountHandlerWithService(adminSvc service.AdminService) (*gin.Engine, *AccountHandler) {
gin.SetMode(gin.TestMode)
router := gin.New()
handler := NewAccountHandler(adminSvc, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
handler := NewAccountHandler(adminSvc, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
router.POST("/api/v1/admin/accounts/batch-update-credentials", handler.BatchUpdateCredentials)
return router, handler
}

View File

@@ -209,6 +209,13 @@ func AccountFromServiceShallow(a *service.Account) *Account {
if idleTimeout := a.GetSessionIdleTimeoutMinutes(); idleTimeout > 0 {
out.SessionIdleTimeoutMin = &idleTimeout
}
if rpm := a.GetBaseRPM(); rpm > 0 {
out.BaseRPM = &rpm
strategy := a.GetRPMStrategy()
out.RPMStrategy = &strategy
buffer := a.GetRPMStickyBuffer()
out.RPMStickyBuffer = &buffer
}
// TLS指纹伪装开关
if a.IsTLSFingerprintEnabled() {
enabled := true

View File

@@ -153,6 +153,12 @@ type Account struct {
MaxSessions *int `json:"max_sessions,omitempty"`
SessionIdleTimeoutMin *int `json:"session_idle_timeout_minutes,omitempty"`
// RPM 限制(仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑
BaseRPM *int `json:"base_rpm,omitempty"`
RPMStrategy *string `json:"rpm_strategy,omitempty"`
RPMStickyBuffer *int `json:"rpm_sticky_buffer,omitempty"`
// TLS指纹伪装仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑
EnableTLSFingerprint *bool `json:"enable_tls_fingerprint,omitempty"`

View File

@@ -403,6 +403,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
return
}
// RPM 计数递增Forward 成功后)
// 注意TOCTOU 竞态是已知且可接受的设计权衡,与 WindowCost 一致的 soft-limit 模式。
// 在高并发下可能短暂超出 RPM 限制,但不会导致请求失败。
if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
}
}
// 捕获请求信息(用于异步记录,避免在 goroutine 中访问 gin.Context
userAgent := c.GetHeader("User-Agent")
clientIP := ip.GetClientIP(c)
@@ -595,7 +604,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
h.handleStreamingAwareError(c, status, code, message, streamStarted)
return
}
// 兜底重试按直接请求兜底分组处理:清除强制平台,允许按分组平台调度
// 兜底重试按"直接请求兜底分组"处理:清除强制平台,允许按分组平台调度
ctx := context.WithValue(c.Request.Context(), ctxkey.ForcePlatform, "")
c.Request = c.Request.WithContext(ctx)
currentAPIKey = fallbackAPIKey
@@ -629,6 +638,15 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
return
}
// RPM 计数递增Forward 成功后)
// 注意TOCTOU 竞态是已知且可接受的设计权衡,与 WindowCost 一致的 soft-limit 模式。
// 在高并发下可能短暂超出 RPM 限制,但不会导致请求失败。
if account.IsAnthropicOAuthOrSetupToken() && account.GetBaseRPM() > 0 {
if err := h.gatewayService.IncrementAccountRPM(c.Request.Context(), account.ID); err != nil {
reqLog.Warn("gateway.rpm_increment_failed", zap.Int64("account_id", account.ID), zap.Error(err))
}
}
// 捕获请求信息(用于异步记录,避免在 goroutine 中访问 gin.Context
userAgent := c.GetHeader("User-Agent")
clientIP := ip.GetClientIP(c)

View File

@@ -153,6 +153,7 @@ func newTestGatewayHandler(t *testing.T, group *service.Group, accounts []*servi
nil, // deferredService
nil, // claudeTokenProvider
nil, // sessionLimitCache
nil, // rpmCache
nil, // digestStore
)

View File

@@ -2184,7 +2184,7 @@ func (s *stubSoraClientForHandler) GetVideoTask(_ context.Context, _ *service.Ac
func newMinimalGatewayService(accountRepo service.AccountRepository) *service.GatewayService {
return service.NewGatewayService(
accountRepo, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
)
}

View File

@@ -426,7 +426,8 @@ func TestSoraGatewayHandler_ChatCompletions(t *testing.T) {
deferredService,
nil,
testutil.StubSessionLimitCache{},
nil,
nil, // rpmCache
nil, // digestStore
)
soraClient := &stubSoraClient{imageURLs: []string{"https://example.com/a.png"}}

View File

@@ -0,0 +1,141 @@
package repository
import (
"context"
"errors"
"fmt"
"strconv"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/redis/go-redis/v9"
)
// RPM 计数器缓存常量定义
//
// 设计说明:
// 使用 Redis 简单计数器跟踪每个账号每分钟的请求数:
// - Key: rpm:{accountID}:{minuteTimestamp}
// - Value: 当前分钟内的请求计数
// - TTL: 120 秒(覆盖当前分钟 + 一定冗余)
//
// 使用 TxPipelineMULTI/EXEC执行 INCR + EXPIRE保证原子性且兼容 Redis Cluster。
// 通过 rdb.Time() 获取服务端时间,避免多实例时钟不同步。
//
// 设计决策:
// - TxPipeline vs PipelinePipeline 仅合并发送但不保证原子TxPipeline 使用 MULTI/EXEC 事务保证原子执行。
// - rdb.Time() 单独调用Pipeline/TxPipeline 中无法引用前一命令的结果,因此 TIME 必须单独调用2 RTT
// Lua 脚本可以做到 1 RTT但在 Redis Cluster 中动态拼接 key 存在 CROSSSLOT 风险,选择安全性优先。
const (
// RPM 计数器键前缀
// 格式: rpm:{accountID}:{minuteTimestamp}
rpmKeyPrefix = "rpm:"
// RPM 计数器 TTL120 秒,覆盖当前分钟窗口 + 冗余)
rpmKeyTTL = 120 * time.Second
)
// RPMCacheImpl RPM 计数器缓存 Redis 实现
type RPMCacheImpl struct {
rdb *redis.Client
}
// NewRPMCache 创建 RPM 计数器缓存
func NewRPMCache(rdb *redis.Client) service.RPMCache {
return &RPMCacheImpl{rdb: rdb}
}
// currentMinuteKey 获取当前分钟的完整 Redis key
// 使用 rdb.Time() 获取 Redis 服务端时间,避免多实例时钟偏差
func (c *RPMCacheImpl) currentMinuteKey(ctx context.Context, accountID int64) (string, error) {
serverTime, err := c.rdb.Time(ctx).Result()
if err != nil {
return "", fmt.Errorf("redis TIME: %w", err)
}
minuteTS := serverTime.Unix() / 60
return fmt.Sprintf("%s%d:%d", rpmKeyPrefix, accountID, minuteTS), nil
}
// currentMinuteSuffix 获取当前分钟时间戳后缀(供批量操作使用)
// 使用 rdb.Time() 获取 Redis 服务端时间
func (c *RPMCacheImpl) currentMinuteSuffix(ctx context.Context) (string, error) {
serverTime, err := c.rdb.Time(ctx).Result()
if err != nil {
return "", fmt.Errorf("redis TIME: %w", err)
}
minuteTS := serverTime.Unix() / 60
return strconv.FormatInt(minuteTS, 10), nil
}
// IncrementRPM 原子递增并返回当前分钟的计数
// 使用 TxPipeline (MULTI/EXEC) 执行 INCR + EXPIRE保证原子性且兼容 Redis Cluster
func (c *RPMCacheImpl) IncrementRPM(ctx context.Context, accountID int64) (int, error) {
key, err := c.currentMinuteKey(ctx, accountID)
if err != nil {
return 0, fmt.Errorf("rpm increment: %w", err)
}
// 使用 TxPipeline (MULTI/EXEC) 保证 INCR + EXPIRE 原子执行
// EXPIRE 幂等,每次都设置不影响正确性
pipe := c.rdb.TxPipeline()
incrCmd := pipe.Incr(ctx, key)
pipe.Expire(ctx, key, rpmKeyTTL)
if _, err := pipe.Exec(ctx); err != nil {
return 0, fmt.Errorf("rpm increment: %w", err)
}
return int(incrCmd.Val()), nil
}
// GetRPM 获取当前分钟的 RPM 计数
func (c *RPMCacheImpl) GetRPM(ctx context.Context, accountID int64) (int, error) {
key, err := c.currentMinuteKey(ctx, accountID)
if err != nil {
return 0, fmt.Errorf("rpm get: %w", err)
}
val, err := c.rdb.Get(ctx, key).Int()
if errors.Is(err, redis.Nil) {
return 0, nil // 当前分钟无记录
}
if err != nil {
return 0, fmt.Errorf("rpm get: %w", err)
}
return val, nil
}
// GetRPMBatch 批量获取多个账号的 RPM 计数(使用 Pipeline
func (c *RPMCacheImpl) GetRPMBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error) {
if len(accountIDs) == 0 {
return map[int64]int{}, nil
}
// 获取当前分钟后缀
minuteSuffix, err := c.currentMinuteSuffix(ctx)
if err != nil {
return nil, fmt.Errorf("rpm batch get: %w", err)
}
// 使用 Pipeline 批量 GET
pipe := c.rdb.Pipeline()
cmds := make(map[int64]*redis.StringCmd, len(accountIDs))
for _, id := range accountIDs {
key := fmt.Sprintf("%s%d:%s", rpmKeyPrefix, id, minuteSuffix)
cmds[id] = pipe.Get(ctx, key)
}
if _, err := pipe.Exec(ctx); err != nil && !errors.Is(err, redis.Nil) {
return nil, fmt.Errorf("rpm batch get: %w", err)
}
result := make(map[int64]int, len(accountIDs))
for id, cmd := range cmds {
if val, err := cmd.Int(); err == nil {
result[id] = val
} else {
result[id] = 0
}
}
return result, nil
}

View File

@@ -79,6 +79,7 @@ var ProviderSet = wire.NewSet(
NewTimeoutCounterCache,
ProvideConcurrencyCache,
ProvideSessionLimitCache,
NewRPMCache,
NewDashboardCache,
NewEmailCache,
NewIdentityCache,

View File

@@ -624,7 +624,7 @@ func newContractDeps(t *testing.T) *contractDeps {
apiKeyHandler := handler.NewAPIKeyHandler(apiKeyService)
usageHandler := handler.NewUsageHandler(usageService, apiKeyService)
adminSettingHandler := adminhandler.NewSettingHandler(settingService, nil, nil, nil, nil)
adminAccountHandler := adminhandler.NewAccountHandler(adminService, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
adminAccountHandler := adminhandler.NewAccountHandler(adminService, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil)
jwtAuth := func(c *gin.Context) {
c.Set(string(middleware.ContextKeyUser), middleware.AuthSubject{

View File

@@ -1137,6 +1137,80 @@ func (a *Account) GetSessionIdleTimeoutMinutes() int {
return 5
}
// GetBaseRPM 获取基础 RPM 限制
// 返回 0 表示未启用(负数视为无效配置,按 0 处理)
func (a *Account) GetBaseRPM() int {
if a.Extra == nil {
return 0
}
if v, ok := a.Extra["base_rpm"]; ok {
val := parseExtraInt(v)
if val > 0 {
return val
}
}
return 0
}
// GetRPMStrategy 获取 RPM 策略
// "tiered" = 三区模型(默认), "sticky_exempt" = 粘性豁免
func (a *Account) GetRPMStrategy() string {
if a.Extra == nil {
return "tiered"
}
if v, ok := a.Extra["rpm_strategy"]; ok {
if s, ok := v.(string); ok && s == "sticky_exempt" {
return "sticky_exempt"
}
}
return "tiered"
}
// GetRPMStickyBuffer 获取 RPM 粘性缓冲数量
// tiered 模式下的黄区大小,默认为 base_rpm 的 20%(至少 1
func (a *Account) GetRPMStickyBuffer() int {
if a.Extra == nil {
return 0
}
if v, ok := a.Extra["rpm_sticky_buffer"]; ok {
val := parseExtraInt(v)
if val > 0 {
return val
}
}
base := a.GetBaseRPM()
buffer := base / 5
if buffer < 1 && base > 0 {
buffer = 1
}
return buffer
}
// CheckRPMSchedulability 根据当前 RPM 计数检查调度状态
// 复用 WindowCostSchedulability 三态Schedulable / StickyOnly / NotSchedulable
func (a *Account) CheckRPMSchedulability(currentRPM int) WindowCostSchedulability {
baseRPM := a.GetBaseRPM()
if baseRPM <= 0 {
return WindowCostSchedulable
}
if currentRPM < baseRPM {
return WindowCostSchedulable
}
strategy := a.GetRPMStrategy()
if strategy == "sticky_exempt" {
return WindowCostStickyOnly // 粘性豁免无红区
}
// tiered: 黄区 + 红区
buffer := a.GetRPMStickyBuffer()
if currentRPM < baseRPM+buffer {
return WindowCostStickyOnly
}
return WindowCostNotSchedulable
}
// CheckWindowCostSchedulability 根据当前窗口费用检查调度状态
// - 费用 < 阈值: WindowCostSchedulable可正常调度
// - 费用 >= 阈值 且 < 阈值+预留: WindowCostStickyOnly仅粘性会话
@@ -1200,6 +1274,12 @@ func parseExtraFloat64(value any) float64 {
}
// parseExtraInt 从 extra 字段解析 int 值
// ParseExtraInt 从 extra 字段的 any 值解析为 int。
// 支持 int, int64, float64, json.Number, string 类型,无法解析时返回 0。
func ParseExtraInt(value any) int {
return parseExtraInt(value)
}
func parseExtraInt(value any) int {
switch v := value.(type) {
case int:

View File

@@ -0,0 +1,120 @@
package service
import (
"encoding/json"
"testing"
)
func TestGetBaseRPM(t *testing.T) {
tests := []struct {
name string
extra map[string]any
expected int
}{
{"nil extra", nil, 0},
{"no key", map[string]any{}, 0},
{"zero", map[string]any{"base_rpm": 0}, 0},
{"int value", map[string]any{"base_rpm": 15}, 15},
{"float value", map[string]any{"base_rpm": 15.0}, 15},
{"string value", map[string]any{"base_rpm": "15"}, 15},
{"negative value", map[string]any{"base_rpm": -5}, 0},
{"int64 value", map[string]any{"base_rpm": int64(20)}, 20},
{"json.Number value", map[string]any{"base_rpm": json.Number("25")}, 25},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Account{Extra: tt.extra}
if got := a.GetBaseRPM(); got != tt.expected {
t.Errorf("GetBaseRPM() = %d, want %d", got, tt.expected)
}
})
}
}
func TestGetRPMStrategy(t *testing.T) {
tests := []struct {
name string
extra map[string]any
expected string
}{
{"nil extra", nil, "tiered"},
{"no key", map[string]any{}, "tiered"},
{"tiered", map[string]any{"rpm_strategy": "tiered"}, "tiered"},
{"sticky_exempt", map[string]any{"rpm_strategy": "sticky_exempt"}, "sticky_exempt"},
{"invalid", map[string]any{"rpm_strategy": "foobar"}, "tiered"},
{"empty string fallback", map[string]any{"rpm_strategy": ""}, "tiered"},
{"numeric value fallback", map[string]any{"rpm_strategy": 123}, "tiered"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Account{Extra: tt.extra}
if got := a.GetRPMStrategy(); got != tt.expected {
t.Errorf("GetRPMStrategy() = %q, want %q", got, tt.expected)
}
})
}
}
func TestCheckRPMSchedulability(t *testing.T) {
tests := []struct {
name string
extra map[string]any
currentRPM int
expected WindowCostSchedulability
}{
{"disabled", map[string]any{}, 100, WindowCostSchedulable},
{"green zone", map[string]any{"base_rpm": 15}, 10, WindowCostSchedulable},
{"yellow zone tiered", map[string]any{"base_rpm": 15}, 15, WindowCostStickyOnly},
{"red zone tiered", map[string]any{"base_rpm": 15}, 18, WindowCostNotSchedulable},
{"sticky_exempt at limit", map[string]any{"base_rpm": 15, "rpm_strategy": "sticky_exempt"}, 15, WindowCostStickyOnly},
{"sticky_exempt over limit", map[string]any{"base_rpm": 15, "rpm_strategy": "sticky_exempt"}, 100, WindowCostStickyOnly},
{"custom buffer", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": 5}, 14, WindowCostStickyOnly},
{"custom buffer red", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": 5}, 15, WindowCostNotSchedulable},
{"base_rpm=1 green", map[string]any{"base_rpm": 1}, 0, WindowCostSchedulable},
{"base_rpm=1 yellow (at limit)", map[string]any{"base_rpm": 1}, 1, WindowCostStickyOnly},
{"base_rpm=1 red (at limit+buffer)", map[string]any{"base_rpm": 1}, 2, WindowCostNotSchedulable},
{"negative currentRPM", map[string]any{"base_rpm": 15}, -1, WindowCostSchedulable},
{"base_rpm negative disabled", map[string]any{"base_rpm": -5}, 10, WindowCostSchedulable},
{"very high currentRPM", map[string]any{"base_rpm": 10}, 9999, WindowCostNotSchedulable},
{"sticky_exempt very high currentRPM", map[string]any{"base_rpm": 10, "rpm_strategy": "sticky_exempt"}, 9999, WindowCostStickyOnly},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Account{Extra: tt.extra}
if got := a.CheckRPMSchedulability(tt.currentRPM); got != tt.expected {
t.Errorf("CheckRPMSchedulability(%d) = %d, want %d", tt.currentRPM, got, tt.expected)
}
})
}
}
func TestGetRPMStickyBuffer(t *testing.T) {
tests := []struct {
name string
extra map[string]any
expected int
}{
{"nil extra", nil, 0},
{"no keys", map[string]any{}, 0},
{"base_rpm=0", map[string]any{"base_rpm": 0}, 0},
{"base_rpm=1 min buffer 1", map[string]any{"base_rpm": 1}, 1},
{"base_rpm=4 min buffer 1", map[string]any{"base_rpm": 4}, 1},
{"base_rpm=5 buffer 1", map[string]any{"base_rpm": 5}, 1},
{"base_rpm=10 buffer 2", map[string]any{"base_rpm": 10}, 2},
{"base_rpm=15 buffer 3", map[string]any{"base_rpm": 15}, 3},
{"base_rpm=100 buffer 20", map[string]any{"base_rpm": 100}, 20},
{"custom buffer=5", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": 5}, 5},
{"custom buffer=0 fallback to default", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": 0}, 2},
{"custom buffer negative fallback", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": -1}, 2},
{"custom buffer with float", map[string]any{"base_rpm": 10, "rpm_sticky_buffer": float64(7)}, 7},
{"json.Number base_rpm", map[string]any{"base_rpm": json.Number("10")}, 2},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
a := &Account{Extra: tt.extra}
if got := a.GetRPMStickyBuffer(); got != tt.expected {
t.Errorf("GetRPMStickyBuffer() = %d, want %d", got, tt.expected)
}
})
}
}

View File

@@ -520,6 +520,7 @@ type GatewayService struct {
concurrencyService *ConcurrencyService
claudeTokenProvider *ClaudeTokenProvider
sessionLimitCache SessionLimitCache // 会话数量限制缓存(仅 Anthropic OAuth/SetupToken
rpmCache RPMCache // RPM 计数缓存(仅 Anthropic OAuth/SetupToken
userGroupRateCache *gocache.Cache
userGroupRateSF singleflight.Group
modelsListCache *gocache.Cache
@@ -549,6 +550,7 @@ func NewGatewayService(
deferredService *DeferredService,
claudeTokenProvider *ClaudeTokenProvider,
sessionLimitCache SessionLimitCache,
rpmCache RPMCache,
digestStore *DigestSessionStore,
) *GatewayService {
userGroupRateTTL := resolveUserGroupRateCacheTTL(cfg)
@@ -574,6 +576,7 @@ func NewGatewayService(
deferredService: deferredService,
claudeTokenProvider: claudeTokenProvider,
sessionLimitCache: sessionLimitCache,
rpmCache: rpmCache,
userGroupRateCache: gocache.New(userGroupRateTTL, time.Minute),
modelsListCache: gocache.New(modelsListTTL, time.Minute),
modelsListCacheTTL: modelsListTTL,
@@ -1154,6 +1157,7 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
return nil, errors.New("no available accounts")
}
ctx = s.withWindowCostPrefetch(ctx, accounts)
ctx = s.withRPMPrefetch(ctx, accounts)
isExcluded := func(accountID int64) bool {
if excludedIDs == nil {
@@ -1229,6 +1233,10 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
filteredWindowCost++
continue
}
// RPM 检查(非粘性会话路径)
if !s.isAccountSchedulableForRPM(ctx, account, false) {
continue
}
routingCandidates = append(routingCandidates, account)
}
@@ -1252,7 +1260,9 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
s.isAccountAllowedForPlatform(stickyAccount, platform, useMixed) &&
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, stickyAccount, requestedModel)) &&
s.isAccountSchedulableForModelSelection(ctx, stickyAccount, requestedModel) &&
s.isAccountSchedulableForWindowCost(ctx, stickyAccount, true) { // 粘性会话窗口费用检查
s.isAccountSchedulableForWindowCost(ctx, stickyAccount, true) &&
s.isAccountSchedulableForRPM(ctx, stickyAccount, true) { // 粘性会话窗口费用+RPM 检查
result, err := s.tryAcquireAccountSlot(ctx, stickyAccountID, stickyAccount.Concurrency)
if err == nil && result.Acquired {
// 会话数量限制检查
@@ -1406,7 +1416,9 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
s.isAccountAllowedForPlatform(account, platform, useMixed) &&
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) &&
s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) &&
s.isAccountSchedulableForWindowCost(ctx, account, true) { // 粘性会话窗口费用检查
s.isAccountSchedulableForWindowCost(ctx, account, true) &&
s.isAccountSchedulableForRPM(ctx, account, true) { // 粘性会话窗口费用+RPM 检查
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
if err == nil && result.Acquired {
// 会话数量限制检查
@@ -1472,6 +1484,10 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
// RPM 检查(非粘性会话路径)
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
continue
}
candidates = append(candidates, acc)
}
@@ -2155,6 +2171,88 @@ checkSchedulability:
return true
}
// rpmPrefetchContextKey is the context key for prefetched RPM counts.
type rpmPrefetchContextKeyType struct{}
var rpmPrefetchContextKey = rpmPrefetchContextKeyType{}
func rpmFromPrefetchContext(ctx context.Context, accountID int64) (int, bool) {
if v, ok := ctx.Value(rpmPrefetchContextKey).(map[int64]int); ok {
count, found := v[accountID]
return count, found
}
return 0, false
}
// withRPMPrefetch 批量预取所有候选账号的 RPM 计数
func (s *GatewayService) withRPMPrefetch(ctx context.Context, accounts []Account) context.Context {
if s.rpmCache == nil {
return ctx
}
var ids []int64
for i := range accounts {
if accounts[i].IsAnthropicOAuthOrSetupToken() && accounts[i].GetBaseRPM() > 0 {
ids = append(ids, accounts[i].ID)
}
}
if len(ids) == 0 {
return ctx
}
counts, err := s.rpmCache.GetRPMBatch(ctx, ids)
if err != nil {
return ctx // 失败开放
}
return context.WithValue(ctx, rpmPrefetchContextKey, counts)
}
// isAccountSchedulableForRPM 检查账号是否可根据 RPM 进行调度
// 仅适用于 Anthropic OAuth/SetupToken 账号
func (s *GatewayService) isAccountSchedulableForRPM(ctx context.Context, account *Account, isSticky bool) bool {
if !account.IsAnthropicOAuthOrSetupToken() {
return true
}
baseRPM := account.GetBaseRPM()
if baseRPM <= 0 {
return true
}
// 尝试从预取缓存获取
var currentRPM int
if count, ok := rpmFromPrefetchContext(ctx, account.ID); ok {
currentRPM = count
} else if s.rpmCache != nil {
if count, err := s.rpmCache.GetRPM(ctx, account.ID); err == nil {
currentRPM = count
}
// 失败开放GetRPM 错误时允许调度
}
schedulability := account.CheckRPMSchedulability(currentRPM)
switch schedulability {
case WindowCostSchedulable:
return true
case WindowCostStickyOnly:
return isSticky
case WindowCostNotSchedulable:
return false
}
return true
}
// IncrementAccountRPM increments the RPM counter for the given account.
// 已知 TOCTOU 竞态:调度时读取 RPM 计数与此处递增之间存在时间窗口,
// 高并发下可能短暂超出 RPM 限制。这是与 WindowCost 一致的 soft-limit
// 设计权衡——可接受的少量超额优于加锁带来的延迟和复杂度。
func (s *GatewayService) IncrementAccountRPM(ctx context.Context, accountID int64) error {
if s.rpmCache == nil {
return nil
}
_, err := s.rpmCache.IncrementRPM(ctx, accountID)
return err
}
// checkAndRegisterSession 检查并注册会话,用于会话数量限制
// 仅适用于 Anthropic OAuth/SetupToken 账号
// sessionID: 会话标识符(使用粘性会话的 hash
@@ -2349,7 +2447,7 @@ func sameAccountWithLoadGroup(a, b accountWithLoad) bool {
// shuffleWithinPriorityAndLastUsed 对排序后的 []*Account 切片,按 (Priority, LastUsedAt) 分组后组内随机打乱。
//
// 注意:当 preferOAuth=true 时,需要保证 OAuth 账号在同组内仍然优先,否则会把排序时的偏好打散掉。
// 因此这里采用组内分区 + 分区内 shuffle的方式:
// 因此这里采用"组内分区 + 分区内 shuffle"的方式:
// - 先把同组账号按 (OAuth / 非 OAuth) 拆成两段,保持 OAuth 段在前;
// - 再分别在各段内随机打散,避免热点。
func shuffleWithinPriorityAndLastUsed(accounts []*Account, preferOAuth bool) {
@@ -2489,7 +2587,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
if clearSticky {
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
}
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForWindowCost(ctx, account, true) && s.isAccountSchedulableForRPM(ctx, account, true) {
if s.debugModelRoutingEnabled() {
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
}
@@ -2512,6 +2610,10 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
}
accountsLoaded = true
// 提前预取窗口费用+RPM 计数,确保 routing 段内的调度检查调用能命中缓存
ctx = s.withWindowCostPrefetch(ctx, accounts)
ctx = s.withRPMPrefetch(ctx, accounts)
routingSet := make(map[int64]struct{}, len(routingAccountIDs))
for _, id := range routingAccountIDs {
if id > 0 {
@@ -2539,6 +2641,12 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
continue
}
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
continue
}
if selected == nil {
selected = acc
continue
@@ -2589,7 +2697,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
if clearSticky {
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
}
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForWindowCost(ctx, account, true) && s.isAccountSchedulableForRPM(ctx, account, true) {
return account, nil
}
}
@@ -2610,6 +2718,10 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
}
}
// 批量预取窗口费用+RPM 计数避免逐个账号查询N+1
ctx = s.withWindowCostPrefetch(ctx, accounts)
ctx = s.withRPMPrefetch(ctx, accounts)
// 3. 按优先级+最久未用选择(考虑模型支持)
var selected *Account
for i := range accounts {
@@ -2628,6 +2740,12 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
continue
}
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
continue
}
if selected == nil {
selected = acc
continue
@@ -2697,7 +2815,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
if clearSticky {
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
}
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForWindowCost(ctx, account, true) && s.isAccountSchedulableForRPM(ctx, account, true) {
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
if s.debugModelRoutingEnabled() {
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy mixed routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
@@ -2718,6 +2836,10 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
}
accountsLoaded = true
// 提前预取窗口费用+RPM 计数,确保 routing 段内的调度检查调用能命中缓存
ctx = s.withWindowCostPrefetch(ctx, accounts)
ctx = s.withRPMPrefetch(ctx, accounts)
routingSet := make(map[int64]struct{}, len(routingAccountIDs))
for _, id := range routingAccountIDs {
if id > 0 {
@@ -2749,6 +2871,12 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
continue
}
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
continue
}
if selected == nil {
selected = acc
continue
@@ -2799,7 +2927,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
if clearSticky {
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
}
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForWindowCost(ctx, account, true) && s.isAccountSchedulableForRPM(ctx, account, true) {
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
return account, nil
}
@@ -2818,6 +2946,10 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
}
}
// 批量预取窗口费用+RPM 计数避免逐个账号查询N+1
ctx = s.withWindowCostPrefetch(ctx, accounts)
ctx = s.withRPMPrefetch(ctx, accounts)
// 3. 按优先级+最久未用选择(考虑模型支持和混合调度)
var selected *Account
for i := range accounts {
@@ -2840,6 +2972,12 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
continue
}
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
continue
}
if selected == nil {
selected = acc
continue
@@ -5185,7 +5323,7 @@ func (s *GatewayService) isThinkingBlockSignatureError(respBody []byte) bool {
}
func (s *GatewayService) shouldFailoverOn400(respBody []byte) bool {
// 只对可能是兼容性差异导致的 400 允许切换,避免无意义重试。
// 只对"可能是兼容性差异导致"的 400 允许切换,避免无意义重试。
// 默认保守:无法识别则不切换。
msg := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
if msg == "" {

View File

@@ -0,0 +1,17 @@
package service
import "context"
// RPMCache RPM 计数器缓存接口
// 用于 Anthropic OAuth/SetupToken 账号的每分钟请求数限制
type RPMCache interface {
// IncrementRPM 原子递增并返回当前分钟的计数
// 使用 Redis 服务器时间确定 minute key避免多实例时钟偏差
IncrementRPM(ctx context.Context, accountID int64) (count int, err error)
// GetRPM 获取当前分钟的 RPM 计数
GetRPM(ctx context.Context, accountID int64) (count int, err error)
// GetRPMBatch 批量获取多个账号的 RPM 计数(使用 Pipeline
GetRPMBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error)
}

View File

@@ -52,6 +52,25 @@
<span class="font-mono">{{ account.max_sessions }}</span>
</span>
</div>
<!-- RPM 限制 Anthropic OAuth/SetupToken 且启用时显示 -->
<div v-if="showRpmLimit" class="flex items-center gap-1">
<span
:class="[
'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 text-[10px] font-medium',
rpmClass
]"
:title="rpmTooltip"
>
<svg class="h-2.5 w-2.5" fill="none" viewBox="0 0 24 24" stroke-width="1.5" stroke="currentColor">
<path stroke-linecap="round" stroke-linejoin="round" d="M12 6v6h4.5m4.5 0a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />
</svg>
<span class="font-mono">{{ currentRPM }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">{{ account.base_rpm }}</span>
<span class="text-[9px] opacity-60">{{ rpmStrategyTag }}</span>
</span>
</div>
</div>
</template>
@@ -125,19 +144,15 @@ const windowCostClass = computed(() => {
const limit = props.account.window_cost_limit || 0
const reserve = props.account.window_cost_sticky_reserve || 10
// >= 阈值+预留: 完全不可调度 (红色)
if (current >= limit + reserve) {
return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
}
// >= 阈值: 仅粘性会话 (橙色)
if (current >= limit) {
return 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'
}
// >= 80% 阈值: 警告 (黄色)
if (current >= limit * 0.8) {
return 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
}
// 正常 (绿色)
return 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
@@ -165,15 +180,12 @@ const sessionLimitClass = computed(() => {
const current = activeSessions.value
const max = props.account.max_sessions || 0
// >= 最大: 完全占满 (红色)
if (current >= max) {
return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
}
// >= 80%: 警告 (黄色)
if (current >= max * 0.8) {
return 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
}
// 正常 (绿色)
return 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
@@ -191,6 +203,89 @@ const sessionLimitTooltip = computed(() => {
return t('admin.accounts.capacity.sessions.normal', { idle })
})
// 是否显示 RPM 限制
const showRpmLimit = computed(() => {
return (
isAnthropicOAuthOrSetupToken.value &&
props.account.base_rpm !== undefined &&
props.account.base_rpm !== null &&
props.account.base_rpm > 0
)
})
// 当前 RPM 计数
const currentRPM = computed(() => props.account.current_rpm ?? 0)
// RPM 策略
const rpmStrategy = computed(() => props.account.rpm_strategy || 'tiered')
// RPM 策略标签
const rpmStrategyTag = computed(() => {
return rpmStrategy.value === 'sticky_exempt' ? '[S]' : '[T]'
})
// RPM buffer 计算与后端一致base <= 0 时 buffer 为 0
const rpmBuffer = computed(() => {
const base = props.account.base_rpm || 0
return props.account.rpm_sticky_buffer ?? (base > 0 ? Math.max(1, Math.floor(base / 5)) : 0)
})
// RPM 状态样式
const rpmClass = computed(() => {
if (!showRpmLimit.value) return ''
const current = currentRPM.value
const base = props.account.base_rpm ?? 0
const buffer = rpmBuffer.value
if (rpmStrategy.value === 'tiered') {
if (current >= base + buffer) {
return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
}
if (current >= base) {
return 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'
}
} else {
if (current >= base) {
return 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'
}
}
if (current >= base * 0.8) {
return 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
}
return 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
// RPM 提示文字(增强版:显示策略、区域、缓冲区)
const rpmTooltip = computed(() => {
if (!showRpmLimit.value) return ''
const current = currentRPM.value
const base = props.account.base_rpm ?? 0
const buffer = rpmBuffer.value
if (rpmStrategy.value === 'tiered') {
if (current >= base + buffer) {
return t('admin.accounts.capacity.rpm.tieredBlocked', { buffer })
}
if (current >= base) {
return t('admin.accounts.capacity.rpm.tieredStickyOnly', { buffer })
}
if (current >= base * 0.8) {
return t('admin.accounts.capacity.rpm.tieredWarning')
}
return t('admin.accounts.capacity.rpm.tieredNormal')
} else {
if (current >= base) {
return t('admin.accounts.capacity.rpm.stickyExemptOver')
}
if (current >= base * 0.8) {
return t('admin.accounts.capacity.rpm.stickyExemptWarning')
}
return t('admin.accounts.capacity.rpm.stickyExemptNormal')
}
})
// 格式化费用显示
const formatCost = (value: number | null | undefined) => {
if (value === null || value === undefined) return '0'

View File

@@ -585,6 +585,111 @@
</div>
</div>
<!-- RPM Limit (仅全部为 Anthropic OAuth/SetupToken 时显示) -->
<div v-if="allAnthropicOAuthOrSetupToken" class="border-t border-gray-200 pt-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
<label
id="bulk-edit-rpm-limit-label"
class="input-label mb-0"
for="bulk-edit-rpm-limit-enabled"
>
{{ t('admin.accounts.quotaControl.rpmLimit.label') }}
</label>
<input
v-model="enableRpmLimit"
id="bulk-edit-rpm-limit-enabled"
type="checkbox"
aria-controls="bulk-edit-rpm-limit-body"
class="rounded border-gray-300 text-primary-600 focus:ring-primary-500"
/>
</div>
<div
id="bulk-edit-rpm-limit-body"
:class="!enableRpmLimit && 'pointer-events-none opacity-50'"
role="group"
aria-labelledby="bulk-edit-rpm-limit-label"
>
<div class="mb-3 flex items-center justify-between">
<span class="text-sm text-gray-700 dark:text-gray-300">{{ t('admin.accounts.quotaControl.rpmLimit.hint') }}</span>
<button
type="button"
@click="rpmLimitEnabled = !rpmLimitEnabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
rpmLimitEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
rpmLimitEnabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<div v-if="rpmLimitEnabled" class="space-y-3">
<div>
<label class="input-label text-xs">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpm') }}</label>
<input
v-model.number="bulkBaseRpm"
type="number"
min="1"
max="1000"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.baseRpmPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpmHint') }}</p>
</div>
<div>
<label class="input-label text-xs">{{ t('admin.accounts.quotaControl.rpmLimit.strategy') }}</label>
<div class="flex gap-2">
<button
type="button"
@click="bulkRpmStrategy = 'tiered'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
bulkRpmStrategy === 'tiered'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
{{ t('admin.accounts.quotaControl.rpmLimit.strategyTiered') }}
</button>
<button
type="button"
@click="bulkRpmStrategy = 'sticky_exempt'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
bulkRpmStrategy === 'sticky_exempt'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
{{ t('admin.accounts.quotaControl.rpmLimit.strategyStickyExempt') }}
</button>
</div>
</div>
<div v-if="bulkRpmStrategy === 'tiered'">
<label class="input-label text-xs">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBuffer') }}</label>
<input
v-model.number="bulkRpmStickyBuffer"
type="number"
min="1"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.stickyBufferPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
</div>
</div>
<!-- Groups -->
<div class="border-t border-gray-200 pt-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
@@ -658,7 +763,7 @@ import { ref, watch, computed } from 'vue'
import { useI18n } from 'vue-i18n'
import { useAppStore } from '@/stores/app'
import { adminAPI } from '@/api/admin'
import type { Proxy as ProxyConfig, AdminGroup, AccountPlatform } from '@/types'
import type { Proxy as ProxyConfig, AdminGroup, AccountPlatform, AccountType } from '@/types'
import BaseDialog from '@/components/common/BaseDialog.vue'
import Select from '@/components/common/Select.vue'
import ProxySelector from '@/components/common/ProxySelector.vue'
@@ -670,6 +775,7 @@ interface Props {
show: boolean
accountIds: number[]
selectedPlatforms: AccountPlatform[]
selectedTypes: AccountType[]
proxies: ProxyConfig[]
groups: AdminGroup[]
}
@@ -686,6 +792,15 @@ const appStore = useAppStore()
// Platform awareness
const isMixedPlatform = computed(() => props.selectedPlatforms.length > 1)
// 是否全部为 Anthropic OAuth/SetupTokenRPM 配置仅在此条件下显示)
const allAnthropicOAuthOrSetupToken = computed(() => {
return (
props.selectedPlatforms.length === 1 &&
props.selectedPlatforms[0] === 'anthropic' &&
props.selectedTypes.every(t => t === 'oauth' || t === 'setup-token')
)
})
const platformModelPrefix: Record<string, string[]> = {
anthropic: ['claude-'],
antigravity: ['claude-', 'gemini-', 'gpt-oss-', 'tab_'],
@@ -725,6 +840,7 @@ const enablePriority = ref(false)
const enableRateMultiplier = ref(false)
const enableStatus = ref(false)
const enableGroups = ref(false)
const enableRpmLimit = ref(false)
// State - field values
const submitting = ref(false)
@@ -741,6 +857,10 @@ const priority = ref(1)
const rateMultiplier = ref(1)
const status = ref<'active' | 'inactive'>('active')
const groupIds = ref<number[]>([])
const rpmLimitEnabled = ref(false)
const bulkBaseRpm = ref<number | null>(null)
const bulkRpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const bulkRpmStickyBuffer = ref<number | null>(null)
// All models list (combined Anthropic + OpenAI + Gemini)
const allModels = [
@@ -1094,6 +1214,26 @@ const buildUpdatePayload = (): Record<string, unknown> | null => {
updates.credentials = credentials
}
// RPM limit settings (写入 extra 字段)
if (enableRpmLimit.value) {
const extra: Record<string, unknown> = {}
if (rpmLimitEnabled.value && bulkBaseRpm.value != null && bulkBaseRpm.value > 0) {
extra.base_rpm = bulkBaseRpm.value
extra.rpm_strategy = bulkRpmStrategy.value
if (bulkRpmStickyBuffer.value != null && bulkRpmStickyBuffer.value > 0) {
extra.rpm_sticky_buffer = bulkRpmStickyBuffer.value
}
} else {
// 关闭 RPM 限制 - 设置 base_rpm 为 0并用空值覆盖关联字段
// 后端使用 JSONB || merge 语义,不会删除已有 key
// 所以必须显式发送空值来重置(后端读取时会 fallback 到默认值)
extra.base_rpm = 0
extra.rpm_strategy = ''
extra.rpm_sticky_buffer = 0
}
updates.extra = extra
}
return Object.keys(updates).length > 0 ? updates : null
}
@@ -1117,7 +1257,8 @@ const handleSubmit = async () => {
enablePriority.value ||
enableRateMultiplier.value ||
enableStatus.value ||
enableGroups.value
enableGroups.value ||
enableRpmLimit.value
if (!hasAnyFieldEnabled) {
appStore.showError(t('admin.accounts.bulkEdit.noFieldsSelected'))
@@ -1173,6 +1314,7 @@ watch(
enableRateMultiplier.value = false
enableStatus.value = false
enableGroups.value = false
enableRpmLimit.value = false
// Reset all values
baseUrl.value = ''
@@ -1188,6 +1330,10 @@ watch(
rateMultiplier.value = 1
status.value = 'active'
groupIds.value = []
rpmLimitEnabled.value = false
bulkBaseRpm.value = null
bulkRpmStrategy.value = 'tiered'
bulkRpmStickyBuffer.value = null
}
}
)

View File

@@ -1536,6 +1536,98 @@
</div>
</div>
<!-- RPM Limit -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
<div>
<label class="input-label mb-0">{{ t('admin.accounts.quotaControl.rpmLimit.label') }}</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.accounts.quotaControl.rpmLimit.hint') }}
</p>
</div>
<button
type="button"
@click="rpmLimitEnabled = !rpmLimitEnabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
rpmLimitEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
rpmLimitEnabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<div v-if="rpmLimitEnabled" class="space-y-4">
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpm') }}</label>
<input
v-model.number="baseRpm"
type="number"
min="1"
max="1000"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.baseRpmPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpmHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.strategy') }}</label>
<div class="flex gap-2">
<button
type="button"
@click="rpmStrategy = 'tiered'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
rpmStrategy === 'tiered'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
<div class="text-center">
<div>{{ t('admin.accounts.quotaControl.rpmLimit.strategyTiered') }}</div>
<div class="mt-0.5 text-[10px] opacity-70">{{ t('admin.accounts.quotaControl.rpmLimit.strategyTieredHint') }}</div>
</div>
</button>
<button
type="button"
@click="rpmStrategy = 'sticky_exempt'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
rpmStrategy === 'sticky_exempt'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
<div class="text-center">
<div>{{ t('admin.accounts.quotaControl.rpmLimit.strategyStickyExempt') }}</div>
<div class="mt-0.5 text-[10px] opacity-70">{{ t('admin.accounts.quotaControl.rpmLimit.strategyStickyExemptHint') }}</div>
</div>
</button>
</div>
</div>
<div v-if="rpmStrategy === 'tiered'">
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBuffer') }}</label>
<input
v-model.number="rpmStickyBuffer"
type="number"
min="1"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.stickyBufferPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
</div>
<!-- TLS Fingerprint -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="flex items-center justify-between">
@@ -2393,6 +2485,10 @@ const windowCostStickyReserve = ref<number | null>(null)
const sessionLimitEnabled = ref(false)
const maxSessions = ref<number | null>(null)
const sessionIdleTimeout = ref<number | null>(null)
const rpmLimitEnabled = ref(false)
const baseRpm = ref<number | null>(null)
const rpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const rpmStickyBuffer = ref<number | null>(null)
const tlsFingerprintEnabled = ref(false)
const sessionIdMaskingEnabled = ref(false)
const cacheTTLOverrideEnabled = ref(false)
@@ -3017,6 +3113,10 @@ const resetForm = () => {
sessionLimitEnabled.value = false
maxSessions.value = null
sessionIdleTimeout.value = null
rpmLimitEnabled.value = false
baseRpm.value = null
rpmStrategy.value = 'tiered'
rpmStickyBuffer.value = null
tlsFingerprintEnabled.value = false
sessionIdMaskingEnabled.value = false
cacheTTLOverrideEnabled.value = false
@@ -3926,6 +4026,15 @@ const handleAnthropicExchange = async (authCode: string) => {
extra.session_idle_timeout_minutes = sessionIdleTimeout.value ?? 5
}
// Add RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
extra.base_rpm = baseRpm.value
extra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
extra.rpm_sticky_buffer = rpmStickyBuffer.value
}
}
// Add TLS fingerprint settings
if (tlsFingerprintEnabled.value) {
extra.enable_tls_fingerprint = true
@@ -4024,6 +4133,15 @@ const handleCookieAuth = async (sessionKey: string) => {
extra.session_idle_timeout_minutes = sessionIdleTimeout.value ?? 5
}
// Add RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
extra.base_rpm = baseRpm.value
extra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
extra.rpm_sticky_buffer = rpmStickyBuffer.value
}
}
// Add TLS fingerprint settings
if (tlsFingerprintEnabled.value) {
extra.enable_tls_fingerprint = true

View File

@@ -946,6 +946,98 @@
</div>
</div>
<!-- RPM Limit -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
<div>
<label class="input-label mb-0">{{ t('admin.accounts.quotaControl.rpmLimit.label') }}</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.accounts.quotaControl.rpmLimit.hint') }}
</p>
</div>
<button
type="button"
@click="rpmLimitEnabled = !rpmLimitEnabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
rpmLimitEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
rpmLimitEnabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<div v-if="rpmLimitEnabled" class="space-y-4">
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpm') }}</label>
<input
v-model.number="baseRpm"
type="number"
min="1"
max="1000"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.baseRpmPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.baseRpmHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.strategy') }}</label>
<div class="flex gap-2">
<button
type="button"
@click="rpmStrategy = 'tiered'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
rpmStrategy === 'tiered'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
<div class="text-center">
<div>{{ t('admin.accounts.quotaControl.rpmLimit.strategyTiered') }}</div>
<div class="mt-0.5 text-[10px] opacity-70">{{ t('admin.accounts.quotaControl.rpmLimit.strategyTieredHint') }}</div>
</div>
</button>
<button
type="button"
@click="rpmStrategy = 'sticky_exempt'"
:class="[
'flex-1 rounded-lg px-3 py-2 text-sm font-medium transition-all',
rpmStrategy === 'sticky_exempt'
? 'bg-primary-100 text-primary-700 dark:bg-primary-900/30 dark:text-primary-400'
: 'bg-gray-100 text-gray-600 hover:bg-gray-200 dark:bg-dark-600 dark:text-gray-400 dark:hover:bg-dark-500'
]"
>
<div class="text-center">
<div>{{ t('admin.accounts.quotaControl.rpmLimit.strategyStickyExempt') }}</div>
<div class="mt-0.5 text-[10px] opacity-70">{{ t('admin.accounts.quotaControl.rpmLimit.strategyStickyExemptHint') }}</div>
</div>
</button>
</div>
</div>
<div v-if="rpmStrategy === 'tiered'">
<label class="input-label">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBuffer') }}</label>
<input
v-model.number="rpmStickyBuffer"
type="number"
min="1"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.rpmLimit.stickyBufferPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.rpmLimit.stickyBufferHint') }}</p>
</div>
</div>
</div>
<!-- TLS Fingerprint -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="flex items-center justify-between">
@@ -1251,6 +1343,10 @@ const windowCostStickyReserve = ref<number | null>(null)
const sessionLimitEnabled = ref(false)
const maxSessions = ref<number | null>(null)
const sessionIdleTimeout = ref<number | null>(null)
const rpmLimitEnabled = ref(false)
const baseRpm = ref<number | null>(null)
const rpmStrategy = ref<'tiered' | 'sticky_exempt'>('tiered')
const rpmStickyBuffer = ref<number | null>(null)
const tlsFingerprintEnabled = ref(false)
const sessionIdMaskingEnabled = ref(false)
const cacheTTLOverrideEnabled = ref(false)
@@ -1710,6 +1806,10 @@ function loadQuotaControlSettings(account: Account) {
sessionLimitEnabled.value = false
maxSessions.value = null
sessionIdleTimeout.value = null
rpmLimitEnabled.value = false
baseRpm.value = null
rpmStrategy.value = 'tiered'
rpmStickyBuffer.value = null
tlsFingerprintEnabled.value = false
sessionIdMaskingEnabled.value = false
cacheTTLOverrideEnabled.value = false
@@ -1733,6 +1833,14 @@ function loadQuotaControlSettings(account: Account) {
sessionIdleTimeout.value = account.session_idle_timeout_minutes ?? 5
}
// RPM limit
if (account.base_rpm != null && account.base_rpm > 0) {
rpmLimitEnabled.value = true
baseRpm.value = account.base_rpm
rpmStrategy.value = (account.rpm_strategy as 'tiered' | 'sticky_exempt') || 'tiered'
rpmStickyBuffer.value = account.rpm_sticky_buffer ?? null
}
// Load TLS fingerprint setting
if (account.enable_tls_fingerprint === true) {
tlsFingerprintEnabled.value = true
@@ -2043,6 +2151,21 @@ const handleSubmit = async () => {
delete newExtra.session_idle_timeout_minutes
}
// RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
newExtra.base_rpm = baseRpm.value
newExtra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
newExtra.rpm_sticky_buffer = rpmStickyBuffer.value
} else {
delete newExtra.rpm_sticky_buffer
}
} else {
delete newExtra.base_rpm
delete newExtra.rpm_strategy
delete newExtra.rpm_sticky_buffer
}
// TLS fingerprint setting
if (tlsFingerprintEnabled.value) {
newExtra.enable_tls_fingerprint = true

View File

@@ -1616,7 +1616,19 @@ export default {
sessions: {
full: 'Active sessions full, new sessions must wait (idle timeout: {idle} min)',
normal: 'Active sessions normal (idle timeout: {idle} min)'
}
},
rpm: {
full: 'RPM limit reached',
warning: 'RPM approaching limit',
normal: 'RPM normal',
tieredNormal: 'RPM limit (Tiered) - Normal',
tieredWarning: 'RPM limit (Tiered) - Approaching limit',
tieredStickyOnly: 'RPM limit (Tiered) - Sticky only | Buffer: {buffer}',
tieredBlocked: 'RPM limit (Tiered) - Blocked | Buffer: {buffer}',
stickyExemptNormal: 'RPM limit (Sticky Exempt) - Normal',
stickyExemptWarning: 'RPM limit (Sticky Exempt) - Approaching limit',
stickyExemptOver: 'RPM limit (Sticky Exempt) - Over limit, sticky only'
},
},
tempUnschedulable: {
title: 'Temp Unschedulable',
@@ -1831,6 +1843,22 @@ export default {
idleTimeoutPlaceholder: '5',
idleTimeoutHint: 'Sessions will be released after idle timeout'
},
rpmLimit: {
label: 'RPM Limit',
hint: 'Limit requests per minute to protect upstream accounts',
baseRpm: 'Base RPM',
baseRpmPlaceholder: '15',
baseRpmHint: 'Max requests per minute, 0 or empty means no limit',
strategy: 'RPM Strategy',
strategyTiered: 'Tiered Model',
strategyStickyExempt: 'Sticky Exempt',
strategyTieredHint: 'Green → Yellow → Sticky only → Blocked, progressive throttling',
strategyStickyExemptHint: 'Only sticky sessions allowed when over limit',
strategyHint: 'Tiered: gradually restrict when exceeded; Sticky Exempt: existing sessions unrestricted',
stickyBuffer: 'Sticky Buffer',
stickyBufferPlaceholder: 'Default: 20% of base RPM',
stickyBufferHint: 'Extra requests allowed for sticky sessions after exceeding base RPM. Leave empty to use default (20% of base RPM, min 1)'
},
tlsFingerprint: {
label: 'TLS Fingerprint Simulation',
hint: 'Simulate Node.js/Claude Code client TLS fingerprint'

View File

@@ -1667,7 +1667,19 @@ export default {
sessions: {
full: '活跃会话已满,新会话需等待(空闲超时:{idle}分钟)',
normal: '活跃会话正常(空闲超时:{idle}分钟)'
}
},
rpm: {
full: '已达 RPM 上限',
warning: 'RPM 接近上限',
normal: 'RPM 正常',
tieredNormal: 'RPM 限制 (三区模型) - 正常',
tieredWarning: 'RPM 限制 (三区模型) - 接近阈值',
tieredStickyOnly: 'RPM 限制 (三区模型) - 仅粘性会话 | 缓冲区: {buffer}',
tieredBlocked: 'RPM 限制 (三区模型) - 已阻塞 | 缓冲区: {buffer}',
stickyExemptNormal: 'RPM 限制 (粘性豁免) - 正常',
stickyExemptWarning: 'RPM 限制 (粘性豁免) - 接近阈值',
stickyExemptOver: 'RPM 限制 (粘性豁免) - 超限,仅粘性会话'
},
},
clearRateLimit: '清除速率限制',
testConnection: '测试连接',
@@ -1974,6 +1986,22 @@ export default {
idleTimeoutPlaceholder: '5',
idleTimeoutHint: '会话空闲超时后自动释放'
},
rpmLimit: {
label: 'RPM 限制',
hint: '限制每分钟请求数量,保护上游账号',
baseRpm: '基础 RPM',
baseRpmPlaceholder: '15',
baseRpmHint: '每分钟最大请求数0 或留空表示不限制',
strategy: 'RPM 策略',
strategyTiered: '三区模型',
strategyStickyExempt: '粘性豁免',
strategyTieredHint: '绿区→黄区→仅粘性→阻塞,逐步限流',
strategyStickyExemptHint: '超限后仅允许粘性会话',
strategyHint: '三区模型: 超限后逐步限制; 粘性豁免: 已有会话不受限',
stickyBuffer: '粘性缓冲区',
stickyBufferPlaceholder: '默认: base RPM 的 20%',
stickyBufferHint: '超过 base RPM 后粘性会话额外允许的请求数。为空则使用默认值base RPM 的 20%,最小为 1'
},
tlsFingerprint: {
label: 'TLS 指纹模拟',
hint: '模拟 Node.js/Claude Code 客户端的 TLS 指纹'

View File

@@ -661,6 +661,11 @@ export interface Account {
max_sessions?: number | null
session_idle_timeout_minutes?: number | null
// RPM 限制(仅 Anthropic OAuth/SetupToken 账号有效)
base_rpm?: number | null
rpm_strategy?: string | null
rpm_sticky_buffer?: number | null
// TLS指纹伪装仅 Anthropic OAuth/SetupToken 账号有效)
enable_tls_fingerprint?: boolean | null
@@ -675,6 +680,7 @@ export interface Account {
// 运行时状态(仅当启用对应限制时返回)
current_window_cost?: number | null // 当前窗口费用
active_sessions?: number | null // 当前活跃会话数
current_rpm?: number | null // 当前分钟 RPM 计数
}
// Account Usage types

View File

@@ -263,7 +263,7 @@
<AccountActionMenu :show="menu.show" :account="menu.acc" :position="menu.pos" @close="menu.show = false" @test="handleTest" @stats="handleViewStats" @reauth="handleReAuth" @refresh-token="handleRefresh" @reset-status="handleResetStatus" @clear-rate-limit="handleClearRateLimit" />
<SyncFromCrsModal :show="showSync" @close="showSync = false" @synced="reload" />
<ImportDataModal :show="showImportData" @close="showImportData = false" @imported="handleDataImported" />
<BulkEditAccountModal :show="showBulkEdit" :account-ids="selIds" :selected-platforms="selPlatforms" :proxies="proxies" :groups="groups" @close="showBulkEdit = false" @updated="handleBulkUpdated" />
<BulkEditAccountModal :show="showBulkEdit" :account-ids="selIds" :selected-platforms="selPlatforms" :selected-types="selTypes" :proxies="proxies" :groups="groups" @close="showBulkEdit = false" @updated="handleBulkUpdated" />
<TempUnschedStatusModal :show="showTempUnsched" :account="tempUnschedAcc" @close="showTempUnsched = false" @reset="handleTempUnschedReset" />
<ConfirmDialog :show="showDeleteDialog" :title="t('admin.accounts.deleteAccount')" :message="t('admin.accounts.deleteConfirm', { name: deletingAcc?.name })" :confirm-text="t('common.delete')" :cancel-text="t('common.cancel')" :danger="true" @confirm="confirmDelete" @cancel="showDeleteDialog = false" />
<ConfirmDialog :show="showExportDataDialog" :title="t('admin.accounts.dataExport')" :message="t('admin.accounts.dataExportConfirmMessage')" :confirm-text="t('admin.accounts.dataExportConfirm')" :cancel-text="t('common.cancel')" @confirm="handleExportData" @cancel="showExportDataDialog = false">
@@ -307,7 +307,7 @@ import PlatformTypeBadge from '@/components/common/PlatformTypeBadge.vue'
import Icon from '@/components/icons/Icon.vue'
import ErrorPassthroughRulesModal from '@/components/admin/ErrorPassthroughRulesModal.vue'
import { formatDateTime, formatRelativeTime } from '@/utils/format'
import type { Account, AccountPlatform, Proxy, AdminGroup, WindowStats } from '@/types'
import type { Account, AccountPlatform, AccountType, Proxy, AdminGroup, WindowStats } from '@/types'
const { t } = useI18n()
const appStore = useAppStore()
@@ -324,6 +324,14 @@ const selPlatforms = computed<AccountPlatform[]>(() => {
)
return [...platforms]
})
const selTypes = computed<AccountType[]>(() => {
const types = new Set(
accounts.value
.filter(a => selIds.value.includes(a.id))
.map(a => a.type)
)
return [...types]
})
const showCreate = ref(false)
const showEdit = ref(false)
const showSync = ref(false)