feat: 添加5h窗口费用控制和会话数量限制

- 支持Anthropic OAuth/SetupToken账号的5h窗口费用阈值控制
- 支持账号级别的并发会话数量限制
- 使用Redis缓存窗口费用(30秒TTL)减少数据库压力
- 费用计算基于标准费用(不含账号倍率)
This commit is contained in:
shaw
2026-01-16 23:36:52 +08:00
parent 04811c00cb
commit 7379423325
21 changed files with 1329 additions and 53 deletions

View File

@@ -118,7 +118,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
concurrencyCache := repository.ProvideConcurrencyCache(redisClient, configConfig) concurrencyCache := repository.ProvideConcurrencyCache(redisClient, configConfig)
concurrencyService := service.ProvideConcurrencyService(concurrencyCache, accountRepository, configConfig) concurrencyService := service.ProvideConcurrencyService(concurrencyCache, accountRepository, configConfig)
crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig) crsSyncService := service.NewCRSSyncService(accountRepository, proxyRepository, oAuthService, openAIOAuthService, geminiOAuthService, configConfig)
accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService) sessionLimitCache := repository.ProvideSessionLimitCache(redisClient, configConfig)
accountHandler := admin.NewAccountHandler(adminService, oAuthService, openAIOAuthService, geminiOAuthService, antigravityOAuthService, rateLimitService, accountUsageService, accountTestService, concurrencyService, crsSyncService, sessionLimitCache)
oAuthHandler := admin.NewOAuthHandler(oAuthService) oAuthHandler := admin.NewOAuthHandler(oAuthService)
openAIOAuthHandler := admin.NewOpenAIOAuthHandler(openAIOAuthService, adminService) openAIOAuthHandler := admin.NewOpenAIOAuthHandler(openAIOAuthService, adminService)
geminiOAuthHandler := admin.NewGeminiOAuthHandler(geminiOAuthService) geminiOAuthHandler := admin.NewGeminiOAuthHandler(geminiOAuthService)
@@ -140,7 +141,7 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
identityService := service.NewIdentityService(identityCache) identityService := service.NewIdentityService(identityCache)
deferredService := service.ProvideDeferredService(accountRepository, timingWheelService) deferredService := service.ProvideDeferredService(accountRepository, timingWheelService)
claudeTokenProvider := service.NewClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService) claudeTokenProvider := service.NewClaudeTokenProvider(accountRepository, geminiTokenCache, oAuthService)
gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider) gatewayService := service.NewGatewayService(accountRepository, groupRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, identityService, httpUpstream, deferredService, claudeTokenProvider, sessionLimitCache)
openAITokenProvider := service.NewOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService) openAITokenProvider := service.NewOpenAITokenProvider(accountRepository, geminiTokenCache, openAIOAuthService)
openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider) openAIGatewayService := service.NewOpenAIGatewayService(accountRepository, usageLogRepository, userRepository, userSubscriptionRepository, gatewayCache, configConfig, schedulerSnapshotService, concurrencyService, billingService, rateLimitService, billingCacheService, httpUpstream, deferredService, openAITokenProvider)
geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig) geminiMessagesCompatService := service.NewGeminiMessagesCompatService(accountRepository, groupRepository, gatewayCache, schedulerSnapshotService, geminiTokenProvider, rateLimitService, httpUpstream, antigravityGatewayService, configConfig)

View File

@@ -234,6 +234,10 @@ type GatewayConfig struct {
// ConcurrencySlotTTLMinutes: 并发槽位过期时间(分钟) // ConcurrencySlotTTLMinutes: 并发槽位过期时间(分钟)
// 应大于最长 LLM 请求时间,防止请求完成前槽位过期 // 应大于最长 LLM 请求时间,防止请求完成前槽位过期
ConcurrencySlotTTLMinutes int `mapstructure:"concurrency_slot_ttl_minutes"` ConcurrencySlotTTLMinutes int `mapstructure:"concurrency_slot_ttl_minutes"`
// SessionIdleTimeoutMinutes: 会话空闲超时时间(分钟),默认 5 分钟
// 用于 Anthropic OAuth/SetupToken 账号的会话数量限制功能
// 空闲超过此时间的会话将被自动释放
SessionIdleTimeoutMinutes int `mapstructure:"session_idle_timeout_minutes"`
// StreamDataIntervalTimeout: 流数据间隔超时0表示禁用 // StreamDataIntervalTimeout: 流数据间隔超时0表示禁用
StreamDataIntervalTimeout int `mapstructure:"stream_data_interval_timeout"` StreamDataIntervalTimeout int `mapstructure:"stream_data_interval_timeout"`

View File

@@ -44,6 +44,7 @@ type AccountHandler struct {
accountTestService *service.AccountTestService accountTestService *service.AccountTestService
concurrencyService *service.ConcurrencyService concurrencyService *service.ConcurrencyService
crsSyncService *service.CRSSyncService crsSyncService *service.CRSSyncService
sessionLimitCache service.SessionLimitCache
} }
// NewAccountHandler creates a new admin account handler // NewAccountHandler creates a new admin account handler
@@ -58,6 +59,7 @@ func NewAccountHandler(
accountTestService *service.AccountTestService, accountTestService *service.AccountTestService,
concurrencyService *service.ConcurrencyService, concurrencyService *service.ConcurrencyService,
crsSyncService *service.CRSSyncService, crsSyncService *service.CRSSyncService,
sessionLimitCache service.SessionLimitCache,
) *AccountHandler { ) *AccountHandler {
return &AccountHandler{ return &AccountHandler{
adminService: adminService, adminService: adminService,
@@ -70,6 +72,7 @@ func NewAccountHandler(
accountTestService: accountTestService, accountTestService: accountTestService,
concurrencyService: concurrencyService, concurrencyService: concurrencyService,
crsSyncService: crsSyncService, crsSyncService: crsSyncService,
sessionLimitCache: sessionLimitCache,
} }
} }
@@ -130,6 +133,9 @@ type BulkUpdateAccountsRequest struct {
type AccountWithConcurrency struct { type AccountWithConcurrency struct {
*dto.Account *dto.Account
CurrentConcurrency int `json:"current_concurrency"` CurrentConcurrency int `json:"current_concurrency"`
// 以下字段仅对 Anthropic OAuth/SetupToken 账号有效,且仅在启用相应功能时返回
CurrentWindowCost *float64 `json:"current_window_cost,omitempty"` // 当前窗口费用
ActiveSessions *int `json:"active_sessions,omitempty"` // 当前活跃会话数
} }
// List handles listing all accounts with pagination // List handles listing all accounts with pagination
@@ -164,13 +170,89 @@ func (h *AccountHandler) List(c *gin.Context) {
concurrencyCounts = make(map[int64]int) concurrencyCounts = make(map[int64]int)
} }
// 识别需要查询窗口费用和会话数的账号Anthropic OAuth/SetupToken 且启用了相应功能)
windowCostAccountIDs := make([]int64, 0)
sessionLimitAccountIDs := make([]int64, 0)
for i := range accounts {
acc := &accounts[i]
if acc.IsAnthropicOAuthOrSetupToken() {
if acc.GetWindowCostLimit() > 0 {
windowCostAccountIDs = append(windowCostAccountIDs, acc.ID)
}
if acc.GetMaxSessions() > 0 {
sessionLimitAccountIDs = append(sessionLimitAccountIDs, acc.ID)
}
}
}
// 并行获取窗口费用和活跃会话数
var windowCosts map[int64]float64
var activeSessions map[int64]int
// 获取活跃会话数(批量查询)
if len(sessionLimitAccountIDs) > 0 && h.sessionLimitCache != nil {
activeSessions, _ = h.sessionLimitCache.GetActiveSessionCountBatch(c.Request.Context(), sessionLimitAccountIDs)
if activeSessions == nil {
activeSessions = make(map[int64]int)
}
}
// 获取窗口费用(并行查询)
if len(windowCostAccountIDs) > 0 {
windowCosts = make(map[int64]float64)
var mu sync.Mutex
g, gctx := errgroup.WithContext(c.Request.Context())
g.SetLimit(10) // 限制并发数
for i := range accounts {
acc := &accounts[i]
if !acc.IsAnthropicOAuthOrSetupToken() || acc.GetWindowCostLimit() <= 0 {
continue
}
accCopy := acc // 闭包捕获
g.Go(func() error {
var startTime time.Time
if accCopy.SessionWindowStart != nil {
startTime = *accCopy.SessionWindowStart
} else {
startTime = time.Now().Add(-5 * time.Hour)
}
stats, err := h.accountUsageService.GetAccountWindowStats(gctx, accCopy.ID, startTime)
if err == nil && stats != nil {
mu.Lock()
windowCosts[accCopy.ID] = stats.StandardCost // 使用标准费用
mu.Unlock()
}
return nil // 不返回错误,允许部分失败
})
}
_ = g.Wait()
}
// Build response with concurrency info // Build response with concurrency info
result := make([]AccountWithConcurrency, len(accounts)) result := make([]AccountWithConcurrency, len(accounts))
for i := range accounts { for i := range accounts {
result[i] = AccountWithConcurrency{ acc := &accounts[i]
Account: dto.AccountFromService(&accounts[i]), item := AccountWithConcurrency{
CurrentConcurrency: concurrencyCounts[accounts[i].ID], Account: dto.AccountFromService(acc),
CurrentConcurrency: concurrencyCounts[acc.ID],
} }
// 添加窗口费用(仅当启用时)
if windowCosts != nil {
if cost, ok := windowCosts[acc.ID]; ok {
item.CurrentWindowCost = &cost
}
}
// 添加活跃会话数(仅当启用时)
if activeSessions != nil {
if count, ok := activeSessions[acc.ID]; ok {
item.ActiveSessions = &count
}
}
result[i] = item
} }
response.Paginated(c, result, total, page, pageSize) response.Paginated(c, result, total, page, pageSize)

View File

@@ -116,7 +116,7 @@ func AccountFromServiceShallow(a *service.Account) *Account {
if a == nil { if a == nil {
return nil return nil
} }
return &Account{ out := &Account{
ID: a.ID, ID: a.ID,
Name: a.Name, Name: a.Name,
Notes: a.Notes, Notes: a.Notes,
@@ -146,6 +146,24 @@ func AccountFromServiceShallow(a *service.Account) *Account {
SessionWindowStatus: a.SessionWindowStatus, SessionWindowStatus: a.SessionWindowStatus,
GroupIDs: a.GroupIDs, GroupIDs: a.GroupIDs,
} }
// 提取 5h 窗口费用控制和会话数量控制配置(仅 Anthropic OAuth/SetupToken 账号有效)
if a.IsAnthropicOAuthOrSetupToken() {
if limit := a.GetWindowCostLimit(); limit > 0 {
out.WindowCostLimit = &limit
}
if reserve := a.GetWindowCostStickyReserve(); reserve > 0 {
out.WindowCostStickyReserve = &reserve
}
if maxSessions := a.GetMaxSessions(); maxSessions > 0 {
out.MaxSessions = &maxSessions
}
if idleTimeout := a.GetSessionIdleTimeoutMinutes(); idleTimeout > 0 {
out.SessionIdleTimeoutMin = &idleTimeout
}
}
return out
} }
func AccountFromService(a *service.Account) *Account { func AccountFromService(a *service.Account) *Account {

View File

@@ -102,6 +102,16 @@ type Account struct {
SessionWindowEnd *time.Time `json:"session_window_end"` SessionWindowEnd *time.Time `json:"session_window_end"`
SessionWindowStatus string `json:"session_window_status"` SessionWindowStatus string `json:"session_window_status"`
// 5h窗口费用控制仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑
WindowCostLimit *float64 `json:"window_cost_limit,omitempty"`
WindowCostStickyReserve *float64 `json:"window_cost_sticky_reserve,omitempty"`
// 会话数量控制(仅 Anthropic OAuth/SetupToken 账号有效)
// 从 extra 字段提取,方便前端显示和编辑
MaxSessions *int `json:"max_sessions,omitempty"`
SessionIdleTimeoutMin *int `json:"session_idle_timeout_minutes,omitempty"`
Proxy *Proxy `json:"proxy,omitempty"` Proxy *Proxy `json:"proxy,omitempty"`
AccountGroups []AccountGroup `json:"account_groups,omitempty"` AccountGroups []AccountGroup `json:"account_groups,omitempty"`

View File

@@ -185,7 +185,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
lastFailoverStatus := 0 lastFailoverStatus := 0
for { for {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs) selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
if err != nil { if err != nil {
if len(failedAccountIDs) == 0 { if len(failedAccountIDs) == 0 {
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)
@@ -320,7 +320,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
for { for {
// 选择支持该模型的账号 // 选择支持该模型的账号
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs) selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, parsedReq.MetadataUserID)
if err != nil { if err != nil {
if len(failedAccountIDs) == 0 { if len(failedAccountIDs) == 0 {
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted) h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts: "+err.Error(), streamStarted)

View File

@@ -226,7 +226,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
lastFailoverStatus := 0 lastFailoverStatus := 0
for { for {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs) selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制
if err != nil { if err != nil {
if len(failedAccountIDs) == 0 { if len(failedAccountIDs) == 0 {
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error()) googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())

View File

@@ -0,0 +1,321 @@
package repository
import (
"context"
"fmt"
"strconv"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/redis/go-redis/v9"
)
// Constants for the session-limit cache.
//
// Design notes:
// A Redis sorted set tracks the active sessions of each account.
// - Key: session_limit:account:{accountID}
// - Member: sessionUUID (extracted from metadata.user_id)
// - Score: Unix timestamp of the session's last activity
//
// Expired sessions are pruned with ZREMRANGEBYSCORE, so no per-member TTL
// has to be managed by hand.
const (
	// Key prefix for per-account session sets.
	// Format: session_limit:account:{accountID}
	sessionLimitKeyPrefix = "session_limit:account:"
	// Key prefix for the cached window cost.
	// Format: window_cost:account:{accountID}
	windowCostKeyPrefix = "window_cost:account:"
	// TTL of a cached window cost entry (30 seconds).
	windowCostCacheTTL = 30 * time.Second
)
var (
// registerSessionScript registers session activity for an account.
// It reads the clock with the Redis TIME command so that all application
// instances share the Redis server's notion of "now".
// KEYS[1] = session_limit:account:{accountID}
// ARGV[1] = maxSessions
// ARGV[2] = idleTimeout (compared against the seconds field of TIME)
// ARGV[3] = sessionUUID
// Returns: 1 = allowed, 0 = rejected
//
// Flow: prune members whose score is <= now-idleTimeout; if the session is
// already a member, refresh its score; otherwise add it only while the set
// holds fewer than maxSessions members. Every successful write also resets
// the key expiry to idleTimeout + 60.
registerSessionScript = redis.NewScript(`
local key = KEYS[1]
local maxSessions = tonumber(ARGV[1])
local idleTimeout = tonumber(ARGV[2])
local sessionUUID = ARGV[3]
-- 使用 Redis 服务器时间,确保多实例时钟一致
local timeResult = redis.call('TIME')
local now = tonumber(timeResult[1])
local expireBefore = now - idleTimeout
-- 清理过期会话
redis.call('ZREMRANGEBYSCORE', key, '-inf', expireBefore)
-- 检查会话是否已存在(支持刷新时间戳)
local exists = redis.call('ZSCORE', key, sessionUUID)
if exists ~= false then
-- 会话已存在,刷新时间戳
redis.call('ZADD', key, now, sessionUUID)
redis.call('EXPIRE', key, idleTimeout + 60)
return 1
end
-- 检查是否达到会话数量上限
local count = redis.call('ZCARD', key)
if count < maxSessions then
-- 未达上限,添加新会话
redis.call('ZADD', key, now, sessionUUID)
redis.call('EXPIRE', key, idleTimeout + 60)
return 1
end
-- 达到上限,拒绝新会话
return 0
`)
// refreshSessionScript refreshes the timestamp of an existing session.
// KEYS[1] = session_limit:account:{accountID}
// ARGV[1] = idleTimeout
// ARGV[2] = sessionUUID
//
// The score and key expiry are updated only when the session is already a
// member of the set; an unknown session is left untouched. The script
// returns 1 in both cases.
refreshSessionScript = redis.NewScript(`
local key = KEYS[1]
local idleTimeout = tonumber(ARGV[1])
local sessionUUID = ARGV[2]
local timeResult = redis.call('TIME')
local now = tonumber(timeResult[1])
-- 检查会话是否存在
local exists = redis.call('ZSCORE', key, sessionUUID)
if exists ~= false then
redis.call('ZADD', key, now, sessionUUID)
redis.call('EXPIRE', key, idleTimeout + 60)
end
return 1
`)
// getActiveSessionCountScript returns the number of active sessions.
// KEYS[1] = session_limit:account:{accountID}
// ARGV[1] = idleTimeout
//
// The script is read-only: it counts members whose score is strictly greater
// than now-idleTimeout with ZCOUNT instead of deleting older members first.
// Callers of this script pass the cache-wide default idle timeout, whereas
// RegisterSession accepts a per-call timeout; a destructive cleanup here
// could therefore evict sessions that are still live under a longer timeout
// merely because someone queried the count. Pruning remains the job of
// registerSessionScript.
getActiveSessionCountScript = redis.NewScript(`
local key = KEYS[1]
local idleTimeout = tonumber(ARGV[1])
local timeResult = redis.call('TIME')
local now = tonumber(timeResult[1])
local expireBefore = now - idleTimeout
-- count only sessions newer than the cutoff; never delete on a read path
return redis.call('ZCOUNT', key, '(' .. expireBefore, '+inf')
`)
// isSessionActiveScript reports whether a session is currently active.
// KEYS[1] = session_limit:account:{accountID}
// ARGV[1] = idleTimeout
// ARGV[2] = sessionUUID
//
// Returns 0 when the session is not a member of the set or when its score is
// <= now-idleTimeout, and 1 otherwise. The script does not modify the set.
isSessionActiveScript = redis.NewScript(`
local key = KEYS[1]
local idleTimeout = tonumber(ARGV[1])
local sessionUUID = ARGV[2]
local timeResult = redis.call('TIME')
local now = tonumber(timeResult[1])
local expireBefore = now - idleTimeout
-- 获取会话的时间戳
local score = redis.call('ZSCORE', key, sessionUUID)
if score == false then
return 0
end
-- 检查是否过期
if tonumber(score) <= expireBefore then
return 0
end
return 1
`)
)
// sessionLimitCache is the Redis-backed implementation returned by
// NewSessionLimitCache as a service.SessionLimitCache.
type sessionLimitCache struct {
	rdb                *redis.Client
	defaultIdleTimeout time.Duration // default idle timeout, used by queries that take no explicit timeout (e.g. GetActiveSessionCount)
}
// NewSessionLimitCache builds the Redis-backed session-limit cache.
// defaultIdleTimeoutMinutes is the idle timeout, in minutes, applied by
// queries that carry no explicit timeout; a non-positive value falls back
// to 5 minutes.
func NewSessionLimitCache(rdb *redis.Client, defaultIdleTimeoutMinutes int) service.SessionLimitCache {
	minutes := 5 // fallback when the caller supplies no usable value
	if defaultIdleTimeoutMinutes > 0 {
		minutes = defaultIdleTimeoutMinutes
	}

	cache := new(sessionLimitCache)
	cache.rdb = rdb
	cache.defaultIdleTimeout = time.Duration(minutes) * time.Minute
	return cache
}
// sessionLimitKey returns the Redis key of the sorted set that holds the
// sessions of the given account: the session-limit prefix followed by the
// decimal account ID.
func sessionLimitKey(accountID int64) string {
	const layout = sessionLimitKeyPrefix + "%d"
	return fmt.Sprintf(layout, accountID)
}
// windowCostKey returns the Redis key under which the window cost of the
// given account is cached: the window-cost prefix followed by the decimal
// account ID.
func windowCostKey(accountID int64) string {
	const layout = windowCostKeyPrefix + "%d"
	return fmt.Sprintf(layout, accountID)
}
// RegisterSession records activity for sessionUUID on the given account and
// reports whether the session is allowed under maxSessions.
//
// An empty sessionUUID or a non-positive maxSessions means there is nothing
// to enforce, so the call is allowed without touching Redis. A non-positive
// idleTimeout is replaced by the cache's default. If the script fails the
// method returns (true, err): it fails open so that a cache outage does not
// block requests.
func (c *sessionLimitCache) RegisterSession(ctx context.Context, accountID int64, sessionUUID string, maxSessions int, idleTimeout time.Duration) (bool, error) {
	if maxSessions <= 0 || sessionUUID == "" {
		return true, nil
	}

	timeoutSec := int(idleTimeout.Seconds())
	if timeoutSec <= 0 {
		timeoutSec = int(c.defaultIdleTimeout.Seconds())
	}

	keys := []string{sessionLimitKey(accountID)}
	verdict, err := registerSessionScript.Run(ctx, c.rdb, keys, maxSessions, timeoutSec, sessionUUID).Int()
	if err != nil {
		// Fail open: let the request through when the cache is unavailable.
		return true, err
	}
	return verdict == 1, nil
}
// RefreshSession bumps the last-activity timestamp of sessionUUID on the
// given account. An empty sessionUUID is a no-op; a non-positive idleTimeout
// is replaced by the cache's default. The returned error is the script
// command's error, if any.
func (c *sessionLimitCache) RefreshSession(ctx context.Context, accountID int64, sessionUUID string, idleTimeout time.Duration) error {
	if sessionUUID == "" {
		return nil
	}

	timeoutSec := int(idleTimeout.Seconds())
	if timeoutSec <= 0 {
		timeoutSec = int(c.defaultIdleTimeout.Seconds())
	}

	keys := []string{sessionLimitKey(accountID)}
	return refreshSessionScript.Run(ctx, c.rdb, keys, timeoutSec, sessionUUID).Err()
}
// GetActiveSessionCount returns the number of active sessions of the given
// account, evaluated with the cache's default idle timeout. On failure it
// returns 0 together with the error.
func (c *sessionLimitCache) GetActiveSessionCount(ctx context.Context, accountID int64) (int, error) {
	timeoutSec := int(c.defaultIdleTimeout.Seconds())
	keys := []string{sessionLimitKey(accountID)}

	count, err := getActiveSessionCountScript.Run(ctx, c.rdb, keys, timeoutSec).Int()
	if err == nil {
		return count, nil
	}
	return 0, err
}
// GetActiveSessionCountBatch returns the active session count for each of
// the given accounts, evaluated with the cache's default idle timeout.
//
// All lookups are sent in a single pipeline. The call is best effort:
// accounts whose command failed are simply absent from the returned map and
// the returned error is always nil, so callers must treat a missing entry as
// "unknown" rather than zero.
func (c *sessionLimitCache) GetActiveSessionCountBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error) {
	results := make(map[int64]int, len(accountIDs))
	if len(accountIDs) == 0 {
		return results, nil
	}

	idleTimeoutSeconds := int(c.defaultIdleTimeout.Seconds())
	pipe := c.rdb.Pipeline()
	cmds := make(map[int64]*redis.Cmd, len(accountIDs))
	for _, accountID := range accountIDs {
		key := sessionLimitKey(accountID)
		// Use Eval rather than Run inside a pipeline. Run issues EVALSHA and
		// only falls back to EVAL when it sees a NOSCRIPT error, but a
		// pipelined command has no error until Exec, so the fallback can
		// never trigger. If the script is not in the server's script cache
		// (fresh server, restart, SCRIPT FLUSH) every queued command would
		// fail and the whole batch would silently come back empty.
		cmds[accountID] = getActiveSessionCountScript.Eval(ctx, pipe, []string{key}, idleTimeoutSeconds)
	}

	// Deliberately ignore the aggregate error: it reports only the first
	// failing command, and successful results are still harvested below.
	_, _ = pipe.Exec(ctx)

	for accountID, cmd := range cmds {
		if count, err := cmd.Int(); err == nil {
			results[accountID] = count
		}
	}
	return results, nil
}
// IsSessionActive reports whether sessionUUID is an active session of the
// given account, evaluated with the cache's default idle timeout. An empty
// sessionUUID is never active. On script failure it returns false together
// with the error.
func (c *sessionLimitCache) IsSessionActive(ctx context.Context, accountID int64, sessionUUID string) (bool, error) {
	if sessionUUID == "" {
		return false, nil
	}

	timeoutSec := int(c.defaultIdleTimeout.Seconds())
	keys := []string{sessionLimitKey(accountID)}

	flag, err := isSessionActiveScript.Run(ctx, c.rdb, keys, timeoutSec, sessionUUID).Int()
	if err != nil {
		return false, err
	}
	return flag == 1, nil
}
// ========== 5h窗口费用缓存实现 ==========
// GetWindowCost reads the cached window cost of the given account.
// The boolean result is true only on a cache hit; a missing key yields
// (0, false, nil) and any other failure yields (0, false, err).
func (c *sessionLimitCache) GetWindowCost(ctx context.Context, accountID int64) (float64, bool, error) {
	cost, err := c.rdb.Get(ctx, windowCostKey(accountID)).Float64()
	switch {
	case err == redis.Nil:
		// Cache miss is not an error.
		return 0, false, nil
	case err != nil:
		return 0, false, err
	default:
		return cost, true, nil
	}
}
// SetWindowCost stores the window cost of the given account in the cache,
// expiring after windowCostCacheTTL.
func (c *sessionLimitCache) SetWindowCost(ctx context.Context, accountID int64, cost float64) error {
	status := c.rdb.Set(ctx, windowCostKey(accountID), cost, windowCostCacheTTL)
	return status.Err()
}
// GetWindowCostBatch reads the cached window cost of several accounts with
// one MGET. The result contains an entry only for accounts whose cached
// value is present and parses as a float; misses and unparsable values are
// omitted. If the MGET itself fails the method returns (nil, err).
func (c *sessionLimitCache) GetWindowCostBatch(ctx context.Context, accountIDs []int64) (map[int64]float64, error) {
	costs := make(map[int64]float64, len(accountIDs))
	if len(accountIDs) == 0 {
		return costs, nil
	}

	// One key per account, in the same order as accountIDs so that reply
	// positions map back to account IDs.
	keys := make([]string, 0, len(accountIDs))
	for _, id := range accountIDs {
		keys = append(keys, windowCostKey(id))
	}

	replies, err := c.rdb.MGet(ctx, keys...).Result()
	if err != nil {
		return nil, err
	}

	for idx, reply := range replies {
		// A nil reply (cache miss) matches neither assertion and is skipped.
		if text, isString := reply.(string); isString {
			if parsed, perr := strconv.ParseFloat(text, 64); perr == nil {
				costs[accountIDs[idx]] = parsed
			}
			continue
		}
		if number, isFloat := reply.(float64); isFloat {
			costs[accountIDs[idx]] = number
		}
	}
	return costs, nil
}

View File

@@ -37,6 +37,16 @@ func ProvidePricingRemoteClient(cfg *config.Config) service.PricingRemoteClient
return NewPricingRemoteClient(cfg.Update.ProxyURL) return NewPricingRemoteClient(cfg.Update.ProxyURL)
} }
// ProvideSessionLimitCache is the provider for the session-limit cache.
// It takes the default idle timeout from cfg.Gateway.SessionIdleTimeoutMinutes
// when cfg is non-nil and the value is positive, and uses 5 minutes otherwise.
func ProvideSessionLimitCache(rdb *redis.Client, cfg *config.Config) service.SessionLimitCache {
	if cfg == nil || cfg.Gateway.SessionIdleTimeoutMinutes <= 0 {
		return NewSessionLimitCache(rdb, 5)
	}
	return NewSessionLimitCache(rdb, cfg.Gateway.SessionIdleTimeoutMinutes)
}
// ProviderSet is the Wire provider set for all repositories // ProviderSet is the Wire provider set for all repositories
var ProviderSet = wire.NewSet( var ProviderSet = wire.NewSet(
NewUserRepository, NewUserRepository,
@@ -61,6 +71,7 @@ var ProviderSet = wire.NewSet(
NewTempUnschedCache, NewTempUnschedCache,
NewTimeoutCounterCache, NewTimeoutCounterCache,
ProvideConcurrencyCache, ProvideConcurrencyCache,
ProvideSessionLimitCache,
NewDashboardCache, NewDashboardCache,
NewEmailCache, NewEmailCache,
NewIdentityCache, NewIdentityCache,

View File

@@ -557,3 +557,141 @@ func (a *Account) IsMixedSchedulingEnabled() bool {
} }
return false return false
} }
// WindowCostSchedulability is the scheduling state derived from an account's
// window cost (see CheckWindowCostSchedulability).
type WindowCostSchedulability int

const (
	// WindowCostSchedulable means the account can be scheduled normally.
	WindowCostSchedulable WindowCostSchedulability = iota
	// WindowCostStickyOnly means only sticky sessions are allowed.
	WindowCostStickyOnly
	// WindowCostNotSchedulable means the account must not be scheduled at all.
	WindowCostNotSchedulable
)
// IsAnthropicOAuthOrSetupToken reports whether the account is on the
// Anthropic platform and of type OAuth or SetupToken. Only these accounts
// support the 5h window cost control and the session count control.
func (a *Account) IsAnthropicOAuthOrSetupToken() bool {
	if a.Platform != PlatformAnthropic {
		return false
	}
	return a.Type == AccountTypeOAuth || a.Type == AccountTypeSetupToken
}
// GetWindowCostLimit returns the 5h window cost threshold (USD) stored under
// Extra["window_cost_limit"]. It returns 0, meaning the feature is disabled,
// when the entry is absent.
func (a *Account) GetWindowCostLimit() float64 {
	// Indexing a nil map is safe and reports "not found", so no separate
	// nil check is needed.
	raw, found := a.Extra["window_cost_limit"]
	if !found {
		return 0
	}
	return parseExtraFloat64(raw)
}
// GetWindowCostStickyReserve returns the extra budget (USD) reserved for
// sticky sessions, read from Extra["window_cost_sticky_reserve"]. It falls
// back to 10 when the entry is absent or does not parse to a positive value.
func (a *Account) GetWindowCostStickyReserve() float64 {
	const fallback = 10.0

	// Indexing a nil map is safe and reports "not found".
	if raw, found := a.Extra["window_cost_sticky_reserve"]; found {
		if reserve := parseExtraFloat64(raw); reserve > 0 {
			return reserve
		}
	}
	return fallback
}
// GetMaxSessions returns the maximum number of concurrent sessions stored
// under Extra["max_sessions"]. It returns 0, meaning the feature is
// disabled, when the entry is absent.
func (a *Account) GetMaxSessions() int {
	// Indexing a nil map is safe and reports "not found", so no separate
	// nil check is needed.
	raw, found := a.Extra["max_sessions"]
	if !found {
		return 0
	}
	return parseExtraInt(raw)
}
// GetSessionIdleTimeoutMinutes returns the session idle timeout in minutes,
// read from Extra["session_idle_timeout_minutes"]. It falls back to 5 when
// the entry is absent or does not parse to a positive value.
func (a *Account) GetSessionIdleTimeoutMinutes() int {
	const fallback = 5

	// Indexing a nil map is safe and reports "not found".
	if raw, found := a.Extra["session_idle_timeout_minutes"]; found {
		if minutes := parseExtraInt(raw); minutes > 0 {
			return minutes
		}
	}
	return fallback
}
// CheckWindowCostSchedulability maps the current window cost to a
// scheduling state:
//   - no limit configured (limit <= 0), or cost < limit: WindowCostSchedulable
//   - limit <= cost < limit + sticky reserve: WindowCostStickyOnly
//   - otherwise: WindowCostNotSchedulable
func (a *Account) CheckWindowCostSchedulability(currentWindowCost float64) WindowCostSchedulability {
	limit := a.GetWindowCostLimit()
	switch {
	case limit <= 0, currentWindowCost < limit:
		return WindowCostSchedulable
	case currentWindowCost < limit+a.GetWindowCostStickyReserve():
		return WindowCostStickyOnly
	default:
		return WindowCostNotSchedulable
	}
}
// parseExtraFloat64 converts a value taken from the extra field to float64.
// It accepts float64, float32, int, int64, json.Number and numeric strings
// (surrounding whitespace is trimmed). Any other type, or a value that fails
// to parse, yields 0.
func parseExtraFloat64(value any) float64 {
	var (
		parsed float64
		err    error
	)

	switch typed := value.(type) {
	case float64:
		return typed
	case float32:
		return float64(typed)
	case int:
		return float64(typed)
	case int64:
		return float64(typed)
	case json.Number:
		parsed, err = typed.Float64()
	case string:
		parsed, err = strconv.ParseFloat(strings.TrimSpace(typed), 64)
	default:
		return 0
	}

	// Only the two textual forms can reach this point.
	if err != nil {
		return 0
	}
	return parsed
}
// parseExtraInt converts a value taken from the extra field to int.
//
// It accepts int, int64, float64, float32, json.Number and numeric strings
// (surrounding whitespace is trimmed). Fractional values are truncated
// toward zero. Decimal-formatted text such as "5.0" is accepted as well, so
// that a value which parseExtraFloat64 understands is not silently read as 0
// here. NaN, infinities and values outside the int range yield 0, as does
// any other type or unparsable input.
func parseExtraInt(value any) int {
	const maxInt = int(^uint(0) >> 1)

	// truncate converts a float to int, mapping NaN (f != f) and
	// out-of-range values to 0 because the result of such a conversion is
	// implementation-defined in Go.
	truncate := func(f float64) int {
		if f != f || f >= float64(maxInt) || f <= -float64(maxInt) {
			return 0
		}
		return int(f)
	}

	switch v := value.(type) {
	case int:
		return v
	case int64:
		return int(v)
	case float64:
		return truncate(v)
	case float32:
		return truncate(float64(v))
	case json.Number:
		if i, err := v.Int64(); err == nil {
			return int(i)
		}
		// Not a plain integer literal (e.g. "5.0"); fall back to float.
		if f, err := v.Float64(); err == nil {
			return truncate(f)
		}
	case string:
		s := strings.TrimSpace(v)
		if i, err := strconv.Atoi(s); err == nil {
			return i
		}
		// Not a plain integer literal (e.g. "5.0"); fall back to float.
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			return truncate(f)
		}
	}
	return 0
}

View File

@@ -575,3 +575,9 @@ func buildGeminiUsageProgress(used, limit int64, resetAt time.Time, tokens int64
}, },
} }
} }
// GetAccountWindowStats returns the usage statistics of an account for the
// time window beginning at startTime. It delegates directly to the usage log
// repository's GetAccountWindowStats.
// Used by the account list page to show the current window cost.
func (s *AccountUsageService) GetAccountWindowStats(ctx context.Context, accountID int64, startTime time.Time) (*usagestats.AccountStats, error) {
	return s.usageLogRepo.GetAccountWindowStats(ctx, accountID, startTime)
}

View File

@@ -1052,7 +1052,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, // No concurrency service concurrencyService: nil, // No concurrency service
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1105,7 +1105,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, // legacy path concurrencyService: nil, // legacy path
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, &groupID, sessionHash, "claude-b", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, &groupID, sessionHash, "claude-b", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1137,7 +1137,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, concurrencyService: nil,
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1169,7 +1169,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
} }
excludedIDs := map[int64]struct{}{1: {}} excludedIDs := map[int64]struct{}{1: {}}
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", excludedIDs) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", excludedIDs, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1203,7 +1203,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: NewConcurrencyService(concurrencyCache), concurrencyService: NewConcurrencyService(concurrencyCache),
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "sticky", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "sticky", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1239,7 +1239,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: NewConcurrencyService(concurrencyCache), concurrencyService: NewConcurrencyService(concurrencyCache),
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "sticky", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "sticky", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1266,7 +1266,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, concurrencyService: nil,
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil, "")
require.Error(t, err) require.Error(t, err)
require.Nil(t, result) require.Nil(t, result)
require.Contains(t, err.Error(), "no available accounts") require.Contains(t, err.Error(), "no available accounts")
@@ -1298,7 +1298,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, concurrencyService: nil,
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)
@@ -1331,7 +1331,7 @@ func TestGatewayService_SelectAccountWithLoadAwareness(t *testing.T) {
concurrencyService: nil, concurrencyService: nil,
} }
result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil) result, err := svc.SelectAccountWithLoadAwareness(ctx, nil, "", "claude-3-5-sonnet-20241022", nil, "")
require.NoError(t, err) require.NoError(t, err)
require.NotNil(t, result) require.NotNil(t, result)
require.NotNil(t, result.Account) require.NotNil(t, result.Account)

View File

@@ -176,6 +176,7 @@ type GatewayService struct {
deferredService *DeferredService deferredService *DeferredService
concurrencyService *ConcurrencyService concurrencyService *ConcurrencyService
claudeTokenProvider *ClaudeTokenProvider claudeTokenProvider *ClaudeTokenProvider
sessionLimitCache SessionLimitCache // 会话数量限制缓存(仅 Anthropic OAuth/SetupToken
} }
// NewGatewayService creates a new GatewayService // NewGatewayService creates a new GatewayService
@@ -196,6 +197,7 @@ func NewGatewayService(
httpUpstream HTTPUpstream, httpUpstream HTTPUpstream,
deferredService *DeferredService, deferredService *DeferredService,
claudeTokenProvider *ClaudeTokenProvider, claudeTokenProvider *ClaudeTokenProvider,
sessionLimitCache SessionLimitCache,
) *GatewayService { ) *GatewayService {
return &GatewayService{ return &GatewayService{
accountRepo: accountRepo, accountRepo: accountRepo,
@@ -214,6 +216,7 @@ func NewGatewayService(
httpUpstream: httpUpstream, httpUpstream: httpUpstream,
deferredService: deferredService, deferredService: deferredService,
claudeTokenProvider: claudeTokenProvider, claudeTokenProvider: claudeTokenProvider,
sessionLimitCache: sessionLimitCache,
} }
} }
@@ -407,8 +410,12 @@ func (s *GatewayService) SelectAccountForModelWithExclusions(ctx context.Context
} }
// SelectAccountWithLoadAwareness selects account with load-awareness and wait plan. // SelectAccountWithLoadAwareness selects account with load-awareness and wait plan.
func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) { // metadataUserID: 原始 metadata.user_id 字段(用于提取会话 UUID 进行会话数量限制)
func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, metadataUserID string) (*AccountSelectionResult, error) {
cfg := s.schedulingConfig() cfg := s.schedulingConfig()
// 提取会话 UUID用于会话数量限制
sessionUUID := extractSessionUUID(metadataUserID)
var stickyAccountID int64 var stickyAccountID int64
if sessionHash != "" && s.cache != nil { if sessionHash != "" && s.cache != nil {
if accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash); err == nil { if accountID, err := s.cache.GetSessionAccountID(ctx, derefGroupID(groupID), sessionHash); err == nil {
@@ -527,7 +534,7 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
if len(routingAccountIDs) > 0 && s.concurrencyService != nil { if len(routingAccountIDs) > 0 && s.concurrencyService != nil {
// 1. 过滤出路由列表中可调度的账号 // 1. 过滤出路由列表中可调度的账号
var routingCandidates []*Account var routingCandidates []*Account
var filteredExcluded, filteredMissing, filteredUnsched, filteredPlatform, filteredModelScope, filteredModelMapping int var filteredExcluded, filteredMissing, filteredUnsched, filteredPlatform, filteredModelScope, filteredModelMapping, filteredWindowCost int
for _, routingAccountID := range routingAccountIDs { for _, routingAccountID := range routingAccountIDs {
if isExcluded(routingAccountID) { if isExcluded(routingAccountID) {
filteredExcluded++ filteredExcluded++
@@ -554,13 +561,18 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
filteredModelMapping++ filteredModelMapping++
continue continue
} }
// 窗口费用检查(非粘性会话路径)
if !s.isAccountSchedulableForWindowCost(ctx, account, false) {
filteredWindowCost++
continue
}
routingCandidates = append(routingCandidates, account) routingCandidates = append(routingCandidates, account)
} }
if s.debugModelRoutingEnabled() { if s.debugModelRoutingEnabled() {
log.Printf("[ModelRoutingDebug] routed candidates: group_id=%v model=%s routed=%d candidates=%d filtered(excluded=%d missing=%d unsched=%d platform=%d model_scope=%d model_mapping=%d)", log.Printf("[ModelRoutingDebug] routed candidates: group_id=%v model=%s routed=%d candidates=%d filtered(excluded=%d missing=%d unsched=%d platform=%d model_scope=%d model_mapping=%d window_cost=%d)",
derefGroupID(groupID), requestedModel, len(routingAccountIDs), len(routingCandidates), derefGroupID(groupID), requestedModel, len(routingAccountIDs), len(routingCandidates),
filteredExcluded, filteredMissing, filteredUnsched, filteredPlatform, filteredModelScope, filteredModelMapping) filteredExcluded, filteredMissing, filteredUnsched, filteredPlatform, filteredModelScope, filteredModelMapping, filteredWindowCost)
} }
if len(routingCandidates) > 0 { if len(routingCandidates) > 0 {
@@ -573,18 +585,25 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
if stickyAccount.IsSchedulable() && if stickyAccount.IsSchedulable() &&
s.isAccountAllowedForPlatform(stickyAccount, platform, useMixed) && s.isAccountAllowedForPlatform(stickyAccount, platform, useMixed) &&
stickyAccount.IsSchedulableForModel(requestedModel) && stickyAccount.IsSchedulableForModel(requestedModel) &&
(requestedModel == "" || s.isModelSupportedByAccount(stickyAccount, requestedModel)) { (requestedModel == "" || s.isModelSupportedByAccount(stickyAccount, requestedModel)) &&
s.isAccountSchedulableForWindowCost(ctx, stickyAccount, true) { // 粘性会话窗口费用检查
result, err := s.tryAcquireAccountSlot(ctx, stickyAccountID, stickyAccount.Concurrency) result, err := s.tryAcquireAccountSlot(ctx, stickyAccountID, stickyAccount.Concurrency)
if err == nil && result.Acquired { if err == nil && result.Acquired {
_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL) // 会话数量限制检查
if s.debugModelRoutingEnabled() { if !s.checkAndRegisterSession(ctx, stickyAccount, sessionUUID) {
log.Printf("[ModelRoutingDebug] routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), stickyAccountID) result.ReleaseFunc() // 释放槽位
// 继续到负载感知选择
} else {
_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL)
if s.debugModelRoutingEnabled() {
log.Printf("[ModelRoutingDebug] routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), stickyAccountID)
}
return &AccountSelectionResult{
Account: stickyAccount,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
} }
return &AccountSelectionResult{
Account: stickyAccount,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
} }
waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, stickyAccountID) waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, stickyAccountID)
@@ -657,6 +676,11 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
for _, item := range routingAvailable { for _, item := range routingAvailable {
result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency) result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)
if err == nil && result.Acquired { if err == nil && result.Acquired {
// 会话数量限制检查
if !s.checkAndRegisterSession(ctx, item.account, sessionUUID) {
result.ReleaseFunc() // 释放槽位,继续尝试下一个账号
continue
}
if sessionHash != "" && s.cache != nil { if sessionHash != "" && s.cache != nil {
_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, item.account.ID, stickySessionTTL) _ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, item.account.ID, stickySessionTTL)
} }
@@ -699,15 +723,21 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
if ok && s.isAccountInGroup(account, groupID) && if ok && s.isAccountInGroup(account, groupID) &&
s.isAccountAllowedForPlatform(account, platform, useMixed) && s.isAccountAllowedForPlatform(account, platform, useMixed) &&
account.IsSchedulableForModel(requestedModel) && account.IsSchedulableForModel(requestedModel) &&
(requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) { (requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) &&
s.isAccountSchedulableForWindowCost(ctx, account, true) { // 粘性会话窗口费用检查
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency) result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
if err == nil && result.Acquired { if err == nil && result.Acquired {
_ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL) // 会话数量限制检查
return &AccountSelectionResult{ if !s.checkAndRegisterSession(ctx, account, sessionUUID) {
Account: account, result.ReleaseFunc() // 释放槽位,继续到 Layer 2
Acquired: true, } else {
ReleaseFunc: result.ReleaseFunc, _ = s.cache.RefreshSessionTTL(ctx, derefGroupID(groupID), sessionHash, stickySessionTTL)
}, nil return &AccountSelectionResult{
Account: account,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
}
} }
waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID) waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID)
@@ -748,6 +778,10 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
if requestedModel != "" && !s.isModelSupportedByAccount(acc, requestedModel) { if requestedModel != "" && !s.isModelSupportedByAccount(acc, requestedModel) {
continue continue
} }
// 窗口费用检查(非粘性会话路径)
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
continue
}
candidates = append(candidates, acc) candidates = append(candidates, acc)
} }
@@ -765,7 +799,7 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
loadMap, err := s.concurrencyService.GetAccountsLoadBatch(ctx, accountLoads) loadMap, err := s.concurrencyService.GetAccountsLoadBatch(ctx, accountLoads)
if err != nil { if err != nil {
if result, ok := s.tryAcquireByLegacyOrder(ctx, candidates, groupID, sessionHash, preferOAuth); ok { if result, ok := s.tryAcquireByLegacyOrder(ctx, candidates, groupID, sessionHash, preferOAuth, sessionUUID); ok {
return result, nil return result, nil
} }
} else { } else {
@@ -814,6 +848,11 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
for _, item := range available { for _, item := range available {
result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency) result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)
if err == nil && result.Acquired { if err == nil && result.Acquired {
// 会话数量限制检查
if !s.checkAndRegisterSession(ctx, item.account, sessionUUID) {
result.ReleaseFunc() // 释放槽位,继续尝试下一个账号
continue
}
if sessionHash != "" && s.cache != nil { if sessionHash != "" && s.cache != nil {
_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, item.account.ID, stickySessionTTL) _ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, item.account.ID, stickySessionTTL)
} }
@@ -843,13 +882,18 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
return nil, errors.New("no available accounts") return nil, errors.New("no available accounts")
} }
func (s *GatewayService) tryAcquireByLegacyOrder(ctx context.Context, candidates []*Account, groupID *int64, sessionHash string, preferOAuth bool) (*AccountSelectionResult, bool) { func (s *GatewayService) tryAcquireByLegacyOrder(ctx context.Context, candidates []*Account, groupID *int64, sessionHash string, preferOAuth bool, sessionUUID string) (*AccountSelectionResult, bool) {
ordered := append([]*Account(nil), candidates...) ordered := append([]*Account(nil), candidates...)
sortAccountsByPriorityAndLastUsed(ordered, preferOAuth) sortAccountsByPriorityAndLastUsed(ordered, preferOAuth)
for _, acc := range ordered { for _, acc := range ordered {
result, err := s.tryAcquireAccountSlot(ctx, acc.ID, acc.Concurrency) result, err := s.tryAcquireAccountSlot(ctx, acc.ID, acc.Concurrency)
if err == nil && result.Acquired { if err == nil && result.Acquired {
// 会话数量限制检查
if !s.checkAndRegisterSession(ctx, acc, sessionUUID) {
result.ReleaseFunc() // 释放槽位,继续尝试下一个账号
continue
}
if sessionHash != "" && s.cache != nil { if sessionHash != "" && s.cache != nil {
_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, acc.ID, stickySessionTTL) _ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, acc.ID, stickySessionTTL)
} }
@@ -1081,6 +1125,107 @@ func (s *GatewayService) tryAcquireAccountSlot(ctx context.Context, accountID in
return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency) return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
} }
// isAccountSchedulableForWindowCost reports whether the account may be
// scheduled given the cost it has accumulated in its current usage window.
//
// Only Anthropic OAuth/SetupToken accounts with a positive window cost limit
// are checked; every other account is always schedulable. isSticky marks a
// sticky-session request, which is still admitted while the account is in the
// sticky-only state.
//
// The check fails open: when the window stats cannot be loaded the account is
// treated as schedulable.
func (s *GatewayService) isAccountSchedulableForWindowCost(ctx context.Context, account *Account, isSticky bool) bool {
	// A non-positive limit means window cost control is not enabled.
	if !account.IsAnthropicOAuthOrSetupToken() || account.GetWindowCostLimit() <= 0 {
		return true
	}

	// Prefer the cached window cost when the cache is wired in and has a hit.
	var windowCost float64
	cached := false
	if s.sessionLimitCache != nil {
		cost, hit, err := s.sessionLimitCache.GetWindowCost(ctx, account.ID)
		if err == nil && hit {
			windowCost, cached = cost, true
		}
	}

	if !cached {
		// Cache miss or cache error: aggregate from the usage log. Without a
		// recorded window start, fall back to the last five hours.
		var windowStart time.Time
		if start := account.SessionWindowStart; start == nil {
			windowStart = time.Now().Add(-5 * time.Hour)
		} else {
			windowStart = *start
		}

		stats, err := s.usageLogRepo.GetAccountWindowStats(ctx, account.ID, windowStart)
		if err != nil {
			// Fail open: a failed query must not block scheduling.
			return true
		}

		// Use the standard cost (the account multiplier is not applied).
		windowCost = stats.StandardCost

		// Best-effort cache fill; a write failure is deliberately ignored.
		if s.sessionLimitCache != nil {
			_ = s.sessionLimitCache.SetWindowCost(ctx, account.ID, windowCost)
		}
	}

	switch account.CheckWindowCostSchedulability(windowCost) {
	case WindowCostStickyOnly:
		return isSticky
	case WindowCostNotSchedulable:
		return false
	default:
		// WindowCostSchedulable and any unrecognised state allow scheduling.
		return true
	}
}
// checkAndRegisterSession applies the per-account session limit by
// registering sessionUUID with the session limit cache.
//
// Only Anthropic OAuth/SetupToken accounts are checked. It returns true when
// the request may proceed and false when the cache rejects the session. The
// check is skipped (returning true) when the limit is not enabled, when no
// session UUID is available, or when no cache is configured, and it fails open
// on cache errors.
func (s *GatewayService) checkAndRegisterSession(ctx context.Context, account *Account, sessionUUID string) bool {
	if !account.IsAnthropicOAuthOrSetupToken() {
		return true
	}

	limit := account.GetMaxSessions()
	switch {
	case limit <= 0, sessionUUID == "":
		// Session limiting disabled, or nothing to key the session on.
		return true
	case s.sessionLimitCache == nil:
		// No cache available: let the request through.
		return true
	}

	timeout := time.Duration(account.GetSessionIdleTimeoutMinutes()) * time.Minute
	if allowed, err := s.sessionLimitCache.RegisterSession(ctx, account.ID, sessionUUID, limit, timeout); err == nil {
		return allowed
	}

	// Fail open: a cache error must not reject the request.
	return true
}
// extractSessionUUID pulls the session UUID out of a metadata.user_id value.
// Expected format: user_{64-char hex}_account__session_{uuid}
// It returns the first capture group of sessionIDRegex, or "" when the input
// is empty or does not match.
func extractSessionUUID(metadataUserID string) string {
	if metadataUserID == "" {
		return ""
	}

	groups := sessionIDRegex.FindStringSubmatch(metadataUserID)
	if len(groups) < 2 {
		return ""
	}
	return groups[1]
}
func (s *GatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) { func (s *GatewayService) getSchedulableAccount(ctx context.Context, accountID int64) (*Account, error) {
if s.schedulerSnapshot != nil { if s.schedulerSnapshot != nil {
return s.schedulerSnapshot.GetAccount(ctx, accountID) return s.schedulerSnapshot.GetAccount(ctx, accountID)

View File

@@ -514,7 +514,7 @@ func (s *OpsService) selectAccountForRetry(ctx context.Context, reqType opsRetry
if s.gatewayService == nil { if s.gatewayService == nil {
return nil, fmt.Errorf("gateway service not available") return nil, fmt.Errorf("gateway service not available")
} }
return s.gatewayService.SelectAccountWithLoadAwareness(ctx, groupID, "", model, excludedIDs) return s.gatewayService.SelectAccountWithLoadAwareness(ctx, groupID, "", model, excludedIDs, "") // 重试不使用会话限制
default: default:
return nil, fmt.Errorf("unsupported retry type: %s", reqType) return nil, fmt.Errorf("unsupported retry type: %s", reqType)
} }

View File

@@ -0,0 +1,63 @@
package service
import (
"context"
"time"
)
// SessionLimitCache manages account-level tracking of active sessions.
// It is used to limit the number of sessions on Anthropic OAuth/SetupToken accounts.
//
// Key format: session_limit:account:{accountID}
// Data structure: sorted set (member=sessionUUID, score=timestamp)
//
// Sessions expire automatically after the idle timeout; no manual cleanup is needed.
type SessionLimitCache interface {
// RegisterSession registers activity for a session.
// - If the session already exists, its timestamp is refreshed and true is returned.
// - If the session does not exist and the active session count is < maxSessions, the session is added and true is returned.
// - If the session does not exist and the active session count is >= maxSessions, false is returned (rejected).
//
// Parameters:
// accountID: account ID
// sessionUUID: session UUID extracted from metadata.user_id
// maxSessions: maximum number of concurrent sessions
// idleTimeout: session idle timeout
//
// Returns:
// allowed: true if the request is allowed (within the limit, or the session already exists); false if rejected (limit exceeded and the session is new)
// error: operation error
RegisterSession(ctx context.Context, accountID int64, sessionUUID string, maxSessions int, idleTimeout time.Duration) (allowed bool, err error)
// RefreshSession refreshes the timestamp of an existing session.
// It is used to keep an active session alive.
RefreshSession(ctx context.Context, accountID int64, sessionUUID string, idleTimeout time.Duration) error
// GetActiveSessionCount returns the current number of active sessions,
// i.e. the number of sessions that have not expired.
GetActiveSessionCount(ctx context.Context, accountID int64) (int, error)
// GetActiveSessionCountBatch returns the active session counts for several accounts at once.
// It returns map[accountID]count; accounts whose lookup failed are absent from the map.
GetActiveSessionCountBatch(ctx context.Context, accountIDs []int64) (map[int64]int, error)
// IsSessionActive reports whether a specific session is active (not expired).
IsSessionActive(ctx context.Context, accountID int64, sessionUUID string) (bool, error)
// ========== 5h window cost cache ==========
// Key format: window_cost:account:{accountID}
// Caches an account's standard cost within the current 5h window to reduce the load of database aggregation queries.
// GetWindowCost returns the cached window cost.
// Returns (cost, true, nil) on a cache hit.
// Returns (0, false, nil) on a cache miss.
// Returns (0, false, err) if an error occurred.
GetWindowCost(ctx context.Context, accountID int64) (cost float64, hit bool, err error)
// SetWindowCost stores the window cost in the cache.
SetWindowCost(ctx context.Context, accountID int64, cost float64) error
// GetWindowCostBatch returns the cached window costs for several accounts at once.
// It returns map[accountID]cost; accounts with a cache miss are absent from the map.
GetWindowCostBatch(ctx context.Context, accountIDs []int64) (map[int64]float64, error)
}

View File

@@ -0,0 +1,199 @@
<template>
<div class="flex flex-col gap-1.5">
<!-- 并发槽位 -->
<div class="flex items-center gap-1.5">
<span
:class="[
'inline-flex items-center gap-1 rounded-md px-2 py-0.5 text-xs font-medium',
concurrencyClass
]"
>
<svg class="h-3 w-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M3.75 6A2.25 2.25 0 016 3.75h2.25A2.25 2.25 0 0110.5 6v2.25a2.25 2.25 0 01-2.25 2.25H6a2.25 2.25 0 01-2.25-2.25V6zM3.75 15.75A2.25 2.25 0 016 13.5h2.25a2.25 2.25 0 012.25 2.25V18a2.25 2.25 0 01-2.25 2.25H6A2.25 2.25 0 013.75 18v-2.25zM13.5 6a2.25 2.25 0 012.25-2.25H18A2.25 2.25 0 0120.25 6v2.25A2.25 2.25 0 0118 10.5h-2.25a2.25 2.25 0 01-2.25-2.25V6zM13.5 15.75a2.25 2.25 0 012.25-2.25H18a2.25 2.25 0 012.25 2.25V18A2.25 2.25 0 0118 20.25h-2.25A2.25 2.25 0 0113.5 18v-2.25z" />
</svg>
<span class="font-mono">{{ currentConcurrency }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">{{ account.concurrency }}</span>
</span>
</div>
<!-- 5h窗口费用限制 Anthropic OAuth/SetupToken 且启用时显示 -->
<div v-if="showWindowCost" class="flex items-center gap-1">
<span
:class="[
'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 text-[10px] font-medium',
windowCostClass
]"
:title="windowCostTooltip"
>
<svg class="h-2.5 w-2.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M12 6v12m-3-2.818l.879.659c1.171.879 3.07.879 4.242 0 1.172-.879 1.172-2.303 0-3.182C13.536 12.219 12.768 12 12 12c-.725 0-1.45-.22-2.003-.659-1.106-.879-1.106-2.303 0-3.182s2.9-.879 4.006 0l.415.33M21 12a9 9 0 11-18 0 9 9 0 0118 0z" />
</svg>
<span class="font-mono">${{ formatCost(currentWindowCost) }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">${{ formatCost(account.window_cost_limit) }}</span>
</span>
</div>
<!-- 会话数量限制 Anthropic OAuth/SetupToken 且启用时显示 -->
<div v-if="showSessionLimit" class="flex items-center gap-1">
<span
:class="[
'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 text-[10px] font-medium',
sessionLimitClass
]"
:title="sessionLimitTooltip"
>
<svg class="h-2.5 w-2.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M15 19.128a9.38 9.38 0 002.625.372 9.337 9.337 0 004.121-.952 4.125 4.125 0 00-7.533-2.493M15 19.128v-.003c0-1.113-.285-2.16-.786-3.07M15 19.128v.106A12.318 12.318 0 018.624 21c-2.331 0-4.512-.645-6.374-1.766l-.001-.109a6.375 6.375 0 0111.964-3.07M12 6.375a3.375 3.375 0 11-6.75 0 3.375 3.375 0 016.75 0zm8.25 2.25a2.625 2.625 0 11-5.25 0 2.625 2.625 0 015.25 0z" />
</svg>
<span class="font-mono">{{ activeSessions }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">{{ account.max_sessions }}</span>
</span>
</div>
</div>
</template>
<script setup lang="ts">
import { computed } from 'vue'
import { useI18n } from 'vue-i18n'
import type { Account } from '@/types'
const props = defineProps<{
account: Account
}>()
const { t } = useI18n()
// Current concurrency of the account; a missing or falsy value is shown as 0.
const currentConcurrency = computed(() => props.account.current_concurrency || 0)
// Whether the account is an Anthropic account of type OAuth or SetupToken.
const isAnthropicOAuthOrSetupToken = computed(() => {
  const { platform, type } = props.account
  if (platform !== 'anthropic') return false
  return type === 'oauth' || type === 'setup-token'
})
// Whether to render the window cost badge: only for Anthropic
// OAuth/SetupToken accounts that have a positive window cost limit.
const showWindowCost = computed(() => {
  if (!isAnthropicOAuthOrSetupToken.value) return false
  const limit = props.account.window_cost_limit
  return limit !== undefined && limit !== null && limit > 0
})
// Current window cost of the account; null/undefined is shown as 0.
const currentWindowCost = computed(() => props.account.current_window_cost ?? 0)
// Whether to render the session limit badge: only for Anthropic
// OAuth/SetupToken accounts that have a positive max_sessions value.
const showSessionLimit = computed(() => {
  if (!isAnthropicOAuthOrSetupToken.value) return false
  const cap = props.account.max_sessions
  return cap !== undefined && cap !== null && cap > 0
})
// Current number of active sessions on the account; null/undefined is shown as 0.
const activeSessions = computed(() => props.account.active_sessions ?? 0)
// Badge classes for the concurrency indicator:
// red when every slot is in use, yellow when some are, gray when idle.
const concurrencyClass = computed(() => {
  const inUse = currentConcurrency.value
  if (inUse >= props.account.concurrency) {
    return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
  }
  return inUse > 0
    ? 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
    : 'bg-gray-100 text-gray-600 dark:bg-gray-800 dark:text-gray-400'
})
// Badge classes for the window cost indicator, picked from the first tier
// (highest cutoff first) that the current cost reaches:
//   >= limit + reserve -> red    (not schedulable at all)
//   >= limit           -> orange (sticky sessions only)
//   >= 80% of limit    -> yellow (warning)
//   otherwise          -> green  (normal)
const windowCostClass = computed(() => {
  if (!showWindowCost.value) return ''

  const spent = currentWindowCost.value
  const threshold = props.account.window_cost_limit || 0
  const stickyReserve = props.account.window_cost_sticky_reserve || 10

  const tiers: Array<[number, string]> = [
    [threshold + stickyReserve, 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'],
    [threshold, 'bg-orange-100 text-orange-700 dark:bg-orange-900/30 dark:text-orange-400'],
    [threshold * 0.8, 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400']
  ]

  const reached = tiers.find(([cutoff]) => spent >= cutoff)
  return reached
    ? reached[1]
    : 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
// Tooltip text for the window cost badge: "blocked" once the cost reaches
// limit + reserve, "stickyOnly" once it reaches the limit, otherwise "normal".
const windowCostTooltip = computed(() => {
  if (!showWindowCost.value) return ''

  const spent = currentWindowCost.value
  const threshold = props.account.window_cost_limit || 0
  const stickyReserve = props.account.window_cost_sticky_reserve || 10

  let messageKey = 'admin.accounts.capacity.windowCost.normal'
  if (spent >= threshold + stickyReserve) {
    messageKey = 'admin.accounts.capacity.windowCost.blocked'
  } else if (spent >= threshold) {
    messageKey = 'admin.accounts.capacity.windowCost.stickyOnly'
  }
  return t(messageKey)
})
// Badge classes for the session limit indicator:
// red when the limit is reached, yellow from 80% of the limit, green otherwise.
const sessionLimitClass = computed(() => {
  if (!showSessionLimit.value) return ''

  const inUse = activeSessions.value
  const cap = props.account.max_sessions || 0

  return inUse >= cap
    ? 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
    : inUse >= cap * 0.8
      ? 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
      : 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
// Tooltip text for the session limit badge. The idle timeout in minutes
// (falling back to 5 when unset) is passed to the message as `idle`.
const sessionLimitTooltip = computed(() => {
  if (!showSessionLimit.value) return ''

  const inUse = activeSessions.value
  const cap = props.account.max_sessions || 0
  const idle = props.account.session_idle_timeout_minutes || 5

  const messageKey =
    inUse >= cap
      ? 'admin.accounts.capacity.sessions.full'
      : 'admin.accounts.capacity.sessions.normal'
  return t(messageKey, { idle })
})
// Formats a cost for display with two decimal places.
// null and undefined are rendered as the plain string '0'.
const formatCost = (value: number | null | undefined) => {
  return value == null ? '0' : value.toFixed(2)
}
</script>

View File

@@ -604,6 +604,136 @@
</div> </div>
</div> </div>
<!-- Quota Control Section (Anthropic OAuth/SetupToken only) -->
<div
v-if="account?.platform === 'anthropic' && (account?.type === 'oauth' || account?.type === 'setup-token')"
class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4"
>
<div class="mb-3">
<h3 class="input-label mb-0 text-base font-semibold">{{ t('admin.accounts.quotaControl.title') }}</h3>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.accounts.quotaControl.hint') }}
</p>
</div>
<!-- Window Cost Limit -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
<div>
<label class="input-label mb-0">{{ t('admin.accounts.quotaControl.windowCost.label') }}</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.accounts.quotaControl.windowCost.hint') }}
</p>
</div>
<button
type="button"
@click="windowCostEnabled = !windowCostEnabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
windowCostEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
windowCostEnabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<div v-if="windowCostEnabled" class="grid grid-cols-2 gap-4">
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.windowCost.limit') }}</label>
<div class="relative">
<span class="absolute left-3 top-1/2 -translate-y-1/2 text-gray-500 dark:text-gray-400">$</span>
<input
v-model.number="windowCostLimit"
type="number"
min="0"
step="1"
class="input pl-7"
:placeholder="t('admin.accounts.quotaControl.windowCost.limitPlaceholder')"
/>
</div>
<p class="input-hint">{{ t('admin.accounts.quotaControl.windowCost.limitHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.windowCost.stickyReserve') }}</label>
<div class="relative">
<span class="absolute left-3 top-1/2 -translate-y-1/2 text-gray-500 dark:text-gray-400">$</span>
<input
v-model.number="windowCostStickyReserve"
type="number"
min="0"
step="1"
class="input pl-7"
:placeholder="t('admin.accounts.quotaControl.windowCost.stickyReservePlaceholder')"
/>
</div>
<p class="input-hint">{{ t('admin.accounts.quotaControl.windowCost.stickyReserveHint') }}</p>
</div>
</div>
</div>
<!-- Session Limit -->
<div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
<div class="mb-3 flex items-center justify-between">
<div>
<label class="input-label mb-0">{{ t('admin.accounts.quotaControl.sessionLimit.label') }}</label>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.accounts.quotaControl.sessionLimit.hint') }}
</p>
</div>
<button
type="button"
@click="sessionLimitEnabled = !sessionLimitEnabled"
:class="[
'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
sessionLimitEnabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
]"
>
<span
:class="[
'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
sessionLimitEnabled ? 'translate-x-5' : 'translate-x-0'
]"
/>
</button>
</div>
<div v-if="sessionLimitEnabled" class="grid grid-cols-2 gap-4">
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.sessionLimit.maxSessions') }}</label>
<input
v-model.number="maxSessions"
type="number"
min="1"
step="1"
class="input"
:placeholder="t('admin.accounts.quotaControl.sessionLimit.maxSessionsPlaceholder')"
/>
<p class="input-hint">{{ t('admin.accounts.quotaControl.sessionLimit.maxSessionsHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.quotaControl.sessionLimit.idleTimeout') }}</label>
<div class="relative">
<input
v-model.number="sessionIdleTimeout"
type="number"
min="1"
step="1"
class="input pr-12"
:placeholder="t('admin.accounts.quotaControl.sessionLimit.idleTimeoutPlaceholder')"
/>
<span class="absolute right-3 top-1/2 -translate-y-1/2 text-gray-500 dark:text-gray-400">{{ t('common.minutes') }}</span>
</div>
<p class="input-hint">{{ t('admin.accounts.quotaControl.sessionLimit.idleTimeoutHint') }}</p>
</div>
</div>
</div>
</div>
<div class="border-t border-gray-200 pt-4 dark:border-dark-600"> <div class="border-t border-gray-200 pt-4 dark:border-dark-600">
<div> <div>
<label class="input-label">{{ t('common.status') }}</label> <label class="input-label">{{ t('common.status') }}</label>
@@ -767,6 +897,14 @@ const mixedScheduling = ref(false) // For antigravity accounts: enable mixed sch
const tempUnschedEnabled = ref(false) const tempUnschedEnabled = ref(false)
const tempUnschedRules = ref<TempUnschedRuleForm[]>([]) const tempUnschedRules = ref<TempUnschedRuleForm[]>([])
// Quota control state (Anthropic OAuth/SetupToken only)
const windowCostEnabled = ref(false)
const windowCostLimit = ref<number | null>(null)
const windowCostStickyReserve = ref<number | null>(null)
const sessionLimitEnabled = ref(false)
const maxSessions = ref<number | null>(null)
const sessionIdleTimeout = ref<number | null>(null)
// Computed: current preset mappings based on platform // Computed: current preset mappings based on platform
const presetMappings = computed(() => getPresetMappingsByPlatform(props.account?.platform || 'anthropic')) const presetMappings = computed(() => getPresetMappingsByPlatform(props.account?.platform || 'anthropic'))
const tempUnschedPresets = computed(() => [ const tempUnschedPresets = computed(() => [
@@ -854,6 +992,9 @@ watch(
const extra = newAccount.extra as Record<string, unknown> | undefined const extra = newAccount.extra as Record<string, unknown> | undefined
mixedScheduling.value = extra?.mixed_scheduling === true mixedScheduling.value = extra?.mixed_scheduling === true
// Load quota control settings (Anthropic OAuth/SetupToken only)
loadQuotaControlSettings(newAccount)
loadTempUnschedRules(credentials) loadTempUnschedRules(credentials)
// Initialize API Key fields for apikey type // Initialize API Key fields for apikey type
@@ -1087,6 +1228,35 @@ function loadTempUnschedRules(credentials?: Record<string, unknown>) {
}) })
} }
// Populate the quota control form state from an account.
// The state is always cleared first; values are then loaded only for
// Anthropic OAuth/SetupToken accounts.
function loadQuotaControlSettings(account: Account) {
  // Clear every toggle and numeric field before loading anything.
  for (const toggle of [windowCostEnabled, sessionLimitEnabled]) {
    toggle.value = false
  }
  for (const field of [windowCostLimit, windowCostStickyReserve, maxSessions, sessionIdleTimeout]) {
    field.value = null
  }

  // Quota control only applies to Anthropic OAuth/SetupToken accounts.
  const isAnthropic = account.platform === 'anthropic'
  const isOAuthOrSetupToken = account.type === 'oauth' || account.type === 'setup-token'
  if (!(isAnthropic && isOAuthOrSetupToken)) {
    return
  }

  // Load from extra field (via backend DTO fields).
  const costLimit = account.window_cost_limit
  if (costLimit != null && costLimit > 0) {
    windowCostEnabled.value = true
    windowCostLimit.value = costLimit
    windowCostStickyReserve.value = account.window_cost_sticky_reserve ?? 10
  }

  const sessionCap = account.max_sessions
  if (sessionCap != null && sessionCap > 0) {
    sessionLimitEnabled.value = true
    maxSessions.value = sessionCap
    sessionIdleTimeout.value = account.session_idle_timeout_minutes ?? 5
  }
}
function formatTempUnschedKeywords(value: unknown) { function formatTempUnschedKeywords(value: unknown) {
if (Array.isArray(value)) { if (Array.isArray(value)) {
return value return value
@@ -1214,6 +1384,32 @@ const handleSubmit = async () => {
updatePayload.extra = newExtra updatePayload.extra = newExtra
} }
// For Anthropic OAuth/SetupToken accounts, handle quota control settings in extra
if (props.account.platform === 'anthropic' && (props.account.type === 'oauth' || props.account.type === 'setup-token')) {
const currentExtra = (props.account.extra as Record<string, unknown>) || {}
const newExtra: Record<string, unknown> = { ...currentExtra }
// Window cost limit settings
if (windowCostEnabled.value && windowCostLimit.value != null && windowCostLimit.value > 0) {
newExtra.window_cost_limit = windowCostLimit.value
newExtra.window_cost_sticky_reserve = windowCostStickyReserve.value ?? 10
} else {
delete newExtra.window_cost_limit
delete newExtra.window_cost_sticky_reserve
}
// Session limit settings
if (sessionLimitEnabled.value && maxSessions.value != null && maxSessions.value > 0) {
newExtra.max_sessions = maxSessions.value
newExtra.session_idle_timeout_minutes = sessionIdleTimeout.value ?? 5
} else {
delete newExtra.max_sessions
delete newExtra.session_idle_timeout_minutes
}
updatePayload.extra = newExtra
}
await adminAPI.accounts.update(props.account.id, updatePayload) await adminAPI.accounts.update(props.account.id, updatePayload)
appStore.showSuccess(t('admin.accounts.accountUpdated')) appStore.showSuccess(t('admin.accounts.accountUpdated'))
emit('updated') emit('updated')

View File

@@ -163,6 +163,7 @@ export default {
notAvailable: 'N/A', notAvailable: 'N/A',
now: 'Now', now: 'Now',
unknown: 'Unknown', unknown: 'Unknown',
minutes: 'min',
time: { time: {
never: 'Never', never: 'Never',
justNow: 'Just now', justNow: 'Just now',
@@ -1082,7 +1083,7 @@ export default {
platformType: 'Platform/Type', platformType: 'Platform/Type',
platform: 'Platform', platform: 'Platform',
type: 'Type', type: 'Type',
concurrencyStatus: 'Concurrency', capacity: 'Capacity',
notes: 'Notes', notes: 'Notes',
priority: 'Priority', priority: 'Priority',
billingRateMultiplier: 'Billing Rate', billingRateMultiplier: 'Billing Rate',
@@ -1096,6 +1097,18 @@ export default {
expiresAt: 'Expires At', expiresAt: 'Expires At',
actions: 'Actions' actions: 'Actions'
}, },
// Capacity status tooltips
capacity: {
windowCost: {
blocked: '5h window cost exceeded, account scheduling paused',
stickyOnly: '5h window cost at threshold, only sticky sessions allowed',
normal: '5h window cost normal'
},
sessions: {
full: 'Active sessions full, new sessions must wait (idle timeout: {idle} min)',
normal: 'Active sessions normal (idle timeout: {idle} min)'
}
},
tempUnschedulable: { tempUnschedulable: {
title: 'Temp Unschedulable', title: 'Temp Unschedulable',
statusTitle: 'Temp Unschedulable Status', statusTitle: 'Temp Unschedulable Status',
@@ -1247,6 +1260,31 @@ export default {
'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens', 'When enabled, warmup requests like title generation will return mock responses without consuming upstream tokens',
autoPauseOnExpired: 'Auto Pause On Expired', autoPauseOnExpired: 'Auto Pause On Expired',
autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires', autoPauseOnExpiredDesc: 'When enabled, the account will auto pause scheduling after it expires',
// Quota control (Anthropic OAuth/SetupToken only)
quotaControl: {
title: 'Quota Control',
hint: 'Only applies to Anthropic OAuth/Setup Token accounts',
windowCost: {
label: '5h Window Cost Limit',
hint: 'Limit account cost usage within the 5-hour window',
limit: 'Cost Threshold',
limitPlaceholder: '50',
limitHint: 'Account will not participate in new scheduling after reaching threshold',
stickyReserve: 'Sticky Reserve',
stickyReservePlaceholder: '10',
stickyReserveHint: 'Additional reserve for sticky sessions'
},
sessionLimit: {
label: 'Session Count Limit',
hint: 'Limit the number of active concurrent sessions',
maxSessions: 'Max Sessions',
maxSessionsPlaceholder: '3',
maxSessionsHint: 'Maximum number of active concurrent sessions',
idleTimeout: 'Idle Timeout',
idleTimeoutPlaceholder: '5',
idleTimeoutHint: 'Sessions will be released after idle timeout'
}
},
expired: 'Expired', expired: 'Expired',
proxy: 'Proxy', proxy: 'Proxy',
noProxy: 'No Proxy', noProxy: 'No Proxy',

View File

@@ -160,6 +160,7 @@ export default {
notAvailable: '不可用', notAvailable: '不可用',
now: '现在', now: '现在',
unknown: '未知', unknown: '未知',
minutes: '分钟',
time: { time: {
never: '从未', never: '从未',
justNow: '刚刚', justNow: '刚刚',
@@ -1131,7 +1132,7 @@ export default {
platformType: '平台/类型', platformType: '平台/类型',
platform: '平台', platform: '平台',
type: '类型', type: '类型',
concurrencyStatus: '并发', capacity: '容量',
notes: '备注', notes: '备注',
priority: '优先级', priority: '优先级',
billingRateMultiplier: '账号倍率', billingRateMultiplier: '账号倍率',
@@ -1145,6 +1146,18 @@ export default {
expiresAt: '过期时间', expiresAt: '过期时间',
actions: '操作' actions: '操作'
}, },
// 容量状态提示
capacity: {
windowCost: {
blocked: '5h窗口费用超限账号暂停调度',
stickyOnly: '5h窗口费用达阈值仅允许粘性会话',
normal: '5h窗口费用正常'
},
sessions: {
full: '活跃会话已满,新会话需等待(空闲超时:{idle}分钟)',
normal: '活跃会话正常(空闲超时:{idle}分钟)'
}
},
clearRateLimit: '清除速率限制', clearRateLimit: '清除速率限制',
testConnection: '测试连接', testConnection: '测试连接',
reAuthorize: '重新授权', reAuthorize: '重新授权',
@@ -1380,6 +1393,31 @@ export default {
interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token', interceptWarmupRequestsDesc: '启用后,标题生成等预热请求将返回 mock 响应,不消耗上游 token',
autoPauseOnExpired: '过期自动暂停调度', autoPauseOnExpired: '过期自动暂停调度',
autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度', autoPauseOnExpiredDesc: '启用后,账号过期将自动暂停调度',
// Quota control (Anthropic OAuth/SetupToken only)
quotaControl: {
title: '配额控制',
hint: '仅适用于 Anthropic OAuth/Setup Token 账号',
windowCost: {
label: '5h窗口费用控制',
hint: '限制账号在5小时窗口内的费用使用',
limit: '费用阈值',
limitPlaceholder: '50',
limitHint: '达到阈值后不参与新请求调度',
stickyReserve: '粘性预留额度',
stickyReservePlaceholder: '10',
stickyReserveHint: '为粘性会话预留的额外额度'
},
sessionLimit: {
label: '会话数量控制',
hint: '限制同时活跃的会话数量',
maxSessions: '最大会话数',
maxSessionsPlaceholder: '3',
maxSessionsHint: '同时活跃的最大会话数量',
idleTimeout: '空闲超时',
idleTimeoutPlaceholder: '5',
idleTimeoutHint: '会话空闲超时后自动释放'
}
},
expired: '已过期', expired: '已过期',
proxy: '代理', proxy: '代理',
noProxy: '无代理', noProxy: '无代理',

View File

@@ -471,6 +471,18 @@ export interface Account {
session_window_start: string | null session_window_start: string | null
session_window_end: string | null session_window_end: string | null
session_window_status: 'allowed' | 'allowed_warning' | 'rejected' | null session_window_status: 'allowed' | 'allowed_warning' | 'rejected' | null
// 5h窗口费用控制(仅 Anthropic OAuth/SetupToken 账号有效)
window_cost_limit?: number | null
window_cost_sticky_reserve?: number | null
// 会话数量控制(仅 Anthropic OAuth/SetupToken 账号有效)
max_sessions?: number | null
session_idle_timeout_minutes?: number | null
// 运行时状态(仅当启用对应限制时返回)
current_window_cost?: number | null // 当前窗口费用
active_sessions?: number | null // 当前活跃会话数
} }
// Account Usage types // Account Usage types

View File

@@ -34,15 +34,8 @@
<template #cell-platform_type="{ row }"> <template #cell-platform_type="{ row }">
<PlatformTypeBadge :platform="row.platform" :type="row.type" /> <PlatformTypeBadge :platform="row.platform" :type="row.type" />
</template> </template>
<template #cell-concurrency="{ row }"> <template #cell-capacity="{ row }">
<div class="flex items-center gap-1.5"> <AccountCapacityCell :account="row" />
<span :class="['inline-flex items-center gap-1 rounded-md px-2 py-0.5 text-xs font-medium', (row.current_concurrency || 0) >= row.concurrency ? 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400' : (row.current_concurrency || 0) > 0 ? 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400' : 'bg-gray-100 text-gray-600 dark:bg-gray-800 dark:text-gray-400']">
<svg class="h-3 w-3" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2"><path stroke-linecap="round" stroke-linejoin="round" d="M3.75 6A2.25 2.25 0 016 3.75h2.25A2.25 2.25 0 0110.5 6v2.25a2.25 2.25 0 01-2.25 2.25H6a2.25 2.25 0 01-2.25-2.25V6zM3.75 15.75A2.25 2.25 0 016 13.5h2.25a2.25 2.25 0 012.25 2.25V18a2.25 2.25 0 01-2.25 2.25H6A2.25 2.25 0 013.75 18v-2.25zM13.5 6a2.25 2.25 0 012.25-2.25H18A2.25 2.25 0 0120.25 6v2.25A2.25 2.25 0 0118 10.5h-2.25a2.25 2.25 0 01-2.25-2.25V6zM13.5 15.75a2.25 2.25 0 012.25-2.25H18a2.25 2.25 0 012.25 2.25V18A2.25 2.25 0 0118 20.25h-2.25A2.25 2.25 0 0113.5 18v-2.25z" /></svg>
<span class="font-mono">{{ row.current_concurrency || 0 }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">{{ row.concurrency }}</span>
</span>
</div>
</template> </template>
<template #cell-status="{ row }"> <template #cell-status="{ row }">
<AccountStatusIndicator :account="row" @show-temp-unsched="handleShowTempUnsched" /> <AccountStatusIndicator :account="row" @show-temp-unsched="handleShowTempUnsched" />
@@ -148,6 +141,7 @@ import AccountStatusIndicator from '@/components/account/AccountStatusIndicator.
import AccountUsageCell from '@/components/account/AccountUsageCell.vue' import AccountUsageCell from '@/components/account/AccountUsageCell.vue'
import AccountTodayStatsCell from '@/components/account/AccountTodayStatsCell.vue' import AccountTodayStatsCell from '@/components/account/AccountTodayStatsCell.vue'
import AccountGroupsCell from '@/components/account/AccountGroupsCell.vue' import AccountGroupsCell from '@/components/account/AccountGroupsCell.vue'
import AccountCapacityCell from '@/components/account/AccountCapacityCell.vue'
import PlatformTypeBadge from '@/components/common/PlatformTypeBadge.vue' import PlatformTypeBadge from '@/components/common/PlatformTypeBadge.vue'
import { formatDateTime, formatRelativeTime } from '@/utils/format' import { formatDateTime, formatRelativeTime } from '@/utils/format'
import type { Account, Proxy, Group } from '@/types' import type { Account, Proxy, Group } from '@/types'
@@ -187,7 +181,7 @@ const cols = computed(() => {
{ key: 'select', label: '', sortable: false }, { key: 'select', label: '', sortable: false },
{ key: 'name', label: t('admin.accounts.columns.name'), sortable: true }, { key: 'name', label: t('admin.accounts.columns.name'), sortable: true },
{ key: 'platform_type', label: t('admin.accounts.columns.platformType'), sortable: false }, { key: 'platform_type', label: t('admin.accounts.columns.platformType'), sortable: false },
{ key: 'concurrency', label: t('admin.accounts.columns.concurrencyStatus'), sortable: false }, { key: 'capacity', label: t('admin.accounts.columns.capacity'), sortable: false },
{ key: 'status', label: t('admin.accounts.columns.status'), sortable: true }, { key: 'status', label: t('admin.accounts.columns.status'), sortable: true },
{ key: 'schedulable', label: t('admin.accounts.columns.schedulable'), sortable: true }, { key: 'schedulable', label: t('admin.accounts.columns.schedulable'), sortable: true },
{ key: 'today_stats', label: t('admin.accounts.columns.todayStats'), sortable: false } { key: 'today_stats', label: t('admin.accounts.columns.todayStats'), sortable: false }