feat: integrate RPM scheduling checks into account selection flow
This commit is contained in:
@@ -1157,6 +1157,7 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
|
|||||||
return nil, errors.New("no available accounts")
|
return nil, errors.New("no available accounts")
|
||||||
}
|
}
|
||||||
ctx = s.withWindowCostPrefetch(ctx, accounts)
|
ctx = s.withWindowCostPrefetch(ctx, accounts)
|
||||||
|
ctx = s.withRPMPrefetch(ctx, accounts)
|
||||||
|
|
||||||
isExcluded := func(accountID int64) bool {
|
isExcluded := func(accountID int64) bool {
|
||||||
if excludedIDs == nil {
|
if excludedIDs == nil {
|
||||||
@@ -1232,6 +1233,10 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
|
|||||||
filteredWindowCost++
|
filteredWindowCost++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// RPM 检查(非粘性会话路径)
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, account, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
routingCandidates = append(routingCandidates, account)
|
routingCandidates = append(routingCandidates, account)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1255,7 +1260,9 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
|
|||||||
s.isAccountAllowedForPlatform(stickyAccount, platform, useMixed) &&
|
s.isAccountAllowedForPlatform(stickyAccount, platform, useMixed) &&
|
||||||
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, stickyAccount, requestedModel)) &&
|
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, stickyAccount, requestedModel)) &&
|
||||||
s.isAccountSchedulableForModelSelection(ctx, stickyAccount, requestedModel) &&
|
s.isAccountSchedulableForModelSelection(ctx, stickyAccount, requestedModel) &&
|
||||||
s.isAccountSchedulableForWindowCost(ctx, stickyAccount, true) { // 粘性会话窗口费用检查
|
s.isAccountSchedulableForWindowCost(ctx, stickyAccount, true) &&
|
||||||
|
|
||||||
|
s.isAccountSchedulableForRPM(ctx, stickyAccount, true) { // 粘性会话窗口费用+RPM 检查
|
||||||
result, err := s.tryAcquireAccountSlot(ctx, stickyAccountID, stickyAccount.Concurrency)
|
result, err := s.tryAcquireAccountSlot(ctx, stickyAccountID, stickyAccount.Concurrency)
|
||||||
if err == nil && result.Acquired {
|
if err == nil && result.Acquired {
|
||||||
// 会话数量限制检查
|
// 会话数量限制检查
|
||||||
@@ -1409,7 +1416,9 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
|
|||||||
s.isAccountAllowedForPlatform(account, platform, useMixed) &&
|
s.isAccountAllowedForPlatform(account, platform, useMixed) &&
|
||||||
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) &&
|
(requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) &&
|
||||||
s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) &&
|
s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) &&
|
||||||
s.isAccountSchedulableForWindowCost(ctx, account, true) { // 粘性会话窗口费用检查
|
s.isAccountSchedulableForWindowCost(ctx, account, true) &&
|
||||||
|
|
||||||
|
s.isAccountSchedulableForRPM(ctx, account, true) { // 粘性会话窗口费用+RPM 检查
|
||||||
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
|
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
|
||||||
if err == nil && result.Acquired {
|
if err == nil && result.Acquired {
|
||||||
// 会话数量限制检查
|
// 会话数量限制检查
|
||||||
@@ -1475,6 +1484,10 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
|
|||||||
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
|
if !s.isAccountSchedulableForWindowCost(ctx, acc, false) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
// RPM 检查(非粘性会话路径)
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
candidates = append(candidates, acc)
|
candidates = append(candidates, acc)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2158,6 +2171,76 @@ checkSchedulability:
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rpmPrefetchContextKey is the context key for prefetched RPM counts.
|
||||||
|
type rpmPrefetchContextKeyType struct{}
|
||||||
|
|
||||||
|
var rpmPrefetchContextKey = rpmPrefetchContextKeyType{}
|
||||||
|
|
||||||
|
func rpmFromPrefetchContext(ctx context.Context, accountID int64) (int, bool) {
|
||||||
|
if v, ok := ctx.Value(rpmPrefetchContextKey).(map[int64]int); ok {
|
||||||
|
count, found := v[accountID]
|
||||||
|
return count, found
|
||||||
|
}
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
|
||||||
|
// withRPMPrefetch 批量预取所有候选账号的 RPM 计数
|
||||||
|
func (s *GatewayService) withRPMPrefetch(ctx context.Context, accounts []Account) context.Context {
|
||||||
|
if s.rpmCache == nil {
|
||||||
|
return ctx
|
||||||
|
}
|
||||||
|
|
||||||
|
var ids []int64
|
||||||
|
for i := range accounts {
|
||||||
|
if accounts[i].IsAnthropicOAuthOrSetupToken() && accounts[i].GetBaseRPM() > 0 {
|
||||||
|
ids = append(ids, accounts[i].ID)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(ids) == 0 {
|
||||||
|
return ctx
|
||||||
|
}
|
||||||
|
|
||||||
|
counts, err := s.rpmCache.GetRPMBatch(ctx, ids)
|
||||||
|
if err != nil {
|
||||||
|
return ctx // 失败开放
|
||||||
|
}
|
||||||
|
return context.WithValue(ctx, rpmPrefetchContextKey, counts)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isAccountSchedulableForRPM 检查账号是否可根据 RPM 进行调度
|
||||||
|
// 仅适用于 Anthropic OAuth/SetupToken 账号
|
||||||
|
func (s *GatewayService) isAccountSchedulableForRPM(ctx context.Context, account *Account, isSticky bool) bool {
|
||||||
|
if !account.IsAnthropicOAuthOrSetupToken() {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
baseRPM := account.GetBaseRPM()
|
||||||
|
if baseRPM <= 0 {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// 尝试从预取缓存获取
|
||||||
|
var currentRPM int
|
||||||
|
if count, ok := rpmFromPrefetchContext(ctx, account.ID); ok {
|
||||||
|
currentRPM = count
|
||||||
|
} else if s.rpmCache != nil {
|
||||||
|
if count, err := s.rpmCache.GetRPM(ctx, account.ID); err == nil {
|
||||||
|
currentRPM = count
|
||||||
|
}
|
||||||
|
// 失败开放:GetRPM 错误时允许调度
|
||||||
|
}
|
||||||
|
|
||||||
|
schedulability := account.CheckRPMSchedulability(currentRPM)
|
||||||
|
switch schedulability {
|
||||||
|
case WindowCostSchedulable:
|
||||||
|
return true
|
||||||
|
case WindowCostStickyOnly:
|
||||||
|
return isSticky
|
||||||
|
case WindowCostNotSchedulable:
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// checkAndRegisterSession 检查并注册会话,用于会话数量限制
|
// checkAndRegisterSession 检查并注册会话,用于会话数量限制
|
||||||
// 仅适用于 Anthropic OAuth/SetupToken 账号
|
// 仅适用于 Anthropic OAuth/SetupToken 账号
|
||||||
// sessionID: 会话标识符(使用粘性会话的 hash)
|
// sessionID: 会话标识符(使用粘性会话的 hash)
|
||||||
@@ -2492,7 +2575,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
|
|||||||
if clearSticky {
|
if clearSticky {
|
||||||
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
||||||
}
|
}
|
||||||
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
|
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForRPM(ctx, account, true) {
|
||||||
if s.debugModelRoutingEnabled() {
|
if s.debugModelRoutingEnabled() {
|
||||||
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
|
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
|
||||||
}
|
}
|
||||||
@@ -2542,6 +2625,9 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
|
|||||||
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if selected == nil {
|
if selected == nil {
|
||||||
selected = acc
|
selected = acc
|
||||||
continue
|
continue
|
||||||
@@ -2592,7 +2678,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
|
|||||||
if clearSticky {
|
if clearSticky {
|
||||||
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
||||||
}
|
}
|
||||||
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
|
if !clearSticky && s.isAccountInGroup(account, groupID) && account.Platform == platform && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForRPM(ctx, account, true) {
|
||||||
return account, nil
|
return account, nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2631,6 +2717,9 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
|
|||||||
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if selected == nil {
|
if selected == nil {
|
||||||
selected = acc
|
selected = acc
|
||||||
continue
|
continue
|
||||||
@@ -2700,7 +2789,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
|
|||||||
if clearSticky {
|
if clearSticky {
|
||||||
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
||||||
}
|
}
|
||||||
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
|
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForRPM(ctx, account, true) {
|
||||||
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
|
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
|
||||||
if s.debugModelRoutingEnabled() {
|
if s.debugModelRoutingEnabled() {
|
||||||
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy mixed routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
|
logger.LegacyPrintf("service.gateway", "[ModelRoutingDebug] legacy mixed routed sticky hit: group_id=%v model=%s session=%s account=%d", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), accountID)
|
||||||
@@ -2752,6 +2841,9 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
|
|||||||
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if selected == nil {
|
if selected == nil {
|
||||||
selected = acc
|
selected = acc
|
||||||
continue
|
continue
|
||||||
@@ -2802,7 +2894,7 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
|
|||||||
if clearSticky {
|
if clearSticky {
|
||||||
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
_ = s.cache.DeleteSessionAccountID(ctx, derefGroupID(groupID), sessionHash)
|
||||||
}
|
}
|
||||||
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) {
|
if !clearSticky && s.isAccountInGroup(account, groupID) && (requestedModel == "" || s.isModelSupportedByAccountWithContext(ctx, account, requestedModel)) && s.isAccountSchedulableForModelSelection(ctx, account, requestedModel) && s.isAccountSchedulableForRPM(ctx, account, true) {
|
||||||
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
|
if account.Platform == nativePlatform || (account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()) {
|
||||||
return account, nil
|
return account, nil
|
||||||
}
|
}
|
||||||
@@ -2843,6 +2935,9 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
|
|||||||
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
if !s.isAccountSchedulableForModelSelection(ctx, acc, requestedModel) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if !s.isAccountSchedulableForRPM(ctx, acc, false) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
if selected == nil {
|
if selected == nil {
|
||||||
selected = acc
|
selected = acc
|
||||||
continue
|
continue
|
||||||
|
|||||||
Reference in New Issue
Block a user