fix: address code review issues for RPM limiting feature

- Use TxPipeline (MULTI/EXEC) instead of Pipeline for atomic INCR+EXPIRE
- Filter negative values in GetBaseRPM(), update test expectation
- Add RPM batch query (GetRPMBatch) to account List API
- Add warn logs for RPM increment failures in gateway handler
- Reset enableRpmLimit on BulkEditAccountModal close
- Use union type 'tiered' | 'sticky_exempt' for rpmStrategy refs
- Add design decision comments for rdb.Time() RTT trade-off
This commit is contained in:
QTom
2026-02-28 10:16:34 +08:00
parent 28ca7df297
commit 607237571f
13 changed files with 509 additions and 80 deletions

View File

@@ -2708,6 +2708,9 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
}
}
// 批量预取 RPM 计数避免逐个账号查询N+1
ctx = s.withRPMPrefetch(ctx, accounts)
// 3. 按优先级+最久未用选择(考虑模型支持)
var selected *Account
for i := range accounts {
@@ -2922,6 +2925,9 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
}
}
// 批量预取 RPM 计数避免逐个账号查询N+1
ctx = s.withRPMPrefetch(ctx, accounts)
// 3. 按优先级+最久未用选择(考虑模型支持和混合调度)
var selected *Account
for i := range accounts {