feat(gateway): Cache-Driven RPM Buffer
- buffer 公式从 baseRPM/5 改为 concurrency + maxSessions;保留 baseRPM/5 作为 floor,向后兼容
- 粘性路径 fallback 新增 [StickyCacheMiss] 结构化日志,reason: rpm_red / gate_check / session_limit / wait_queue_full / account_cleared
- session_limit 路径跳过 wait queue 重试(RegisterSession 拒绝无副作用)
- 典型配置 buffer 从 3 提升至 13,大幅减少高峰期 Prompt Cache Miss

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1727,22 +1727,47 @@ func (a *Account) GetRPMStrategy() string {
|
||||
}
|
||||
|
||||
// GetRPMStickyBuffer 获取 RPM 粘性缓冲数量
|
||||
// tiered 模式下的黄区大小,默认为 base_rpm 的 20%(至少 1)
|
||||
// Cache-driven: buffer = concurrency + maxSessions(覆盖幽灵窗口 + 稳态会话需求)
|
||||
// floor = baseRPM / 5(向后兼容 maxSessions=0 且 concurrency=0 场景)
|
||||
func (a *Account) GetRPMStickyBuffer() int {
|
||||
if a.Extra == nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
// 手动 override 最高优先级
|
||||
if v, ok := a.Extra["rpm_sticky_buffer"]; ok {
|
||||
val := parseExtraInt(v)
|
||||
if val > 0 {
|
||||
return val
|
||||
}
|
||||
}
|
||||
|
||||
base := a.GetBaseRPM()
|
||||
buffer := base / 5
|
||||
if buffer < 1 && base > 0 {
|
||||
buffer = 1
|
||||
if base <= 0 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Cache-driven buffer = concurrency + maxSessions
|
||||
conc := a.Concurrency
|
||||
if conc < 0 {
|
||||
conc = 0
|
||||
}
|
||||
sess := a.GetMaxSessions()
|
||||
if sess < 0 {
|
||||
sess = 0
|
||||
}
|
||||
|
||||
buffer := conc + sess
|
||||
|
||||
// floor: 向后兼容
|
||||
floor := base / 5
|
||||
if floor < 1 {
|
||||
floor = 1
|
||||
}
|
||||
if buffer < floor {
|
||||
buffer = floor
|
||||
}
|
||||
|
||||
return buffer
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user