feat(gateway): Cache-Driven RPM Buffer

- buffer 公式从 baseRPM/5 改为 concurrency + maxSessions
  保留 baseRPM/5 作为 floor 向后兼容
- 粘性路径 fallback 新增 [StickyCacheMiss] 结构化日志
  reason: rpm_red / gate_check / session_limit / wait_queue_full / account_cleared
- session_limit 路径跳过 wait queue 重试(RegisterSession 拒绝无副作用)
- 典型配置 buffer 从 3 提升至 13,大幅减少高峰期 Prompt Cache Miss

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
QTom
2026-03-31 13:19:40 +08:00
parent 318aa5e0d3
commit 72e5876c64
3 changed files with 110 additions and 39 deletions

View File

@@ -1727,22 +1727,47 @@ func (a *Account) GetRPMStrategy() string {
}
// GetRPMStickyBuffer 获取 RPM 粘性缓冲数量
// tiered 模式下的黄区大小,默认为 base_rpm 的 20%(至少 1
// Cache-driven: buffer = concurrency + maxSessions覆盖幽灵窗口 + 稳态会话需求
// floor = baseRPM / 5向后兼容 maxSessions=0 且 concurrency=0 场景)
func (a *Account) GetRPMStickyBuffer() int {
if a.Extra == nil {
return 0
}
// 手动 override 最高优先级
if v, ok := a.Extra["rpm_sticky_buffer"]; ok {
val := parseExtraInt(v)
if val > 0 {
return val
}
}
base := a.GetBaseRPM()
buffer := base / 5
if buffer < 1 && base > 0 {
buffer = 1
if base <= 0 {
return 0
}
// Cache-driven buffer = concurrency + maxSessions
conc := a.Concurrency
if conc < 0 {
conc = 0
}
sess := a.GetMaxSessions()
if sess < 0 {
sess = 0
}
buffer := conc + sess
// floor: 向后兼容
floor := base / 5
if floor < 1 {
floor = 1
}
if buffer < floor {
buffer = floor
}
return buffer
}