Merge pull request #591 from miraserver/main

feat: add Cache TTL Override per account
This commit is contained in:
Wesley Liddick
2026-02-18 15:59:25 +08:00
committed by GitHub
25 changed files with 533 additions and 19 deletions

View File

@@ -211,6 +211,13 @@ func AccountFromServiceShallow(a *service.Account) *Account {
enabled := true
out.EnableSessionIDMasking = &enabled
}
// 缓存 TTL 强制替换
if a.IsCacheTTLOverrideEnabled() {
enabled := true
out.CacheTTLOverrideEnabled = &enabled
target := a.GetCacheTTLOverrideTarget()
out.CacheTTLOverrideTarget = &target
}
}
return out
@@ -398,6 +405,7 @@ func usageLogFromServiceUser(l *service.UsageLog) UsageLog {
ImageCount: l.ImageCount,
ImageSize: l.ImageSize,
UserAgent: l.UserAgent,
CacheTTLOverridden: l.CacheTTLOverridden,
CreatedAt: l.CreatedAt,
User: UserFromServiceShallow(l.User),
APIKey: APIKeyFromService(l.APIKey),

View File

@@ -150,6 +150,11 @@ type Account struct {
// 从 extra 字段提取,方便前端显示和编辑
EnableSessionIDMasking *bool `json:"session_id_masking_enabled,omitempty"`
// 缓存 TTL 强制替换(仅 Anthropic OAuth/SetupToken 账号有效)
// 启用后将所有 cache creation tokens 归入指定的 TTL 类型计费
CacheTTLOverrideEnabled *bool `json:"cache_ttl_override_enabled,omitempty"`
CacheTTLOverrideTarget *string `json:"cache_ttl_override_target,omitempty"`
Proxy *Proxy `json:"proxy,omitempty"`
AccountGroups []AccountGroup `json:"account_groups,omitempty"`
@@ -273,6 +278,9 @@ type UsageLog struct {
// User-Agent
UserAgent *string `json:"user_agent"`
// Cache TTL Override 标记
CacheTTLOverridden bool `json:"cache_ttl_overridden"`
CreatedAt time.Time `json:"created_at"`
User *User `json:"user,omitempty"`

View File

@@ -22,7 +22,7 @@ import (
"github.com/lib/pq"
)
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, stream, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, reasoning_effort, created_at"
const usageLogSelectColumns = "id, user_id, api_key_id, account_id, request_id, model, group_id, subscription_id, input_tokens, output_tokens, cache_creation_tokens, cache_read_tokens, cache_creation_5m_tokens, cache_creation_1h_tokens, input_cost, output_cost, cache_creation_cost, cache_read_cost, total_cost, actual_cost, rate_multiplier, account_rate_multiplier, billing_type, stream, duration_ms, first_token_ms, user_agent, ip_address, image_count, image_size, reasoning_effort, cache_ttl_overridden, created_at"
type usageLogRepository struct {
client *dbent.Client
@@ -115,6 +115,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
image_count,
image_size,
reasoning_effort,
cache_ttl_overridden,
created_at
) VALUES (
$1, $2, $3, $4, $5,
@@ -122,7 +123,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
$8, $9, $10, $11,
$12, $13,
$14, $15, $16, $17, $18, $19,
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31
$20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32
)
ON CONFLICT (request_id, api_key_id) DO NOTHING
RETURNING id, created_at
@@ -173,6 +174,7 @@ func (r *usageLogRepository) Create(ctx context.Context, log *service.UsageLog)
log.ImageCount,
imageSize,
reasoningEffort,
log.CacheTTLOverridden,
createdAt,
}
if err := scanSingleRow(ctx, sqlq, query, args, &log.ID, &log.CreatedAt); err != nil {
@@ -2195,6 +2197,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
imageCount int
imageSize sql.NullString
reasoningEffort sql.NullString
cacheTTLOverridden bool
createdAt time.Time
)
@@ -2230,6 +2233,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
&imageCount,
&imageSize,
&reasoningEffort,
&cacheTTLOverridden,
&createdAt,
); err != nil {
return nil, err
@@ -2258,6 +2262,7 @@ func scanUsageLog(scanner interface{ Scan(...any) error }) (*service.UsageLog, e
BillingType: int8(billingType),
Stream: stream,
ImageCount: imageCount,
CacheTTLOverridden: cacheTTLOverridden,
CreatedAt: createdAt,
}

View File

@@ -401,6 +401,7 @@ func TestAPIContracts(t *testing.T) {
"first_token_ms": 50,
"image_count": 0,
"image_size": null,
"cache_ttl_overridden": false,
"created_at": "2025-01-02T03:04:05Z",
"user_agent": null
}

View File

@@ -752,6 +752,38 @@ func (a *Account) IsSessionIDMaskingEnabled() bool {
return false
}
// IsCacheTTLOverrideEnabled 检查是否启用缓存 TTL 强制替换
// 仅适用于 Anthropic OAuth/SetupToken 类型账号
// 启用后将所有 cache creation tokens 归入指定的 TTL 类型5m 或 1h
func (a *Account) IsCacheTTLOverrideEnabled() bool {
if !a.IsAnthropicOAuthOrSetupToken() {
return false
}
if a.Extra == nil {
return false
}
if v, ok := a.Extra["cache_ttl_override_enabled"]; ok {
if enabled, ok := v.(bool); ok {
return enabled
}
}
return false
}
// GetCacheTTLOverrideTarget 获取缓存 TTL 强制替换的目标类型
// 返回 "5m" 或 "1h",默认 "5m"
func (a *Account) GetCacheTTLOverrideTarget() string {
if a.Extra == nil {
return "5m"
}
if v, ok := a.Extra["cache_ttl_override_target"]; ok {
if target, ok := v.(string); ok && (target == "5m" || target == "1h") {
return target
}
}
return "5m"
}
// GetWindowCostLimit 获取 5h 窗口费用阈值(美元)
// 返回 0 表示未启用
func (a *Account) GetWindowCostLimit() float64 {

View File

@@ -4276,6 +4276,23 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
}
}
// Cache TTL Override: 重写 SSE 事件中的 cache_creation 分类
if account.IsCacheTTLOverrideEnabled() {
overrideTarget := account.GetCacheTTLOverrideTarget()
if eventType == "message_start" {
if msg, ok := event["message"].(map[string]any); ok {
if u, ok := msg["usage"].(map[string]any); ok {
rewriteCacheCreationJSON(u, overrideTarget)
}
}
}
if eventType == "message_delta" {
if u, ok := event["usage"].(map[string]any); ok {
rewriteCacheCreationJSON(u, overrideTarget)
}
}
}
if needModelReplace {
if msg, ok := event["message"].(map[string]any); ok {
if model, ok := msg["model"].(string); ok && model == mappedModel {
@@ -4450,6 +4467,58 @@ func (s *GatewayService) parseSSEUsage(data string, usage *ClaudeUsage) {
}
}
// applyCacheTTLOverride 将所有 cache creation tokens 归入指定的 TTL 类型。
// target 为 "5m" 或 "1h"。返回 true 表示发生了变更。
func applyCacheTTLOverride(usage *ClaudeUsage, target string) bool {
// Fallback: 如果只有聚合字段但无 5m/1h 明细,将聚合字段归入 5m 默认类别
if usage.CacheCreation5mTokens == 0 && usage.CacheCreation1hTokens == 0 && usage.CacheCreationInputTokens > 0 {
usage.CacheCreation5mTokens = usage.CacheCreationInputTokens
}
total := usage.CacheCreation5mTokens + usage.CacheCreation1hTokens
if total == 0 {
return false
}
switch target {
case "1h":
if usage.CacheCreation1hTokens == total {
return false // 已经全是 1h
}
usage.CacheCreation1hTokens = total
usage.CacheCreation5mTokens = 0
default: // "5m"
if usage.CacheCreation5mTokens == total {
return false // 已经全是 5m
}
usage.CacheCreation5mTokens = total
usage.CacheCreation1hTokens = 0
}
return true
}
// rewriteCacheCreationJSON 在 JSON usage 对象中重写 cache_creation 嵌套对象的 TTL 分类。
// usageObj 是 usage JSON 对象map[string]any
func rewriteCacheCreationJSON(usageObj map[string]any, target string) {
ccObj, ok := usageObj["cache_creation"].(map[string]any)
if !ok {
return
}
v5m, _ := ccObj["ephemeral_5m_input_tokens"].(float64)
v1h, _ := ccObj["ephemeral_1h_input_tokens"].(float64)
total := v5m + v1h
if total == 0 {
return
}
switch target {
case "1h":
ccObj["ephemeral_1h_input_tokens"] = total
ccObj["ephemeral_5m_input_tokens"] = float64(0)
default: // "5m"
ccObj["ephemeral_5m_input_tokens"] = total
ccObj["ephemeral_1h_input_tokens"] = float64(0)
}
}
func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *http.Response, c *gin.Context, account *Account, originalModel, mappedModel string) (*ClaudeUsage, error) {
// 更新5h窗口状态
s.rateLimitService.UpdateSessionWindow(ctx, account, resp.Header)
@@ -4486,6 +4555,20 @@ func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *h
}
}
// Cache TTL Override: 重写 non-streaming 响应中的 cache_creation 分类
if account.IsCacheTTLOverrideEnabled() {
overrideTarget := account.GetCacheTTLOverrideTarget()
if applyCacheTTLOverride(&response.Usage, overrideTarget) {
// 同步更新 body JSON 中的嵌套 cache_creation 对象
if newBody, err := sjson.SetBytes(body, "usage.cache_creation.ephemeral_5m_input_tokens", response.Usage.CacheCreation5mTokens); err == nil {
body = newBody
}
if newBody, err := sjson.SetBytes(body, "usage.cache_creation.ephemeral_1h_input_tokens", response.Usage.CacheCreation1hTokens); err == nil {
body = newBody
}
}
}
// 如果有模型映射替换响应中的model字段
if originalModel != mappedModel {
body = s.replaceModelInResponseBody(body, mappedModel, originalModel)
@@ -4562,6 +4645,13 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
result.Usage.InputTokens = 0
}
// Cache TTL Override: 确保计费时 token 分类与账号设置一致
cacheTTLOverridden := false
if account.IsCacheTTLOverrideEnabled() {
applyCacheTTLOverride(&result.Usage, account.GetCacheTTLOverrideTarget())
cacheTTLOverridden = (result.Usage.CacheCreation5mTokens + result.Usage.CacheCreation1hTokens) > 0
}
// 获取费率倍数(优先级:用户专属 > 分组默认 > 系统默认)
multiplier := s.cfg.Default.RateMultiplier
if apiKey.GroupID != nil && apiKey.Group != nil {
@@ -4647,6 +4737,7 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
FirstTokenMs: result.FirstTokenMs,
ImageCount: result.ImageCount,
ImageSize: imageSize,
CacheTTLOverridden: cacheTTLOverridden,
CreatedAt: time.Now(),
}
@@ -4747,6 +4838,13 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input *
result.Usage.InputTokens = 0
}
// Cache TTL Override: 确保计费时 token 分类与账号设置一致
cacheTTLOverridden := false
if account.IsCacheTTLOverrideEnabled() {
applyCacheTTLOverride(&result.Usage, account.GetCacheTTLOverrideTarget())
cacheTTLOverridden = (result.Usage.CacheCreation5mTokens + result.Usage.CacheCreation1hTokens) > 0
}
// 获取费率倍数(优先级:用户专属 > 分组默认 > 系统默认)
multiplier := s.cfg.Default.RateMultiplier
if apiKey.GroupID != nil && apiKey.Group != nil {
@@ -4832,6 +4930,7 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input *
FirstTokenMs: result.FirstTokenMs,
ImageCount: result.ImageCount,
ImageSize: imageSize,
CacheTTLOverridden: cacheTTLOverridden,
CreatedAt: time.Now(),
}

View File

@@ -46,6 +46,9 @@ type UsageLog struct {
UserAgent *string
IPAddress *string
// Cache TTL Override 标记(管理员强制替换了缓存 TTL 计费)
CacheTTLOverridden bool
// 图片生成字段
ImageCount int
ImageSize *string