Files
kirogo/proxy/cache_tracker_test.go
huangzhenpc e8ab5b11e7
Some checks failed
Build Docker Image / build (push) Has been cancelled
fix: stop deducting simulated cache tokens from input_tokens
Kiro backend does not support Anthropic prompt cache protocol.
The local cache tracker simulates cache hits/creation for Claude Code
compatibility, but subtracting those values from input_tokens caused
the reported input_tokens to drop to single digits.

input_tokens now reflects the real value; cache_creation_input_tokens
and cache_read_input_tokens are still reported for protocol compliance.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 11:14:51 +08:00

179 lines
5.8 KiB
Go

package proxy
import (
"strings"
"testing"
"time"
)
// TestPromptCacheTrackerComputeAndUpdate exercises the basic tracker round
// trip for a single account: computing usage for a profile the tracker has
// not stored yet must report cache creation and no cache reads, and computing
// it again after Update must report cache reads and no cache creation.
func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) {
	const account = "acct-1"

	cache := newPromptCacheTracker(time.Hour)

	// One large system block carrying an explicit ephemeral cache_control marker.
	systemBlock := map[string]interface{}{
		"type":          "text",
		"text":          strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80),
		"cache_control": map[string]interface{}{"type": "ephemeral"},
	}
	request := &ClaudeRequest{
		Model:    "claude-sonnet-4.5",
		System:   []interface{}{systemBlock},
		Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
	}

	prof := cache.BuildClaudeProfile(request, 120)
	if prof == nil {
		t.Fatalf("expected cache profile to be built")
	}

	// Before Update: nothing is stored for this account yet.
	cold := cache.Compute(account, prof)
	switch {
	case cold.CacheCreationInputTokens <= 0:
		t.Fatalf("expected first request to create cache tokens, got %+v", cold)
	case cold.CacheReadInputTokens != 0:
		t.Fatalf("expected first request to have zero cache reads, got %+v", cold)
	}

	cache.Update(account, prof)

	// After Update: the identical profile should be served from the cache.
	warm := cache.Compute(account, prof)
	switch {
	case warm.CacheReadInputTokens <= 0:
		t.Fatalf("expected repeated request to read cache tokens, got %+v", warm)
	case warm.CacheCreationInputTokens != 0:
		t.Fatalf("expected repeated request to avoid cache creation, got %+v", warm)
	}
}
func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
usage := promptCacheUsage{
CacheCreationInputTokens: 30,
CacheReadInputTokens: 20,
CacheCreation5mInputTokens: 10,
CacheCreation1hInputTokens: 20,
}
m := buildClaudeUsageMap(100, 50, usage, true)
if got := m["input_tokens"]; got != 100 {
t.Fatalf("expected input tokens 100 (no deduction), got %#v", got)
}
if got := m["cache_creation_input_tokens"]; got != 30 {
t.Fatalf("expected cache creation tokens 30, got %#v", got)
}
if got := m["cache_read_input_tokens"]; got != 20 {
t.Fatalf("expected cache read tokens 20, got %#v", got)
}
creation, ok := m["cache_creation"].(map[string]int)
if !ok {
t.Fatalf("expected typed cache creation map, got %#v", m["cache_creation"])
}
if creation["ephemeral_5m_input_tokens"] != 10 || creation["ephemeral_1h_input_tokens"] != 20 {
t.Fatalf("unexpected ttl breakdown: %#v", creation)
}
}
// TestPromptCacheStableAcrossBillingHeaderDrift guards against cache misses
// caused by Claude Code's leading system block of the form
// "x-anthropic-billing-header: cc_version=...; cch=...;", whose text changes
// on every request. Two requests that differ only in that header block must
// still be treated as the same conversation, so the second one is expected
// to report cache reads once the first has been stored.
func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
	const account = "acct-1"

	cache := newPromptCacheTracker(time.Hour)
	stableText := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)

	// requestWithHeader builds a fresh request whose first system block is
	// the drifting billing header and whose second block is the stable,
	// cache_control-marked system prompt.
	requestWithHeader := func(header string) *ClaudeRequest {
		driftingBlock := map[string]interface{}{
			"type": "text",
			"text": header,
		}
		cachedBlock := map[string]interface{}{
			"type":          "text",
			"text":          stableText,
			"cache_control": map[string]interface{}{"type": "ephemeral"},
		}
		return &ClaudeRequest{
			Model:    "claude-sonnet-4.5",
			System:   []interface{}{driftingBlock, cachedBlock},
			Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
		}
	}

	// First request: nothing stored yet, so no cache read is expected.
	before := cache.BuildClaudeProfile(requestWithHeader("x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;"), 2048)
	if before == nil {
		t.Fatalf("profile1 should be built")
	}
	if cold := cache.Compute(account, before); cold.CacheReadInputTokens != 0 {
		t.Fatalf("expected no cache read on first request, got %+v", cold)
	}
	cache.Update(account, before)

	// Second request: same conversation, different billing header contents.
	after := cache.BuildClaudeProfile(requestWithHeader("x-anthropic-billing-header: cc_version=2.1.87.42; cch=bbbb; padding=xxyyzz;"), 2048)
	if after == nil {
		t.Fatalf("profile2 should be built")
	}
	if warm := cache.Compute(account, after); warm.CacheReadInputTokens == 0 {
		t.Fatalf("expected cache read after billing header drift, got %+v", warm)
	}
}
// TestPromptCacheImplicitBreakpointAtMessageEnd covers multi-turn
// conversations: after an explicit cache_control breakpoint has been seen,
// each later message-end boundary is meant to act as an implicit breakpoint.
// A follow-up request that extends a stored conversation must therefore
// report cache reads even though its newest messages carry no explicit
// cache_control.
func TestPromptCacheImplicitBreakpointAtMessageEnd(t *testing.T) {
	const account = "acct-1"

	cache := newPromptCacheTracker(time.Hour)

	// The same system slice is reused by both rounds; it holds the only
	// explicit cache_control marker in the conversation.
	sharedSystem := []interface{}{
		map[string]interface{}{
			"type":          "text",
			"text":          strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80),
			"cache_control": map[string]interface{}{"type": "ephemeral"},
		},
	}
	conversation := func(turns ...ClaudeMessage) *ClaudeRequest {
		return &ClaudeRequest{
			Model:    "claude-sonnet-4.5",
			System:   sharedSystem,
			Messages: turns,
		}
	}

	// Round 1: a lone user message, stored without computing usage first.
	opening := cache.BuildClaudeProfile(
		conversation(ClaudeMessage{Role: "user", Content: "question one"}),
		2048,
	)
	if opening == nil {
		t.Fatalf("profile1 should be built")
	}
	cache.Update(account, opening)

	// Round 2: the conversation grows by an assistant reply and a new user
	// turn, neither of which has cache_control; the stored prefix should
	// still be matched through the implicit message-end breakpoint.
	followUp := cache.BuildClaudeProfile(
		conversation(
			ClaudeMessage{Role: "user", Content: "question one"},
			ClaudeMessage{Role: "assistant", Content: "answer one"},
			ClaudeMessage{Role: "user", Content: "follow-up question"},
		),
		4096,
	)
	if followUp == nil {
		t.Fatalf("profile2 should be built")
	}
	if usage := cache.Compute(account, followUp); usage.CacheReadInputTokens == 0 {
		t.Fatalf("expected cache read via implicit message-end breakpoint, got %+v", usage)
	}
}