Some checks failed
Build Docker Image / build (push) Has been cancelled
Kiro backend does not support Anthropic prompt cache protocol. The local cache tracker simulates cache hits/creation for Claude Code compatibility, but subtracting those values from input_tokens caused the reported input_tokens to drop to single digits. input_tokens now reflects the real value; cache_creation_input_tokens and cache_read_input_tokens are still reported for protocol compliance. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
179 lines
5.8 KiB
Go
179 lines
5.8 KiB
Go
package proxy
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) {
|
|
tracker := newPromptCacheTracker(time.Hour)
|
|
longSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
|
|
req := &ClaudeRequest{
|
|
Model: "claude-sonnet-4.5",
|
|
System: []interface{}{
|
|
map[string]interface{}{
|
|
"type": "text",
|
|
"text": longSystem,
|
|
"cache_control": map[string]interface{}{
|
|
"type": "ephemeral",
|
|
},
|
|
},
|
|
},
|
|
Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
|
|
}
|
|
|
|
profile := tracker.BuildClaudeProfile(req, 120)
|
|
if profile == nil {
|
|
t.Fatalf("expected cache profile to be built")
|
|
}
|
|
|
|
first := tracker.Compute("acct-1", profile)
|
|
if first.CacheCreationInputTokens <= 0 {
|
|
t.Fatalf("expected first request to create cache tokens, got %+v", first)
|
|
}
|
|
if first.CacheReadInputTokens != 0 {
|
|
t.Fatalf("expected first request to have zero cache reads, got %+v", first)
|
|
}
|
|
|
|
tracker.Update("acct-1", profile)
|
|
second := tracker.Compute("acct-1", profile)
|
|
if second.CacheReadInputTokens <= 0 {
|
|
t.Fatalf("expected repeated request to read cache tokens, got %+v", second)
|
|
}
|
|
if second.CacheCreationInputTokens != 0 {
|
|
t.Fatalf("expected repeated request to avoid cache creation, got %+v", second)
|
|
}
|
|
}
|
|
|
|
func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
|
|
usage := promptCacheUsage{
|
|
CacheCreationInputTokens: 30,
|
|
CacheReadInputTokens: 20,
|
|
CacheCreation5mInputTokens: 10,
|
|
CacheCreation1hInputTokens: 20,
|
|
}
|
|
|
|
m := buildClaudeUsageMap(100, 50, usage, true)
|
|
|
|
if got := m["input_tokens"]; got != 100 {
|
|
t.Fatalf("expected input tokens 100 (no deduction), got %#v", got)
|
|
}
|
|
if got := m["cache_creation_input_tokens"]; got != 30 {
|
|
t.Fatalf("expected cache creation tokens 30, got %#v", got)
|
|
}
|
|
if got := m["cache_read_input_tokens"]; got != 20 {
|
|
t.Fatalf("expected cache read tokens 20, got %#v", got)
|
|
}
|
|
creation, ok := m["cache_creation"].(map[string]int)
|
|
if !ok {
|
|
t.Fatalf("expected typed cache creation map, got %#v", m["cache_creation"])
|
|
}
|
|
if creation["ephemeral_5m_input_tokens"] != 10 || creation["ephemeral_1h_input_tokens"] != 20 {
|
|
t.Fatalf("unexpected ttl breakdown: %#v", creation)
|
|
}
|
|
}
|
|
|
|
// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
|
|
// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
|
|
// block (whose content drifts on every request) does not break cache hits.
|
|
// The normalization logic should ensure the same conversation still matches.
|
|
func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
|
|
tracker := newPromptCacheTracker(time.Hour)
|
|
mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
|
|
|
|
build := func(billingHdr string) *ClaudeRequest {
|
|
return &ClaudeRequest{
|
|
Model: "claude-sonnet-4.5",
|
|
System: []interface{}{
|
|
map[string]interface{}{
|
|
"type": "text",
|
|
"text": billingHdr,
|
|
},
|
|
map[string]interface{}{
|
|
"type": "text",
|
|
"text": mainSystem,
|
|
"cache_control": map[string]interface{}{
|
|
"type": "ephemeral",
|
|
},
|
|
},
|
|
},
|
|
Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
|
|
}
|
|
}
|
|
|
|
req1 := build("x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;")
|
|
profile1 := tracker.BuildClaudeProfile(req1, 2048)
|
|
if profile1 == nil {
|
|
t.Fatalf("profile1 should be built")
|
|
}
|
|
first := tracker.Compute("acct-1", profile1)
|
|
if first.CacheReadInputTokens != 0 {
|
|
t.Fatalf("expected no cache read on first request, got %+v", first)
|
|
}
|
|
tracker.Update("acct-1", profile1)
|
|
|
|
req2 := build("x-anthropic-billing-header: cc_version=2.1.87.42; cch=bbbb; padding=xxyyzz;")
|
|
profile2 := tracker.BuildClaudeProfile(req2, 2048)
|
|
if profile2 == nil {
|
|
t.Fatalf("profile2 should be built")
|
|
}
|
|
second := tracker.Compute("acct-1", profile2)
|
|
if second.CacheReadInputTokens == 0 {
|
|
t.Fatalf("expected cache read after billing header drift, got %+v", second)
|
|
}
|
|
}
|
|
|
|
// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
|
|
// explicit cache_control breakpoint has been seen, subsequent message-end
|
|
// boundaries act as implicit breakpoints. This allows multi-turn conversations
|
|
// to hit earlier stored prefix fingerprints even when the newest messages
|
|
// lack explicit cache_control.
|
|
func TestPromptCacheImplicitBreakpointAtMessageEnd(t *testing.T) {
|
|
tracker := newPromptCacheTracker(time.Hour)
|
|
systemText := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
|
|
|
|
baseSystem := []interface{}{
|
|
map[string]interface{}{
|
|
"type": "text",
|
|
"text": systemText,
|
|
"cache_control": map[string]interface{}{
|
|
"type": "ephemeral",
|
|
},
|
|
},
|
|
}
|
|
|
|
// Round 1: single user message.
|
|
req1 := &ClaudeRequest{
|
|
Model: "claude-sonnet-4.5",
|
|
System: baseSystem,
|
|
Messages: []ClaudeMessage{{Role: "user", Content: "question one"}},
|
|
}
|
|
profile1 := tracker.BuildClaudeProfile(req1, 2048)
|
|
if profile1 == nil {
|
|
t.Fatalf("profile1 should be built")
|
|
}
|
|
tracker.Update("acct-1", profile1)
|
|
|
|
// Round 2: conversation continues with new messages. The latest user
|
|
// message has no explicit cache_control; it should still hit the stored
|
|
// prefix via the implicit message-end breakpoint.
|
|
req2 := &ClaudeRequest{
|
|
Model: "claude-sonnet-4.5",
|
|
System: baseSystem,
|
|
Messages: []ClaudeMessage{
|
|
{Role: "user", Content: "question one"},
|
|
{Role: "assistant", Content: "answer one"},
|
|
{Role: "user", Content: "follow-up question"},
|
|
},
|
|
}
|
|
profile2 := tracker.BuildClaudeProfile(req2, 4096)
|
|
if profile2 == nil {
|
|
t.Fatalf("profile2 should be built")
|
|
}
|
|
result := tracker.Compute("acct-1", profile2)
|
|
if result.CacheReadInputTokens == 0 {
|
|
t.Fatalf("expected cache read via implicit message-end breakpoint, got %+v", result)
|
|
}
|
|
}
|