fix: stabilize prompt cache fingerprints (#47)
This commit is contained in:
@@ -254,9 +254,10 @@ func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
|
||||
"description": tool.Description,
|
||||
"input_schema": tool.InputSchema,
|
||||
}
|
||||
fingerprintValue := stripCachePositionKeys(toolValue)
|
||||
blocks = append(blocks, cacheablePromptBlock{
|
||||
Value: toolValue,
|
||||
Tokens: estimateApproxTokens(canonicalizeCacheValue(toolValue)),
|
||||
Value: fingerprintValue,
|
||||
Tokens: estimateApproxTokens(canonicalizeCacheValue(fingerprintValue)),
|
||||
TTL: normalizePromptCacheTTL(extractPromptCacheTTL(tool)),
|
||||
})
|
||||
}
|
||||
@@ -357,59 +358,52 @@ func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interf
|
||||
blockValue := wrapper["block"]
|
||||
ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue))
|
||||
|
||||
// Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header
|
||||
// which drifts on every request) so that fingerprints remain stable across
|
||||
// requests within the same conversation.
|
||||
if normalized, changed := normalizeCacheBlockContent(blockValue); changed {
|
||||
cloned := make(map[string]interface{}, len(wrapper))
|
||||
for k, v := range wrapper {
|
||||
cloned[k] = v
|
||||
}
|
||||
cloned["block"] = normalized
|
||||
wrapper = cloned
|
||||
// Drop volatile billing metadata from the cache fingerprint. Claude Code's
|
||||
// x-anthropic-billing-header can drift, appear, or disappear across
|
||||
// otherwise identical requests, and it does not change model semantics.
|
||||
if isAnthropicBillingHeaderBlock(blockValue) {
|
||||
return
|
||||
}
|
||||
|
||||
canonical := canonicalizeCacheValue(wrapper)
|
||||
fingerprintValue := stripCachePositionKeys(wrapper)
|
||||
canonical := canonicalizeCacheValue(fingerprintValue)
|
||||
*blocks = append(*blocks, cacheablePromptBlock{
|
||||
Value: wrapper,
|
||||
Value: fingerprintValue,
|
||||
Tokens: estimateApproxTokens(canonical),
|
||||
TTL: ttl,
|
||||
IsMessageEnd: isMessageEnd,
|
||||
})
|
||||
}
|
||||
|
||||
// normalizeCacheBlockContent replaces volatile but semantically irrelevant
|
||||
// fields with a placeholder so that the cumulative fingerprint stays stable
|
||||
// across requests in the same session. Currently handles:
|
||||
// - Claude Code's "x-anthropic-billing-header: ..." system text block
|
||||
// whose content drifts on every request (version, telemetry hash, etc.)
|
||||
func normalizeCacheBlockContent(value interface{}) (interface{}, bool) {
|
||||
func stripCachePositionKeys(value map[string]interface{}) map[string]interface{} {
|
||||
cloned := make(map[string]interface{}, len(value))
|
||||
for key, item := range value {
|
||||
if isCachePositionKey(key) {
|
||||
continue
|
||||
}
|
||||
cloned[key] = item
|
||||
}
|
||||
return cloned
|
||||
}
|
||||
|
||||
// isAnthropicBillingHeaderBlock reports whether value looks like the
// "x-anthropic-billing-header: ..." text block.
//
// It returns true only when all of the following hold:
//   - value is a map[string]interface{};
//   - its "type" entry is absent, not a string, empty, or exactly "text";
//   - its "text" entry is a string that, after leading spaces, tabs, carriage
//     returns and newlines are skipped, starts with
//     "x-anthropic-billing-header:" (compared case-insensitively).
//
// Any other input, including nil, yields false.
func isAnthropicBillingHeaderBlock(value interface{}) bool {
	blockMap, ok := value.(map[string]interface{})
	if !ok {
		return false
	}

	// Only text blocks (or blocks without an explicit type but containing
	// text) can carry the billing header.
	if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" {
		return false
	}

	text, ok := blockMap["text"].(string)
	if !ok {
		return false
	}

	trimmed := strings.TrimLeft(text, " \t\r\n")
	return strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:")
}
|
||||
|
||||
func extractPromptCacheTTL(value interface{}) time.Duration {
|
||||
@@ -590,6 +584,15 @@ func writeCanonicalJSON(buf *bytes.Buffer, value interface{}) {
|
||||
}
|
||||
}
|
||||
|
||||
// isCachePositionKey reports whether key is one of the wrapper keys that
// record where an entry sat in the request: "tool_index", "system_index",
// "message_index" or "block_index". The match is exact and case-sensitive.
func isCachePositionKey(key string) bool {
	switch key {
	case "tool_index", "system_index", "message_index", "block_index":
		return true
	default:
		return false
	}
}
|
||||
|
||||
func writeHashChunk(hasher hashWriter, chunk string) {
|
||||
length := strconv.Itoa(len(chunk))
|
||||
hasher.Write([]byte(length))
|
||||
|
||||
@@ -77,7 +77,7 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
|
||||
// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
|
||||
// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
|
||||
// block (whose content drifts on every request) does not break cache hits.
|
||||
// The normalization logic should ensure the same conversation still matches.
|
||||
// The tracker should ignore that metadata when fingerprinting cached prefixes.
|
||||
func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
|
||||
tracker := newPromptCacheTracker(time.Hour)
|
||||
mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
|
||||
@@ -124,6 +124,92 @@ func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPromptCacheStableWhenBillingHeaderAppearsOrDisappears(t *testing.T) {
|
||||
tracker := newPromptCacheTracker(time.Hour)
|
||||
mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
|
||||
|
||||
build := func(includeBilling bool) *ClaudeRequest {
|
||||
system := []interface{}{}
|
||||
if includeBilling {
|
||||
system = append(system, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;",
|
||||
})
|
||||
}
|
||||
system = append(system, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": mainSystem,
|
||||
"cache_control": map[string]interface{}{
|
||||
"type": "ephemeral",
|
||||
},
|
||||
})
|
||||
return &ClaudeRequest{
|
||||
Model: "claude-sonnet-4.5",
|
||||
System: system,
|
||||
Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
|
||||
}
|
||||
}
|
||||
|
||||
withBilling := tracker.BuildClaudeProfile(build(true), 2048)
|
||||
if withBilling == nil {
|
||||
t.Fatalf("profile with billing header should be built")
|
||||
}
|
||||
tracker.Update("acct-1", withBilling)
|
||||
|
||||
withoutBilling := tracker.BuildClaudeProfile(build(false), 2048)
|
||||
if withoutBilling == nil {
|
||||
t.Fatalf("profile without billing header should be built")
|
||||
}
|
||||
result := tracker.Compute("acct-1", withoutBilling)
|
||||
if result.CacheReadInputTokens == 0 {
|
||||
t.Fatalf("expected cache read when billing header disappears, got %+v", result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanonicalCacheValueIgnoresPositionKeys(t *testing.T) {
|
||||
first := canonicalizeCacheValue(stripCachePositionKeys(map[string]interface{}{
|
||||
"kind": "system",
|
||||
"system_index": 0,
|
||||
"block": map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "stable",
|
||||
},
|
||||
}))
|
||||
second := canonicalizeCacheValue(stripCachePositionKeys(map[string]interface{}{
|
||||
"kind": "system",
|
||||
"system_index": 1,
|
||||
"block": map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "stable",
|
||||
},
|
||||
}))
|
||||
if first != second {
|
||||
t.Fatalf("expected position keys to be ignored, got %q vs %q", first, second)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanonicalCacheValuePreservesSemanticPositionKeys(t *testing.T) {
|
||||
first := canonicalizeCacheValue(map[string]interface{}{
|
||||
"kind": "system",
|
||||
"block": map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "stable",
|
||||
"block_index": 1,
|
||||
},
|
||||
})
|
||||
second := canonicalizeCacheValue(map[string]interface{}{
|
||||
"kind": "system",
|
||||
"block": map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "stable",
|
||||
"block_index": 2,
|
||||
},
|
||||
})
|
||||
if first == second {
|
||||
t.Fatalf("expected semantic block_index fields to remain fingerprinted")
|
||||
}
|
||||
}
|
||||
|
||||
// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
|
||||
// explicit cache_control breakpoint has been seen, subsequent message-end
|
||||
// boundaries act as implicit breakpoints. This allows multi-turn conversations
|
||||
|
||||
Reference in New Issue
Block a user