fix: stabilize prompt cache fingerprints (#47)

This commit is contained in:
Delicious233
2026-05-12 18:57:12 +08:00
committed by GitHub
parent 08a9747c99
commit 1732b17ff9
2 changed files with 123 additions and 34 deletions

View File

@@ -254,9 +254,10 @@ func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
"description": tool.Description,
"input_schema": tool.InputSchema,
}
fingerprintValue := stripCachePositionKeys(toolValue)
blocks = append(blocks, cacheablePromptBlock{
Value: toolValue,
Tokens: estimateApproxTokens(canonicalizeCacheValue(toolValue)),
Value: fingerprintValue,
Tokens: estimateApproxTokens(canonicalizeCacheValue(fingerprintValue)),
TTL: normalizePromptCacheTTL(extractPromptCacheTTL(tool)),
})
}
@@ -357,59 +358,52 @@ func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interf
blockValue := wrapper["block"]
ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue))
// Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header
// which drifts on every request) so that fingerprints remain stable across
// requests within the same conversation.
if normalized, changed := normalizeCacheBlockContent(blockValue); changed {
cloned := make(map[string]interface{}, len(wrapper))
for k, v := range wrapper {
cloned[k] = v
}
cloned["block"] = normalized
wrapper = cloned
// Drop volatile billing metadata from the cache fingerprint. Claude Code's
// x-anthropic-billing-header can drift, appear, or disappear across
// otherwise identical requests, and it does not change model semantics.
if isAnthropicBillingHeaderBlock(blockValue) {
return
}
canonical := canonicalizeCacheValue(wrapper)
fingerprintValue := stripCachePositionKeys(wrapper)
canonical := canonicalizeCacheValue(fingerprintValue)
*blocks = append(*blocks, cacheablePromptBlock{
Value: wrapper,
Value: fingerprintValue,
Tokens: estimateApproxTokens(canonical),
TTL: ttl,
IsMessageEnd: isMessageEnd,
})
}
// normalizeCacheBlockContent replaces volatile but semantically irrelevant
// fields with a placeholder so that the cumulative fingerprint stays stable
// across requests in the same session. Currently handles:
// - Claude Code's "x-anthropic-billing-header: ..." system text block
// whose content drifts on every request (version, telemetry hash, etc.)
func normalizeCacheBlockContent(value interface{}) (interface{}, bool) {
// stripCachePositionKeys returns a shallow copy of value without the
// top-level entries whose key isCachePositionKey reports as positional.
//
// The input map is left untouched. Only the top level is filtered: nested
// maps are carried over by reference, so position-like keys inside them are
// retained. A nil input yields an empty, non-nil map.
func stripCachePositionKeys(value map[string]interface{}) map[string]interface{} {
	kept := make(map[string]interface{}, len(value))
	for name, entry := range value {
		if !isCachePositionKey(name) {
			kept[name] = entry
		}
	}
	return kept
}
// isAnthropicBillingHeaderBlock reports whether value is a text content block
// whose text starts with the "x-anthropic-billing-header:" line.
//
// A block matches only when all of the following hold:
//   - value is a map[string]interface{};
//   - its "type" entry is absent, not a string, empty, or exactly "text";
//   - its "text" entry is a string that, once leading spaces, tabs, carriage
//     returns and newlines are trimmed, begins with the header name
//     (compared case-insensitively).
//
// Every other value yields false.
func isAnthropicBillingHeaderBlock(value interface{}) bool {
	const billingHeaderPrefix = "x-anthropic-billing-header:"

	blockMap, ok := value.(map[string]interface{})
	if !ok {
		return false
	}
	// A block with an explicit non-text type is never a billing header; a
	// missing or empty type is treated the same as "text".
	if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" {
		return false
	}
	text, ok := blockMap["text"].(string)
	if !ok {
		return false
	}
	trimmed := strings.TrimLeft(text, " \t\r\n")
	return strings.HasPrefix(strings.ToLower(trimmed), billingHeaderPrefix)
}
func extractPromptCacheTTL(value interface{}) time.Duration {
@@ -590,6 +584,15 @@ func writeCanonicalJSON(buf *bytes.Buffer, value interface{}) {
}
}
// isCachePositionKey reports whether key is one of the positional bookkeeping
// names: "tool_index", "system_index", "message_index" or "block_index".
// The comparison is exact and case-sensitive.
func isCachePositionKey(key string) bool {
	return key == "tool_index" ||
		key == "system_index" ||
		key == "message_index" ||
		key == "block_index"
}
func writeHashChunk(hasher hashWriter, chunk string) {
length := strconv.Itoa(len(chunk))
hasher.Write([]byte(length))

View File

@@ -77,7 +77,7 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
// block (whose content drifts on every request) does not break cache hits.
// The normalization logic should ensure the same conversation still matches.
// The tracker should ignore that metadata when fingerprinting cached prefixes.
func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
tracker := newPromptCacheTracker(time.Hour)
mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
@@ -124,6 +124,92 @@ func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
}
}
// TestPromptCacheStableWhenBillingHeaderAppearsOrDisappears checks that a
// profile recorded for a request carrying an x-anthropic-billing-header system
// block still yields a cache read when the same request is sent without it.
func TestPromptCacheStableWhenBillingHeaderAppearsOrDisappears(t *testing.T) {
	tracker := newPromptCacheTracker(time.Hour)
	mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)

	// newRequest assembles the request under test; when asked for, the billing
	// block is placed ahead of the cacheable main system block.
	newRequest := func(includeBilling bool) *ClaudeRequest {
		system := make([]interface{}, 0, 2)
		if includeBilling {
			billingBlock := map[string]interface{}{
				"type": "text",
				"text": "x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;",
			}
			system = append(system, billingBlock)
		}
		mainBlock := map[string]interface{}{
			"type": "text",
			"text": mainSystem,
			"cache_control": map[string]interface{}{
				"type": "ephemeral",
			},
		}
		system = append(system, mainBlock)

		return &ClaudeRequest{
			Model:    "claude-sonnet-4.5",
			System:   system,
			Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
		}
	}

	// Seed the tracker with the variant that includes the billing header.
	seeded := tracker.BuildClaudeProfile(newRequest(true), 2048)
	if seeded == nil {
		t.Fatalf("profile with billing header should be built")
	}
	tracker.Update("acct-1", seeded)

	// Probe with the variant that omits it.
	probe := tracker.BuildClaudeProfile(newRequest(false), 2048)
	if probe == nil {
		t.Fatalf("profile without billing header should be built")
	}

	result := tracker.Compute("acct-1", probe)
	if result.CacheReadInputTokens == 0 {
		t.Fatalf("expected cache read when billing header disappears, got %+v", result)
	}
}
// TestCanonicalCacheValueIgnoresPositionKeys checks that two wrappers differing
// only in their top-level "system_index" canonicalize to the same value once
// stripCachePositionKeys has been applied.
func TestCanonicalCacheValueIgnoresPositionKeys(t *testing.T) {
	// wrapperAt builds the same system wrapper at the given position.
	wrapperAt := func(systemIndex int) map[string]interface{} {
		return map[string]interface{}{
			"kind":         "system",
			"system_index": systemIndex,
			"block": map[string]interface{}{
				"type": "text",
				"text": "stable",
			},
		}
	}

	first := canonicalizeCacheValue(stripCachePositionKeys(wrapperAt(0)))
	second := canonicalizeCacheValue(stripCachePositionKeys(wrapperAt(1)))

	if first != second {
		t.Fatalf("expected position keys to be ignored, got %q vs %q", first, second)
	}
}
// TestCanonicalCacheValuePreservesSemanticPositionKeys checks that a
// "block_index" field nested inside the block payload still affects the
// canonical value: two wrappers that differ only in that nested field must
// not canonicalize identically.
func TestCanonicalCacheValuePreservesSemanticPositionKeys(t *testing.T) {
	// wrapperWithNestedIndex builds a system wrapper whose block carries the
	// given block_index as part of its own content.
	wrapperWithNestedIndex := func(blockIndex int) map[string]interface{} {
		return map[string]interface{}{
			"kind": "system",
			"block": map[string]interface{}{
				"type":        "text",
				"text":        "stable",
				"block_index": blockIndex,
			},
		}
	}

	first := canonicalizeCacheValue(wrapperWithNestedIndex(1))
	second := canonicalizeCacheValue(wrapperWithNestedIndex(2))

	if first == second {
		t.Fatalf("expected semantic block_index fields to remain fingerprinted")
	}
}
// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
// explicit cache_control breakpoint has been seen, subsequent message-end
// boundaries act as implicit breakpoints. This allows multi-turn conversations