From a24529d7838470e44783e90658e349f59b1d8b03 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 18:57:40 +0800
Subject: [PATCH 01/22] chore: sync dev branch proxy and workflow updates

---
 .dockerignore                |  10 +
 .github/workflows/docker.yml |   4 +-
 config/config.go             |  52 +++-
 proxy/cache_tracker.go       | 511 +++++++++++++++++++++++++++++++++++
 proxy/cache_tracker_test.go  |  73 +++++
 proxy/handler.go             | 134 +++++++--
 proxy/handler_test.go        |  50 ++++
 proxy/kiro.go                |  24 +-
 proxy/kiro_api.go            |  36 +--
 proxy/kiro_headers.go        |  68 +++++
 proxy/kiro_headers_test.go   |  43 +++
 proxy/translator.go          |  91 ++++---
 proxy/translator_test.go     |  66 ++++-
 13 files changed, 1062 insertions(+), 100 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 proxy/cache_tracker.go
 create mode 100644 proxy/cache_tracker_test.go
 create mode 100644 proxy/kiro_headers.go
 create mode 100644 proxy/kiro_headers_test.go

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..a43909b
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+.git
+.github
+Kiro-Go
+data
+backup*
+PR-*.md
+docker-compose.yml
+README.md
+README_CN.md
+*.log
\ No newline at end of file
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index b1ad3d1..4727b6d 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -2,10 +2,10 @@ name: Build Docker Image
 
 on:
   push:
-    branches: [main, master]
+    branches: [main, master, dev]
     tags: ['v*']
   pull_request:
-    branches: [main, master]
+    branches: [main, master, dev]
   workflow_dispatch:
 
 env:
diff --git a/config/config.go b/config/config.go
index e2a7c58..a70eaea 100644
--- a/config/config.go
+++ b/config/config.go
@@ -15,6 +15,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"os"
+	"runtime"
 	"sync"
 )
 
@@ -94,7 +95,10 @@ type Config struct {
 	Host          string    `json:"host"`             // HTTP server bind address (default: 0.0.0.0)
 	ApiKey        string    `json:"apiKey,omitempty"` // API key for client authentication
 	RequireApiKey bool      `json:"requireApiKey"`    // Whether to enforce API key validation
-	Accounts      []Account `json:"accounts"`         // Registered Kiro accounts
+	KiroVersion   string    `json:"kiroVersion,omitempty"`
+	SystemVersion string    `json:"systemVersion,omitempty"`
+	NodeVersion   string    `json:"nodeVersion,omitempty"`
+	Accounts      []Account `json:"accounts"` // Registered Kiro accounts
 
 	// Thinking mode configuration for extended reasoning output
 	ThinkingSuffix       string `json:"thinkingSuffix,omitempty"`       // Model suffix to trigger thinking mode (default: "-thinking")
@@ -440,3 +444,49 @@ func UpdatePreferredEndpoint(endpoint string) error {
 	cfg.PreferredEndpoint = endpoint
 	return Save()
 }
+
+type KiroClientConfig struct { // client identity values resolved by GetKiroClientConfig
+	KiroVersion   string // cfg.KiroVersion, or "0.11.107" when unset
+	SystemVersion string // cfg.SystemVersion, or a runtime.GOOS-based default when unset
+	NodeVersion   string // cfg.NodeVersion, or "22.22.0" when unset
+}
+
+// GetKiroClientConfig returns the Kiro client identity values, taking each
+// one from the loaded config when set and from a built-in default otherwise.
+func GetKiroClientConfig() KiroClientConfig {
+	cfgLock.RLock()
+	defer cfgLock.RUnlock()
+
+	// Start from the fixed defaults; SystemVersion is resolved last because
+	// its default depends on the host OS.
+	resolved := KiroClientConfig{
+		KiroVersion: "0.11.107",
+		NodeVersion: "22.22.0",
+	}
+
+	if cfg != nil {
+		if cfg.KiroVersion != "" {
+			resolved.KiroVersion = cfg.KiroVersion
+		}
+		if cfg.NodeVersion != "" {
+			resolved.NodeVersion = cfg.NodeVersion
+		}
+		resolved.SystemVersion = cfg.SystemVersion
+	}
+	if resolved.SystemVersion == "" {
+		resolved.SystemVersion = defaultSystemVersion()
+	}
+
+	return resolved
+}
+
+// defaultSystemVersion picks a fixed platform string by runtime.GOOS (the linux value for any other OS).
+func defaultSystemVersion() string {
+	if runtime.GOOS == "windows" {
+		return "win32#10.0.22631"
+	}
+	if runtime.GOOS == "darwin" {
+		return "darwin#24.6.0"
+	}
+	return "linux#6.6.87"
+}
diff --git a/proxy/cache_tracker.go b/proxy/cache_tracker.go
new file mode 100644
index 0000000..338f208
--- /dev/null
+++ b/proxy/cache_tracker.go
@@ -0,0 +1,511 @@
+package proxy
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/json"
+	"sort"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+)
+
+const defaultPromptCacheTTL = 5 * time.Minute // TTL used for ephemeral cache_control blocks whose ttl is missing or unparsable
+
+type promptCacheUsage struct { // cache token counts reported in a Claude usage payload
+	CacheCreationInputTokens   int // tokens up to the last breakpoint that were not matched in the tracker
+	CacheReadInputTokens       int // tokens covered by the deepest stored, unexpired breakpoint
+	CacheCreation5mInputTokens int // created tokens attributed to breakpoints with a TTL below one hour
+	CacheCreation1hInputTokens int // created tokens attributed to breakpoints with a TTL of one hour or more
+}
+
+type promptCacheBreakpoint struct { // one cache marker position within a request
+	Fingerprint      [32]byte      // SHA-256 over every block up to and including this one
+	CumulativeTokens int           // estimated tokens of all blocks up to and including this one
+	TTL              time.Duration // normalized TTL of the block that set this breakpoint
+}
+
+type promptCacheProfile struct { // cache breakpoints computed for a single request
+	Breakpoints      []promptCacheBreakpoint // in prompt order, as produced by BuildClaudeProfile
+	TotalInputTokens int                     // input token estimate, raised to at least the summed block tokens
+}
+
+type promptCacheEntry struct { // tracker record for one stored fingerprint
+	ExpiresAt time.Time     // the entry is pruned once this instant is reached
+	TTL       time.Duration // lifetime re-applied whenever Compute matches the entry
+}
+
+type promptCacheTracker struct { // in-memory record of prompt-cache fingerprints per account
+	mu               sync.Mutex                               // held by Compute and Update while they touch entriesByAccount
+	entriesByAccount map[string]map[[32]byte]promptCacheEntry // account ID -> fingerprint -> entry
+	maxSupportedTTL  time.Duration                            // set by newPromptCacheTracker; not read in the code visible here
+}
+
+// newPromptCacheTracker builds an empty tracker; a non-positive maxTTL falls back to defaultPromptCacheTTL.
+func newPromptCacheTracker(maxTTL time.Duration) *promptCacheTracker {
+	tracker := &promptCacheTracker{maxSupportedTTL: defaultPromptCacheTTL}
+	if maxTTL > 0 {
+		tracker.maxSupportedTTL = maxTTL
+	}
+	tracker.entriesByAccount = make(map[string]map[[32]byte]promptCacheEntry)
+	return tracker
+}
+
+// BuildClaudeProfile derives the prompt-cache breakpoints of a Claude request.
+//
+// Every block of the request is folded, in order, into one running SHA-256;
+// each block carrying a positive cache TTL yields a breakpoint holding the
+// hash and token total of the prompt prefix ending there. totalInputTokens is
+// raised to the summed block estimate when it is smaller. Returns nil for a
+// nil request or when no block requests caching.
+func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTokens int) *promptCacheProfile {
+	if req == nil {
+		// Compute and Update both treat a nil profile as "nothing to track".
+		return nil
+	}
+
+	var (
+		breakpoints      []promptCacheBreakpoint
+		cumulativeTokens int
+	)
+	hasher := sha256.New()
+	for _, block := range flattenClaudeCacheBlocks(req) {
+		writeHashChunk(hasher, canonicalizeCacheValue(block.Value))
+		cumulativeTokens += block.Tokens
+		if block.TTL <= 0 {
+			continue
+		}
+
+		// Sum(nil) snapshots the running digest without resetting it.
+		var fingerprint [32]byte
+		copy(fingerprint[:], hasher.Sum(nil))
+		breakpoints = append(breakpoints, promptCacheBreakpoint{
+			Fingerprint:      fingerprint,
+			CumulativeTokens: cumulativeTokens,
+			TTL:              block.TTL,
+		})
+	}
+	if len(breakpoints) == 0 {
+		return nil
+	}
+
+	totalInputTokens = maxInt(totalInputTokens, cumulativeTokens)
+	return &promptCacheProfile{Breakpoints: breakpoints, TotalInputTokens: totalInputTokens}
+}
+
+// Compute reports the cache usage for a request without recording it: tokens
+// up to the deepest stored, unexpired breakpoint are cache reads (refreshing
+// that entry's expiry); the rest up to the last breakpoint are cache creation.
+// A nil tracker or profile, no breakpoints, or an empty account gives zeros.
+func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfile) promptCacheUsage {
+	if t == nil || profile == nil || accountID == "" || len(profile.Breakpoints) == 0 {
+		return promptCacheUsage{}
+	}
+
+	now := time.Now()
+	final := profile.Breakpoints[len(profile.Breakpoints)-1]
+	finalTokens := minInt(final.CumulativeTokens, profile.TotalInputTokens)
+
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	t.pruneExpiredLocked(now)
+
+	stored := t.entriesByAccount[accountID]
+	if len(stored) == 0 {
+		created5m, created1h := computePromptCacheTTLBreakdown(profile, 0)
+		return promptCacheUsage{
+			CacheCreationInputTokens:   finalTokens,
+			CacheCreation5mInputTokens: created5m,
+			CacheCreation1hInputTokens: created1h,
+		}
+	}
+
+	hitTokens := 0
+	for i := len(profile.Breakpoints) - 1; i >= 0; i-- {
+		candidate := profile.Breakpoints[i]
+		if entry, found := stored[candidate.Fingerprint]; found && !entry.ExpiresAt.Before(now) {
+			entry.ExpiresAt = now.Add(entry.TTL)
+			stored[candidate.Fingerprint] = entry
+			hitTokens = minInt(candidate.CumulativeTokens, profile.TotalInputTokens)
+			break
+		}
+	}
+
+	created5m, created1h := computePromptCacheTTLBreakdown(profile, hitTokens)
+	return promptCacheUsage{
+		CacheCreationInputTokens:   maxInt(finalTokens-hitTokens, 0),
+		CacheReadInputTokens:       hitTokens,
+		CacheCreation5mInputTokens: created5m,
+		CacheCreation1hInputTokens: created1h,
+	}
+}
+
+// Update stores every breakpoint of profile under accountID with a fresh
+// expiry of now plus the breakpoint's TTL. Nil or empty inputs are ignored.
+func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfile) {
+	if t == nil || profile == nil || accountID == "" || len(profile.Breakpoints) == 0 {
+		return
+	}
+
+	now := time.Now()
+	t.mu.Lock()
+	defer t.mu.Unlock()
+	t.pruneExpiredLocked(now)
+
+	stored := t.entriesByAccount[accountID]
+	if stored == nil {
+		stored = map[[32]byte]promptCacheEntry{}
+		t.entriesByAccount[accountID] = stored
+	}
+
+	for i := range profile.Breakpoints {
+		bp := profile.Breakpoints[i]
+		stored[bp.Fingerprint] = promptCacheEntry{ExpiresAt: now.Add(bp.TTL), TTL: bp.TTL}
+	}
+}
+
+func (t *promptCacheTracker) pruneExpiredLocked(now time.Time) { // removes expired entries; Compute and Update call it with t.mu held
+	for accountID, entries := range t.entriesByAccount {
+		for fingerprint, entry := range entries {
+			if !entry.ExpiresAt.After(now) { // an expiry at or before now counts as expired
+				delete(entries, fingerprint)
+			}
+		}
+		if len(entries) == 0 { // drop accounts left without any entries
+			delete(t.entriesByAccount, accountID)
+		}
+	}
+}
+
+type cacheablePromptBlock struct { // one hashed unit of a request: prelude, tool, system block or message block
+	Value  interface{}   // wrapper value that is canonicalized and hashed
+	Tokens int           // token estimate of the canonical form
+	TTL    time.Duration // normalized cache TTL; zero when the block sets no breakpoint
+}
+
+// flattenClaudeCacheBlocks lists the request's hashable blocks in a fixed
+// order: the prelude, each tool, the system blocks, then each message's blocks.
+func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
+	blocks := []cacheablePromptBlock{buildCachePreludeBlock(req)}
+
+	for i := range req.Tools {
+		tool := req.Tools[i]
+		described := map[string]interface{}{
+			"kind":         "tool",
+			"tool_index":   i,
+			"name":         tool.Name,
+			"description":  tool.Description,
+			"input_schema": tool.InputSchema,
+		}
+		tokens := estimateApproxTokens(canonicalizeCacheValue(described))
+		ttl := normalizePromptCacheTTL(extractPromptCacheTTL(tool))
+		blocks = append(blocks, cacheablePromptBlock{Value: described, Tokens: tokens, TTL: ttl})
+	}
+
+	appendSystemCacheBlocks(&blocks, req.System)
+
+	for i := range req.Messages {
+		appendMessageCacheBlocks(&blocks, i, req.Messages[i])
+	}
+
+	return blocks
+}
+
+// buildCachePreludeBlock wraps the model and tool_choice as the first hashed
+// block; its TTL stays zero, so it never forms a breakpoint on its own.
+func buildCachePreludeBlock(req *ClaudeRequest) cacheablePromptBlock {
+	block := cacheablePromptBlock{Value: map[string]interface{}{
+		"kind":        "request_prelude",
+		"model":       req.Model,
+		"tool_choice": req.ToolChoice,
+	}}
+	block.Tokens = estimateApproxTokens(canonicalizeCacheValue(block.Value))
+	return block
+}
+
+// appendSystemCacheBlocks appends one "system" block per system prompt entry.
+// system may be a plain string, a []interface{} of content blocks, or a
+// []string; bare strings are wrapped as text blocks and any other type adds
+// nothing.
+func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{}) {
+	// add appends one system block at the given index.
+	add := func(index int, block interface{}) {
+		appendPromptBlock(blocks, map[string]interface{}{
+			"kind":         "system",
+			"system_index": index,
+			"block":        block,
+		})
+	}
+
+	// asText wraps a bare string as a text content block.
+	asText := func(text string) map[string]interface{} {
+		return map[string]interface{}{"type": "text", "text": text}
+	}
+
+	switch v := system.(type) {
+	case string:
+		add(0, asText(v))
+	case []interface{}:
+		for i, block := range v {
+			add(i, block)
+		}
+	case []string:
+		for i, text := range v {
+			add(i, asText(text))
+		}
+	}
+}
+
+// appendMessageCacheBlocks appends one "message" block per content block of
+// msg. String content becomes a single text block, a []interface{} yields one
+// block per element, and any other non-nil content is passed through as a
+// single block at index 0; nil content adds nothing.
+func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int, msg ClaudeMessage) {
+	add := func(blockIndex int, block interface{}) {
+		appendPromptBlock(blocks, map[string]interface{}{
+			"kind":          "message",
+			"message_index": messageIndex,
+			"role":          msg.Role,
+			"block_index":   blockIndex,
+			"block":         block,
+		})
+	}
+
+	if msg.Content == nil {
+		return
+	}
+
+	if text, isText := msg.Content.(string); isText {
+		add(0, map[string]interface{}{
+			"type": "text",
+			"text": text,
+		})
+		return
+	}
+
+	if list, isList := msg.Content.([]interface{}); isList {
+		for blockIndex, block := range list {
+			add(blockIndex, block)
+		}
+		return
+	}
+
+	add(0, msg.Content)
+}
+
+// appendPromptBlock appends wrapper to blocks as one cacheable block. The TTL
+// is read from the cache_control of wrapper["block"]; the token estimate is
+// taken from the canonical form of the whole wrapper.
+func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}) {
+	entry := cacheablePromptBlock{Value: wrapper}
+	entry.TTL = normalizePromptCacheTTL(extractPromptCacheTTL(wrapper["block"]))
+	entry.Tokens = estimateApproxTokens(canonicalizeCacheValue(wrapper))
+
+	*blocks = append(*blocks, entry)
+}
+
+// extractPromptCacheTTL returns the cache TTL requested by value's
+// "cache_control" entry, or 0 when value asks for no caching.
+//
+// value is used directly when it is a map[string]interface{}; anything else
+// is round-tripped through JSON to obtain a map. Only cache_control objects
+// whose type equals "ephemeral" (case-insensitively) count; when their "ttl"
+// cannot be parsed, defaultPromptCacheTTL is returned.
+func extractPromptCacheTTL(value interface{}) time.Duration {
+	fields, isMap := value.(map[string]interface{})
+	if !isMap {
+		raw, err := json.Marshal(value)
+		if err != nil {
+			return 0
+		}
+		if err := json.Unmarshal(raw, &fields); err != nil {
+			return 0
+		}
+	}
+
+	control, isObject := fields["cache_control"].(map[string]interface{})
+	if !isObject {
+		return 0
+	}
+	if kind, _ := control["type"].(string); !strings.EqualFold(kind, "ephemeral") {
+		return 0
+	}
+
+	ttl, parsed := parsePromptCacheTTLValue(control["ttl"])
+	if !parsed {
+		return defaultPromptCacheTTL
+	}
+	return ttl
+}
+
+// parsePromptCacheTTLValue converts a cache_control "ttl" value to a duration.
+// Strings may be Go durations ("5m", "1h") or whole seconds ("300"); numbers
+// are seconds. ok is false for unsupported types and non-positive results, so
+// callers fall back to their default.
+func parsePromptCacheTTLValue(value interface{}) (time.Duration, bool) {
+	var ttl time.Duration
+	switch v := value.(type) {
+	case string:
+		trimmed := strings.TrimSpace(strings.ToLower(v))
+		if d, err := time.ParseDuration(trimmed); err == nil {
+			ttl = d
+		} else if seconds, err := strconv.Atoi(trimmed); err == nil {
+			ttl = time.Duration(seconds) * time.Second
+		}
+	case float64:
+		// Scale first so fractional seconds are not truncated to zero.
+		ttl = time.Duration(v * float64(time.Second))
+	case int:
+		ttl = time.Duration(v) * time.Second
+	case int64:
+		ttl = time.Duration(v) * time.Second
+	}
+	// A non-positive TTL is unusable in every form; report it as unparsed.
+	if ttl <= 0 {
+		return 0, false
+	}
+	return ttl, true
+}
+
+// normalizePromptCacheTTL snaps ttl onto the supported buckets: 0 when not
+// positive, defaultPromptCacheTTL up to that value, one hour for anything longer.
+func normalizePromptCacheTTL(ttl time.Duration) time.Duration {
+	switch {
+	case ttl <= 0:
+		return 0
+	case ttl <= defaultPromptCacheTTL:
+		return defaultPromptCacheTTL
+	default:
+		return time.Hour
+	}
+}
+
+// computePromptCacheTTLBreakdown splits the tokens beyond matchedTokens into
+// (5m, 1h) creation counts: each breakpoint adds the tokens between the prior
+// position and its own (capped at the total) to the bucket chosen by its TTL.
+func computePromptCacheTTLBreakdown(profile *promptCacheProfile, matchedTokens int) (int, int) {
+	var created5m, created1h int
+	if profile == nil {
+		return created5m, created1h
+	}
+	cursor := matchedTokens
+	for i := range profile.Breakpoints {
+		bp := profile.Breakpoints[i]
+		upTo := minInt(bp.CumulativeTokens, profile.TotalInputTokens)
+		if upTo > cursor {
+			if bp.TTL < time.Hour {
+				created5m += upTo - cursor
+			} else {
+				created1h += upTo - cursor
+			}
+			cursor = upTo
+		}
+	}
+	return created5m, created1h
+}
+
+func billedClaudeInputTokens(inputTokens int, usage promptCacheUsage) int { // input tokens left after removing cache creation and cache read tokens
+	return maxInt(inputTokens-usage.CacheCreationInputTokens-usage.CacheReadInputTokens, 0) // clamped so the result is never negative
+}
+
+// buildClaudeUsageMap builds a Claude "usage" object; the cache fields appear only when includeCache is set.
+func buildClaudeUsageMap(inputTokens, outputTokens int, usage promptCacheUsage, includeCache bool) map[string]interface{} {
+	fields := map[string]interface{}{
+		"input_tokens":  billedClaudeInputTokens(inputTokens, usage),
+		"output_tokens": outputTokens,
+	}
+	if includeCache {
+		fields["cache_creation_input_tokens"] = usage.CacheCreationInputTokens
+		fields["cache_read_input_tokens"] = usage.CacheReadInputTokens
+		fields["cache_creation"] = map[string]int{
+			"ephemeral_5m_input_tokens": usage.CacheCreation5mInputTokens,
+			"ephemeral_1h_input_tokens": usage.CacheCreation1hInputTokens,
+		}
+	}
+	return fields
+}
+
+func canonicalizeCacheValue(value interface{}) string { // text that writeCanonicalJSON produces for value
+	out := new(bytes.Buffer)
+	writeCanonicalJSON(out, value)
+	return out.String()
+}
+
+// writeCanonicalJSON appends a deterministic JSON encoding of value to buf.
+//
+// A map[string]interface{} is written with its keys sorted and its
+// "cache_control" key dropped; this applies to every such map reached
+// through nested maps and []interface{} slices, so a cache marker never
+// changes the text. []interface{} values keep their element order.
+// Everything else (nil, strings, booleans, numbers and any other type) is
+// encoded by json.Marshal; a value it cannot encode contributes no output
+// at all.
+func writeCanonicalJSON(buf *bytes.Buffer, value interface{}) {
+	switch v := value.(type) {
+	case map[string]interface{}:
+		// Collect every key except the cache marker, then sort for a stable order.
+		keys := make([]string, 0, len(v))
+		for key := range v {
+			if key != "cache_control" {
+				keys = append(keys, key)
+			}
+		}
+		sort.Strings(keys)
+
+		buf.WriteByte('{')
+		for i, key := range keys {
+			if i > 0 {
+				buf.WriteByte(',')
+			}
+			writeCanonicalJSON(buf, key)
+			buf.WriteByte(':')
+			writeCanonicalJSON(buf, v[key])
+		}
+		buf.WriteByte('}')
+
+	case []interface{}:
+		buf.WriteByte('[')
+		for i, item := range v {
+			if i > 0 {
+				buf.WriteByte(',')
+			}
+			writeCanonicalJSON(buf, item)
+		}
+		buf.WriteByte(']')
+
+	default:
+		// json.Marshal already yields the literal forms for scalars
+		// ("null", "true", "false", quoted strings, numbers). Its error is
+		// ignored on purpose: encoding here is best-effort.
+		encoded, _ := json.Marshal(value)
+		buf.Write(encoded)
+	}
+}
+
+// writeHashChunk feeds chunk to hasher framed as its decimal byte length, a
+// NUL byte, the chunk itself and a closing NUL byte, so that where one chunk
+// ends and the next begins is part of the hashed data.
+func writeHashChunk(hasher hashWriter, chunk string) {
+	framed := strconv.Itoa(len(chunk)) + "\x00" + chunk + "\x00"
+	hasher.Write([]byte(framed))
+}
+
+type hashWriter interface { // the two hash methods used here; satisfied by the sha256 hasher in BuildClaudeProfile
+	Write([]byte) (int, error) // feeds more data into the running hash
+	Sum([]byte) []byte         // appends the current digest to the argument and returns the result
+}
+
+func minInt(a, b int) int { // returns the smaller of a and b
+	if b <= a {
+		return b
+	}
+	return a
+}
+
+func maxInt(a, b int) int { // returns the larger of a and b
+	if b >= a {
+		return b
+	}
+	return a
+}
diff --git a/proxy/cache_tracker_test.go b/proxy/cache_tracker_test.go
new file mode 100644
index 0000000..1beba02
--- /dev/null
+++ b/proxy/cache_tracker_test.go
@@ -0,0 +1,73 @@
+package proxy
+
+import (
+	"testing"
+	"time"
+)
+
+func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) { // first request creates cache tokens, a repeat reads them
+	tracker := newPromptCacheTracker(time.Hour)
+	req := &ClaudeRequest{
+		Model: "claude-sonnet-4.5",
+		System: []interface{}{
+			map[string]interface{}{
+				"type": "text",
+				"text": "system prompt",
+				"cache_control": map[string]interface{}{ // makes the system block a cache breakpoint
+					"type": "ephemeral",
+				},
+			},
+		},
+		Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
+	}
+	// 120 is the caller-supplied estimate of the request's total input tokens.
+	profile := tracker.BuildClaudeProfile(req, 120)
+	if profile == nil {
+		t.Fatalf("expected cache profile to be built")
+	}
+	// Nothing is stored yet, so the tokens up to the breakpoint count as creation.
+	first := tracker.Compute("acct-1", profile)
+	if first.CacheCreationInputTokens <= 0 {
+		t.Fatalf("expected first request to create cache tokens, got %+v", first)
+	}
+	if first.CacheReadInputTokens != 0 {
+		t.Fatalf("expected first request to have zero cache reads, got %+v", first)
+	}
+	// Once Update has stored the breakpoint, the same profile must be a cache read.
+	tracker.Update("acct-1", profile)
+	second := tracker.Compute("acct-1", profile)
+	if second.CacheReadInputTokens <= 0 {
+		t.Fatalf("expected repeated request to read cache tokens, got %+v", second)
+	}
+	if second.CacheCreationInputTokens != 0 {
+		t.Fatalf("expected repeated request to avoid cache creation, got %+v", second)
+	}
+}
+
+func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) { // checks every field emitted when includeCache is true
+	usage := promptCacheUsage{
+		CacheCreationInputTokens:   30,
+		CacheReadInputTokens:       20,
+		CacheCreation5mInputTokens: 10,
+		CacheCreation1hInputTokens: 20,
+	}
+	// 100 input tokens and 50 output tokens, with the cache fields requested.
+	m := buildClaudeUsageMap(100, 50, usage, true)
+	// Billed input is 100 - 30 created - 20 read = 50.
+	if got := m["input_tokens"]; got != 50 {
+		t.Fatalf("expected billed input tokens 50, got %#v", got)
+	}
+	if got := m["cache_creation_input_tokens"]; got != 30 {
+		t.Fatalf("expected cache creation tokens 30, got %#v", got)
+	}
+	if got := m["cache_read_input_tokens"]; got != 20 {
+		t.Fatalf("expected cache read tokens 20, got %#v", got)
+	}
+	creation, ok := m["cache_creation"].(map[string]int) // the TTL breakdown is stored as a typed map
+	if !ok {
+		t.Fatalf("expected typed cache creation map, got %#v", m["cache_creation"])
+	}
+	if creation["ephemeral_5m_input_tokens"] != 10 || creation["ephemeral_1h_input_tokens"] != 20 {
+		t.Fatalf("unexpected ttl breakdown: %#v", creation)
+	}
+}
diff --git a/proxy/handler.go b/proxy/handler.go
index 2b1490b..eb3bd36 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -33,6 +33,7 @@ type Handler struct {
 	cachedModels    []ModelInfo
 	modelsCacheMu   sync.RWMutex
 	modelsCacheTime int64
+	promptCache     *promptCacheTracker
 }
 
 type thinkingStreamSource int
@@ -61,6 +62,77 @@ func allowTagSource(source *thinkingStreamSource) bool {
 	return *source == thinkingSourceTagBlock
 }
 
+// validateClaudeRequestShape returns a client-facing error message for a
+// Claude request whose messages are empty, end with an assistant turn, or
+// hold no user message with usable content or tool results; "" means valid.
+func validateClaudeRequestShape(req *ClaudeRequest) string {
+	if len(req.Messages) == 0 {
+		return "messages must not be empty"
+	}
+
+	var finalRole string
+	sawUserContent := false
+	for i := range req.Messages {
+		role := strings.TrimSpace(req.Messages[i].Role)
+		if role != "" {
+			finalRole = role
+		}
+		if role == "user" {
+			text, images, toolResults := extractClaudeUserContent(req.Messages[i].Content)
+			if normalizeUserContent(text, len(images) > 0) != "" || len(toolResults) > 0 {
+				sawUserContent = true
+			}
+		}
+	}
+
+	switch {
+	case finalRole == "assistant":
+		return "assistant-prefill final message is not supported; last message must be user"
+	case !sawUserContent:
+		return "at least one non-empty user message is required"
+	}
+	return ""
+}
+
+// validateOpenAIRequestShape checks the message list of an OpenAI chat request
+// and returns a client-facing error message, or "" when it is acceptable.
+// Blank roles are skipped and system messages never count as the final turn.
+func validateOpenAIRequestShape(req *OpenAIRequest) string {
+	if len(req.Messages) == 0 {
+		return "messages must not be empty"
+	}
+
+	var finalRole string
+	sawNonSystem, sawUserContent := false, false
+	for i := range req.Messages {
+		role := strings.TrimSpace(req.Messages[i].Role)
+		switch role {
+		case "", "system":
+			// Neither a blank role nor a system message affects any of the checks.
+			continue
+		}
+		sawNonSystem = true
+		finalRole = role
+		if role != "user" {
+			continue
+		}
+		text, images := extractOpenAIUserContent(req.Messages[i].Content)
+		if normalizeUserContent(text, len(images) > 0) != "" {
+			sawUserContent = true
+		}
+	}
+
+	switch {
+	case !sawNonSystem:
+		return "at least one non-system message is required"
+	case finalRole == "assistant":
+		return "assistant-prefill final message is not supported; last message must be user or tool"
+	case !sawUserContent:
+		return "at least one non-empty user message is required"
+	}
+	return ""
+}
+
 func NewHandler() *Handler {
 	totalReq, successReq, failedReq, totalTokens, totalCredits := config.GetStats()
 	h := &Handler{
@@ -73,6 +145,7 @@ func NewHandler() *Handler {
 		startTime:       time.Now().Unix(),
 		stopRefresh:     make(chan struct{}),
 		stopStatsSaver:  make(chan struct{}),
+		promptCache:     newPromptCacheTracker(defaultPromptCacheTTL),
 	}
 	// 启动后台刷新
 	go h.backgroundRefresh()
@@ -286,8 +359,8 @@ func (h *Handler) handleModels(w http.ResponseWriter, r *http.Request) {
 			buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
 			buildModelInfo("claude-opus-4.6", "anthropic", true),
 			buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
-	        buildModelInfo("claude-opus-4-7", "anthropic", true),
-            buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
+			buildModelInfo("claude-opus-4-7", "anthropic", true),
+			buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
 			buildModelInfo("claude-sonnet-4.5", "anthropic", true),
 			buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
 			buildModelInfo("claude-sonnet-4", "anthropic", true),
@@ -433,6 +506,10 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 		h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON: "+err.Error())
 		return
 	}
+	if msg := validateClaudeRequestShape(&req); msg != "" {
+		h.sendClaudeError(w, 400, "invalid_request_error", msg)
+		return
+	}
 
 	// 获取账号
 	account := h.pool.GetNext()
@@ -452,20 +529,22 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
 	req.Model = actualModel
 	estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
+	cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
+	cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
 
 	// 转换请求
 	kiroPayload := ClaudeToKiro(&req, thinking)
 
 	// 流式或非流式
 	if req.Stream {
-		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens)
+		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
 	} else {
-		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens)
+		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
 	}
 }
 
 // handleClaudeStream Claude 流式响应
-func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int) {
+func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
 	w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
 	w.Header().Set("Cache-Control", "no-cache")
 	w.Header().Set("Connection", "keep-alive")
@@ -737,10 +816,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 			"model":         model,
 			"stop_reason":   nil,
 			"stop_sequence": nil,
-			"usage": map[string]int{
-				"input_tokens":  startInputTokens,
-				"output_tokens": 0,
-			},
+			"usage":         buildClaudeUsageMap(startInputTokens, 0, cacheUsage, cacheProfile != nil),
 		},
 	})
 
@@ -827,7 +903,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	}
 	closeActiveBlock()
 
-	inputTokens = estimatedInputTokens
+	if inputTokens <= 0 {
+		inputTokens = estimatedInputTokens
+	}
 	outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String())
 	thinkingOutput := rawThinkingBuilder.String()
 	if thinking && thinkingOutput == "" && extractedReasoning != "" {
@@ -841,6 +919,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	h.recordSuccess(inputTokens, outputTokens, credits)
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
+	h.promptCache.Update(account.ID, cacheProfile)
 
 	// 发送 message_delta
 	stopReason := "end_turn"
@@ -853,10 +932,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 		"delta": map[string]interface{}{
 			"stop_reason": stopReason,
 		},
-		"usage": map[string]int{
-			"input_tokens":  inputTokens,
-			"output_tokens": outputTokens,
-		},
+		"usage": buildClaudeUsageMap(inputTokens, outputTokens, cacheUsage, cacheProfile != nil),
 	})
 
 	h.sendSSE(w, flusher, "message_stop", map[string]interface{}{
@@ -925,7 +1001,7 @@ func (h *Handler) recordFailure() {
 }
 
 // handleClaudeNonStream Claude 非流式响应
-func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int) {
+func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
 	var content string
 	var thinkingContent string
 	var toolUses []KiroToolUse
@@ -973,12 +1049,15 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 		thinkingContent = ""
 	}
 
-	inputTokens = estimatedInputTokens
+	if inputTokens <= 0 {
+		inputTokens = estimatedInputTokens
+	}
 	outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
 
 	h.recordSuccess(inputTokens, outputTokens, credits)
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
+	h.promptCache.Update(account.ID, cacheProfile)
 
 	if thinking && thinkingContent != "" {
 		switch thinkingFormat {
@@ -993,6 +1072,15 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	}
 
 	resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model)
+	resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage)
+	resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens
+	resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens
+	if cacheProfile != nil {
+		resp.Usage.CacheCreation = &ClaudeCacheCreationUsage{
+			Ephemeral5mInputTokens: cacheUsage.CacheCreation5mInputTokens,
+			Ephemeral1hInputTokens: cacheUsage.CacheCreation1hInputTokens,
+		}
+	}
 	w.Header().Set("Content-Type", "application/json; charset=utf-8")
 	json.NewEncoder(w).Encode(resp)
 }
@@ -1027,6 +1115,10 @@ func (h *Handler) handleOpenAIChat(w http.ResponseWriter, r *http.Request) {
 		h.sendOpenAIError(w, 400, "invalid_request_error", "Invalid JSON")
 		return
 	}
+	if msg := validateOpenAIRequestShape(&req); msg != "" {
+		h.sendOpenAIError(w, 400, "invalid_request_error", msg)
+		return
+	}
 
 	account := h.pool.GetNext()
 	if account == nil {
@@ -1382,7 +1474,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 		eventThinkingOpen = false
 	}
 
-	inputTokens = estimatedInputTokens
+	if inputTokens <= 0 {
+		inputTokens = estimatedInputTokens
+	}
 	outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String())
 	reasoningOutput := rawReasoningBuilder.String()
 	if thinking && reasoningOutput == "" && extractedReasoning != "" {
@@ -1467,7 +1561,9 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 		reasoningContent = ""
 	}
 
-	inputTokens = estimatedInputTokens
+	if inputTokens <= 0 {
+		inputTokens = estimatedInputTokens
+	}
 	outputTokens = estimateOpenAIOutputTokens(finalContent, reasoningContent, toolUses)
 
 	h.recordSuccess(inputTokens, outputTokens, credits)
@@ -1814,7 +1910,7 @@ func (h *Handler) apiBatchAccounts(w http.ResponseWriter, r *http.Request) {
 		}
 		h.pool.Reload()
 		json.NewEncoder(w).Encode(map[string]interface{}{
-			"success": true,
+			"success":   true,
 			"refreshed": successCount,
 			"failed":    failCount,
 		})
diff --git a/proxy/handler_test.go b/proxy/handler_test.go
index e45b8dd..a435dec 100644
--- a/proxy/handler_test.go
+++ b/proxy/handler_test.go
@@ -48,3 +48,53 @@ func TestThinkingSourceSameSourceRemainsAllowed(t *testing.T) {
 		t.Fatalf("expected repeated reasoning source selection to stay allowed")
 	}
 }
+
+func TestValidateOpenAIRequestShapeRejectsAssistantPrefill(t *testing.T) { // a trailing assistant message must be rejected
+	req := &OpenAIRequest{
+		Messages: []OpenAIMessage{
+			{Role: "user", Content: "hello"},
+			{Role: "assistant", Content: "prefill"},
+		},
+	}
+	// Any non-empty message from the validator means the request was rejected.
+	if msg := validateOpenAIRequestShape(req); msg == "" {
+		t.Fatalf("expected assistant-prefill final message to be rejected")
+	}
+}
+
+func TestValidateOpenAIRequestShapeAllowsToolResultFinalTurn(t *testing.T) { // a conversation ending in a tool result is valid
+	req := &OpenAIRequest{
+		Messages: []OpenAIMessage{
+			{Role: "user", Content: "find weather"},
+			{
+				Role: "assistant",
+				ToolCalls: []ToolCall{{
+					ID:   "call_1",
+					Type: "function",
+					Function: struct {
+						Name      string `json:"name"`
+						Arguments string `json:"arguments"`
+					}{Name: "get_weather", Arguments: "{}"},
+				}},
+			},
+			{Role: "tool", ToolCallID: "call_1", Content: "sunny"},
+		},
+	}
+	// The final role is "tool", not "assistant", so the validator must return "".
+	if msg := validateOpenAIRequestShape(req); msg != "" {
+		t.Fatalf("expected tool-result final turn to be valid, got %q", msg)
+	}
+}
+
+func TestValidateClaudeRequestShapeRejectsAssistantPrefill(t *testing.T) { // a trailing assistant message must be rejected
+	req := &ClaudeRequest{
+		Messages: []ClaudeMessage{
+			{Role: "user", Content: "hello"},
+			{Role: "assistant", Content: "prefill"},
+		},
+	}
+	// Any non-empty message from the validator means the request was rejected.
+	if msg := validateClaudeRequestShape(req); msg == "" {
+		t.Fatalf("expected assistant-prefill final message to be rejected")
+	}
+}
diff --git a/proxy/kiro.go b/proxy/kiro.go
index a58eff8..00109b7 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -9,6 +9,7 @@ import (
 	"io"
 	"kiro-api-proxy/config"
 	"net/http"
+	"net/url"
 	"strconv"
 	"strings"
 	"time"
@@ -16,8 +17,6 @@ import (
 	"github.com/google/uuid"
 )
 
-const KiroVersion = "0.7.45"
-
 // 双端点配置（429 时自动 fallback）
 type kiroEndpoint struct {
 	URL       string
@@ -164,17 +163,6 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		return err
 	}
 
-	// User-Agent
-	machineId := account.MachineId
-	var userAgent, amzUserAgent string
-	if machineId != "" {
-		userAgent = fmt.Sprintf("aws-sdk-js/1.0.27 ua/2.1 os/linux lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E KiroIDE-%s-%s", KiroVersion, machineId)
-		amzUserAgent = fmt.Sprintf("aws-sdk-js/1.0.27 KiroIDE %s %s", KiroVersion, machineId)
-	} else {
-		userAgent = fmt.Sprintf("aws-sdk-js/1.0.27 ua/2.1 os/linux lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E KiroIDE-%s", KiroVersion)
-		amzUserAgent = fmt.Sprintf("aws-sdk-js/1.0.27 KiroIDE %s", KiroVersion)
-	}
-
 	// 根据配置排序端点
 	endpoints := getSortedEndpoints(config.GetPreferredEndpoint())
 
@@ -190,16 +178,20 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 			continue
 		}
 
+		host := ""
+		if parsedURL, parseErr := url.Parse(ep.URL); parseErr == nil {
+			host = parsedURL.Host
+		}
+		headerValues := buildStreamingHeaderValues(account, host)
+
 		req.Header.Set("Content-Type", "application/json")
 		req.Header.Set("Accept", "*/*")
 		req.Header.Set("X-Amz-Target", ep.AmzTarget)
-		req.Header.Set("User-Agent", userAgent)
-		req.Header.Set("X-Amz-User-Agent", amzUserAgent)
+		applyKiroBaseHeaders(req, account, headerValues)
 		req.Header.Set("x-amzn-kiro-agent-mode", "vibe")
 		req.Header.Set("x-amzn-codewhisperer-optout", "true")
 		req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 		req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
-		req.Header.Set("Authorization", "Bearer "+account.AccessToken)
 
 		resp, err := kiroHttpClient.Do(req)
 		if err != nil {
diff --git a/proxy/kiro_api.go b/proxy/kiro_api.go
index 7252182..2b2d3dc 100644
--- a/proxy/kiro_api.go
+++ b/proxy/kiro_api.go
@@ -12,7 +12,6 @@ import (
 
 const (
 	kiroRestAPIBase = "https://codewhisperer.us-east-1.amazonaws.com"
-	kiroVersion     = "0.7.45"
 )
 
 // GetUsageLimits 获取账户使用量和订阅信息
@@ -110,21 +109,14 @@ func ListAvailableModels(account *config.Account) ([]ModelInfo, error) {
 }
 
 func setKiroHeaders(req *http.Request, account *config.Account) {
-	machineId := account.MachineId
-	var userAgent, amzUserAgent string
-	if machineId != "" {
-		userAgent = fmt.Sprintf("aws-sdk-js/1.0.27 ua/2.1 os/linux lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E KiroIDE-%s-%s", kiroVersion, machineId)
-		amzUserAgent = fmt.Sprintf("aws-sdk-js/1.0.27 KiroIDE %s %s", kiroVersion, machineId)
-	} else {
-		userAgent = fmt.Sprintf("aws-sdk-js/1.0.27 ua/2.1 os/linux lang/js md/nodejs#22.21.1 api/codewhispererstreaming#1.0.27 m/E KiroIDE-%s", kiroVersion)
-		amzUserAgent = fmt.Sprintf("aws-sdk-js/1.0.27 KiroIDE %s", kiroVersion)
+	host := ""
+	if req.URL != nil {
+		host = req.URL.Host
 	}
+	headerValues := buildRuntimeHeaderValues(account, host)
 
-	req.Header.Set("Authorization", "Bearer "+account.AccessToken)
 	req.Header.Set("Accept", "application/json")
-	req.Header.Set("User-Agent", userAgent)
-	req.Header.Set("x-amz-user-agent", amzUserAgent)
-	req.Header.Set("x-amzn-codewhisperer-optout", "true")
+	applyKiroBaseHeaders(req, account, headerValues)
 }
 
 // RefreshAccountInfo 刷新账户信息（使用量、订阅等）
@@ -156,7 +148,7 @@ func RefreshAccountInfo(account *config.Account) (*config.AccountInfo, error) {
 
 			return nil, fmt.Errorf("Account suspended: %w", err)
 		} else if strings.Contains(errMsg, "403") || strings.Contains(errMsg, "401") ||
-				  strings.Contains(errMsg, "invalid") || strings.Contains(errMsg, "expired") {
+			strings.Contains(errMsg, "invalid") || strings.Contains(errMsg, "expired") {
 			// Token 相关错误，可能需要重新认证
 			fmt.Printf("[RefreshAccountInfo] Authentication error for %s: %v\n", account.Email, err)
 
@@ -286,14 +278,14 @@ type UsageLimitsResponse struct {
 }
 
 type UsageBreakdown struct {
-	ResourceType   string  `json:"resourceType"`
-	CurrentUsage   float64 `json:"currentUsage"`
-	UsageLimit     float64 `json:"usageLimit"`
-	Currency       string  `json:"currency"`
-	Unit           string  `json:"unit"`
-	OverageRate    float64 `json:"overageRate"`
-	FreeTrialInfo  *FreeTrialInfo `json:"freeTrialInfo"`
-	Bonuses        []BonusInfo    `json:"bonuses"`
+	ResourceType  string         `json:"resourceType"`
+	CurrentUsage  float64        `json:"currentUsage"`
+	UsageLimit    float64        `json:"usageLimit"`
+	Currency      string         `json:"currency"`
+	Unit          string         `json:"unit"`
+	OverageRate   float64        `json:"overageRate"`
+	FreeTrialInfo *FreeTrialInfo `json:"freeTrialInfo"`
+	Bonuses       []BonusInfo    `json:"bonuses"`
 }
 
 type FreeTrialInfo struct {
diff --git a/proxy/kiro_headers.go b/proxy/kiro_headers.go
new file mode 100644
index 0000000..c667987
--- /dev/null
+++ b/proxy/kiro_headers.go
@@ -0,0 +1,68 @@
+package proxy
+
+import (
+	"fmt"
+	"kiro-api-proxy/config"
+	"net/http"
+)
+
+const (
+	kiroStreamingSDKVersion = "1.0.34"
+	kiroRuntimeSDKVersion   = "1.0.0"
+)
+
+type kiroHeaderValues struct {
+	UserAgent    string
+	AmzUserAgent string
+	Host         string
+}
+
+func buildStreamingHeaderValues(account *config.Account, host string) kiroHeaderValues {
+	return buildKiroHeaderValues(account, host, "codewhispererstreaming", kiroStreamingSDKVersion, "m/E")
+}
+
+func buildRuntimeHeaderValues(account *config.Account, host string) kiroHeaderValues {
+	return buildKiroHeaderValues(account, host, "codewhispererruntime", kiroRuntimeSDKVersion, "m/N,E")
+}
+
+func buildKiroHeaderValues(account *config.Account, host, apiName, sdkVersion, mode string) kiroHeaderValues {
+	clientCfg := config.GetKiroClientConfig()
+	machineID := ""
+	if account != nil {
+		machineID = account.MachineId
+	}
+
+	userAgent := fmt.Sprintf(
+		"aws-sdk-js/%s ua/2.1 os/%s lang/js md/nodejs#%s api/%s#%s %s KiroIDE-%s",
+		sdkVersion,
+		clientCfg.SystemVersion,
+		clientCfg.NodeVersion,
+		apiName,
+		sdkVersion,
+		mode,
+		clientCfg.KiroVersion,
+	)
+	amzUserAgent := fmt.Sprintf("aws-sdk-js/%s KiroIDE-%s", sdkVersion, clientCfg.KiroVersion)
+	if machineID != "" {
+		userAgent += "-" + machineID
+		amzUserAgent += "-" + machineID
+	}
+
+	return kiroHeaderValues{
+		UserAgent:    userAgent,
+		AmzUserAgent: amzUserAgent,
+		Host:         host,
+	}
+}
+
+func applyKiroBaseHeaders(req *http.Request, account *config.Account, values kiroHeaderValues) {
+	if account != nil && account.AccessToken != "" {
+		req.Header.Set("Authorization", "Bearer "+account.AccessToken)
+	}
+	req.Header.Set("User-Agent", values.UserAgent)
+	req.Header.Set("x-amz-user-agent", values.AmzUserAgent)
+	req.Header.Set("x-amzn-codewhisperer-optout", "true")
+	if values.Host != "" {
+		req.Host = values.Host
+	}
+}
diff --git a/proxy/kiro_headers_test.go b/proxy/kiro_headers_test.go
new file mode 100644
index 0000000..99e7074
--- /dev/null
+++ b/proxy/kiro_headers_test.go
@@ -0,0 +1,43 @@
+package proxy
+
+import (
+	"kiro-api-proxy/config"
+	"strings"
+	"testing"
+)
+
+func TestBuildStreamingHeaderValuesAlignsWithKiroIDEFormat(t *testing.T) {
+	account := &config.Account{MachineId: "machine-123"}
+	values := buildStreamingHeaderValues(account, "q.us-east-1.amazonaws.com")
+
+	if values.Host != "q.us-east-1.amazonaws.com" {
+		t.Fatalf("expected host to be preserved, got %q", values.Host)
+	}
+	if !strings.Contains(values.UserAgent, "aws-sdk-js/1.0.34") {
+		t.Fatalf("expected streaming sdk version in user agent, got %q", values.UserAgent)
+	}
+	if !strings.Contains(values.UserAgent, "api/codewhispererstreaming#1.0.34") {
+		t.Fatalf("expected streaming API marker in user agent, got %q", values.UserAgent)
+	}
+	if !strings.Contains(values.UserAgent, "KiroIDE-0.11.107-machine-123") {
+		t.Fatalf("expected kiro version and machine id in user agent, got %q", values.UserAgent)
+	}
+	if !strings.Contains(values.AmzUserAgent, "aws-sdk-js/1.0.34 KiroIDE-0.11.107-machine-123") {
+		t.Fatalf("expected x-amz-user-agent to include version and machine id, got %q", values.AmzUserAgent)
+	}
+}
+
+func TestBuildRuntimeHeaderValuesUsesRuntimeAPIFormat(t *testing.T) {
+	account := &config.Account{MachineId: "machine-456"}
+	values := buildRuntimeHeaderValues(account, "codewhisperer.us-east-1.amazonaws.com")
+
+	if !strings.Contains(values.UserAgent, "aws-sdk-js/1.0.0") {
+		t.Fatalf("expected runtime sdk version in user agent, got %q", values.UserAgent)
+	}
+	if !strings.Contains(values.UserAgent, "api/codewhispererruntime#1.0.0") {
+		t.Fatalf("expected runtime API marker in user agent, got %q", values.UserAgent)
+	}
+	if !strings.Contains(values.UserAgent, "m/N,E") {
+		t.Fatalf("expected runtime mode marker in user agent, got %q", values.UserAgent)
+	}
+}
diff --git a/proxy/translator.go b/proxy/translator.go
index 64c4128..957eb72 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -23,7 +23,7 @@ var modelMapOrdered = []modelMapping{
 	{"claude-sonnet-4-6", "claude-sonnet-4.6"},
 	{"claude-sonnet-4.6", "claude-sonnet-4.6"},
 	{"claude-opus-4-7", "claude-opus-4-7"},
-    {"claude-opus-4.7", "claude-opus-4-7"},
+	{"claude-opus-4.7", "claude-opus-4-7"},
 	{"claude-haiku-4-5", "claude-haiku-4.5"},
 	{"claude-haiku-4.5", "claude-haiku-4.5"},
 	{"claude-opus-4-5", "claude-opus-4.5"},
@@ -46,6 +46,7 @@ const ThinkingModePrompt = `<thinking_mode>enabled</thinking_mode>
 <max_thinking_length>200000</max_thinking_length>`
 
 const minimalFallbackUserContent = "."
+const toolResultsContinuationPrefix = "Tool results:"
 
 // ParseModelAndThinking 解析模型名称，返回实际模型和是否启用 thinking
 func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
@@ -134,9 +135,17 @@ type ClaudeResponse struct {
 	Usage        ClaudeUsage          `json:"usage"`
 }
 
+type ClaudeCacheCreationUsage struct {
+	Ephemeral5mInputTokens int `json:"ephemeral_5m_input_tokens,omitempty"`
+	Ephemeral1hInputTokens int `json:"ephemeral_1h_input_tokens,omitempty"`
+}
+
 type ClaudeUsage struct {
-	InputTokens  int `json:"input_tokens"`
-	OutputTokens int `json:"output_tokens"`
+	InputTokens              int                       `json:"input_tokens"`
+	OutputTokens             int                       `json:"output_tokens"`
+	CacheCreationInputTokens int                       `json:"cache_creation_input_tokens,omitempty"`
+	CacheReadInputTokens     int                       `json:"cache_read_input_tokens,omitempty"`
+	CacheCreation            *ClaudeCacheCreationUsage `json:"cache_creation,omitempty"`
 }
 
 // ==================== Claude -> Kiro 转换 ====================
@@ -176,7 +185,7 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 				userMsg := KiroUserInputMessage{
 					Content: content,
 					// ModelID: modelID,
-					Origin:  origin,
+					Origin: origin,
 				}
 				if len(images) > 0 {
 					userMsg.Images = images
@@ -201,16 +210,7 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 		}
 	}
 
-	// 确保 history 以 user 开始
-	if len(history) > 0 && history[0].AssistantResponseMessage != nil {
-		history = append([]KiroHistoryMessage{{
-			UserInputMessage: &KiroUserInputMessage{
-				Content: "Begin conversation",
-				// ModelID: modelID,
-				Origin:  origin,
-			},
-		}}, history...)
-	}
+	history = trimLeadingAssistantHistory(history)
 
 	// 构建最终内容
 	finalContent := ""
@@ -237,8 +237,8 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
 		// ModelID: modelID,
-		Origin:  origin,
-		Images:  currentImages,
+		Origin: origin,
+		Images: currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -616,8 +616,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 					UserInputMessage: &KiroUserInputMessage{
 						Content: content,
 						// ModelID: modelID,
-						Origin:  origin,
-						Images:  images,
+						Origin: origin,
+						Images: images,
 					},
 				})
 			}
@@ -662,7 +662,7 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 						UserInputMessage: &KiroUserInputMessage{
 							Content: buildToolResultsContinuation(currentToolResults),
 							// ModelID: modelID,
-							Origin:  origin,
+							Origin: origin,
 							UserInputMessageContext: &UserInputMessageContext{
 								ToolResults: currentToolResults,
 							},
@@ -699,8 +699,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
 		// ModelID: modelID,
-		Origin:  origin,
-		Images:  currentImages,
+		Origin: origin,
+		Images: currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -832,13 +832,27 @@ func buildToolResultsContinuation(toolResults []KiroToolResult) string {
 		return minimalFallbackUserContent
 	}
 
-	joined := strings.Join(parts, "\n\n")
+	joined := toolResultsContinuationPrefix + "\n\n" + strings.Join(parts, "\n\n")
 	if len(joined) > 4000 {
 		return joined[:4000]
 	}
 	return joined
 }
 
+func trimLeadingAssistantHistory(history []KiroHistoryMessage) []KiroHistoryMessage {
+	idx := 0
+	for idx < len(history) && history[idx].AssistantResponseMessage != nil {
+		idx++
+	}
+	if idx == 0 {
+		return history
+	}
+	if idx >= len(history) {
+		return nil
+	}
+	return history[idx:]
+}
+
 func firstClaudeConversationAnchor(messages []ClaudeMessage) string {
 	for _, msg := range messages {
 		if msg.Role != "user" {
@@ -849,15 +863,7 @@ func firstClaudeConversationAnchor(messages []ClaudeMessage) string {
 			return strings.TrimSpace(text)
 		}
 		if len(toolResults) > 0 {
-			return buildToolResultsContinuation(toolResults)
-		}
-	}
-
-	for _, msg := range messages {
-		if strings.TrimSpace(msg.Role) != "" {
-			if text := extractOpenAIMessageText(msg.Content); strings.TrimSpace(text) != "" {
-				return strings.TrimSpace(text)
-			}
+			continue
 		}
 	}
 
@@ -875,25 +881,32 @@ func firstOpenAIConversationAnchor(messages []OpenAIMessage) string {
 		}
 	}
 
-	for _, msg := range messages {
-		text := extractOpenAIMessageText(msg.Content)
-		if strings.TrimSpace(text) != "" {
-			return strings.TrimSpace(text)
-		}
-	}
-
 	return ""
 }
 
 func buildConversationID(modelID, systemPrompt, anchor string) string {
 	anchor = strings.TrimSpace(anchor)
-	if anchor == "" {
+	if isSyntheticConversationAnchor(anchor) {
 		return uuid.New().String()
 	}
 	seed := strings.Join([]string{modelID, strings.TrimSpace(systemPrompt), anchor}, "\n")
 	return uuid.NewSHA1(uuid.NameSpaceURL, []byte(seed)).String()
 }
 
+func isSyntheticConversationAnchor(anchor string) bool {
+	if strings.TrimSpace(anchor) == "" {
+		return true
+	}
+
+	normalized := strings.ToLower(strings.Join(strings.Fields(anchor), " "))
+	switch normalized {
+	case ".", "begin conversation", "please analyze the attached image.", strings.ToLower(minimalFallbackUserContent):
+		return true
+	default:
+		return false
+	}
+}
+
 func extractOpenAITextPart(part map[string]interface{}) (string, bool) {
 	partType, _ := part["type"].(string)
 	switch partType {
diff --git a/proxy/translator_test.go b/proxy/translator_test.go
index c650081..7c5dc43 100644
--- a/proxy/translator_test.go
+++ b/proxy/translator_test.go
@@ -76,7 +76,7 @@ func TestOpenAIToKiroPreservesStructuredAssistantAndToolContent(t *testing.T) {
 	}
 
 	cur := payload.ConversationState.CurrentMessage.UserInputMessage
-	if cur.Content != "tool-result-structured" {
+	if !strings.Contains(cur.Content, "tool-result-structured") {
 		t.Fatalf("expected tool-result continuation content, got %q", cur.Content)
 	}
 	if cur.UserInputMessageContext == nil || len(cur.UserInputMessageContext.ToolResults) != 1 {
@@ -196,3 +196,67 @@ func TestClaudeConversationIDStableFromAnchor(t *testing.T) {
 		t.Fatalf("expected stable conversation ID across turns, got %q vs %q", payloadA.ConversationState.ConversationID, payloadB.ConversationState.ConversationID)
 	}
 }
+
+func TestOpenAIConversationIDRandomForSyntheticAnchor(t *testing.T) {
+	req := &OpenAIRequest{
+		Model: "claude-sonnet-4.5",
+		Messages: []OpenAIMessage{
+			{Role: "assistant", Content: "prefill"},
+		},
+	}
+
+	payloadA := OpenAIToKiro(req, false)
+	payloadB := OpenAIToKiro(req, false)
+
+	if payloadA.ConversationState.ConversationID == payloadB.ConversationState.ConversationID {
+		t.Fatalf("expected synthetic anchor to generate non-deterministic conversation IDs")
+	}
+}
+
+func TestClaudeToKiroDropsLeadingAssistantHistory(t *testing.T) {
+	req := &ClaudeRequest{
+		Model: "claude-sonnet-4.5",
+		Messages: []ClaudeMessage{
+			{Role: "assistant", Content: "prefill"},
+			{Role: "user", Content: "real user message"},
+		},
+	}
+
+	payload := ClaudeToKiro(req, false)
+
+	if len(payload.ConversationState.History) != 0 {
+		t.Fatalf("expected leading assistant-only history to be dropped, got %d entries", len(payload.ConversationState.History))
+	}
+
+	if strings.Contains(payload.ConversationState.CurrentMessage.UserInputMessage.Content, "Begin conversation") {
+		t.Fatalf("unexpected synthetic Begin conversation injection in current content: %q", payload.ConversationState.CurrentMessage.UserInputMessage.Content)
+	}
+}
+
+func TestToolResultsContinuationIncludesInstructionPrefix(t *testing.T) {
+	req := &OpenAIRequest{
+		Model: "claude-sonnet-4.5",
+		Messages: []OpenAIMessage{
+			{Role: "user", Content: "find data"},
+			{Role: "assistant", ToolCalls: []ToolCall{{
+				ID:   "call_1",
+				Type: "function",
+				Function: struct {
+					Name      string `json:"name"`
+					Arguments string `json:"arguments"`
+				}{Name: "fetch", Arguments: "{}"},
+			}}},
+			{Role: "tool", ToolCallID: "call_1", Content: "result-1"},
+		},
+	}
+
+	payload := OpenAIToKiro(req, false)
+	content := payload.ConversationState.CurrentMessage.UserInputMessage.Content
+
+	if !strings.Contains(content, toolResultsContinuationPrefix) {
+		t.Fatalf("expected tool continuation prefix, got %q", content)
+	}
+	if !strings.Contains(content, "result-1") {
+		t.Fatalf("expected tool result text in continuation content, got %q", content)
+	}
+}

From bdc9c7babc70bae4e9ff4ab7c4f704e07e4eec62 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 19:22:34 +0800
Subject: [PATCH 02/22] chore: update dev branch model aggregation and naming

---
 Dockerfile                 |   6 +-
 auth/oidc.go               |   2 +-
 go.mod                     |   2 +-
 main.go                    |   6 +-
 pool/account.go            |   2 +-
 proxy/handler.go           | 182 ++++++++++++++++++++++++++++---------
 proxy/handler_test.go      |  45 ++++++++-
 proxy/kiro.go              |   2 +-
 proxy/kiro_api.go          |   2 +-
 proxy/kiro_headers.go      |   2 +-
 proxy/kiro_headers_test.go |   2 +-
 11 files changed, 196 insertions(+), 57 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 9834d80..db8766c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,16 +5,16 @@ COPY go.mod go.sum ./
 RUN go mod download
 
 COPY . .
-RUN CGO_ENABLED=0 GOOS=linux go build -o kiro-api-proxy .
+RUN CGO_ENABLED=0 GOOS=linux go build -o kiro-go .
 
 FROM alpine:latest
 RUN apk --no-cache add ca-certificates
 
 WORKDIR /app
-COPY --from=builder /app/kiro-api-proxy .
+COPY --from=builder /app/kiro-go .
 COPY --from=builder /app/web ./web
 
 EXPOSE 8080
 VOLUME /app/data
 
-CMD ["./kiro-api-proxy"]
+CMD ["./kiro-go"]
diff --git a/auth/oidc.go b/auth/oidc.go
index 40d3456..5a405d6 100644
--- a/auth/oidc.go
+++ b/auth/oidc.go
@@ -5,7 +5,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"net/http"
 	"time"
 )
diff --git a/go.mod b/go.mod
index f1bd668..4be296b 100644
--- a/go.mod
+++ b/go.mod
@@ -1,4 +1,4 @@
-module kiro-api-proxy
+module kiro-go
 
 go 1.21
 
diff --git a/main.go b/main.go
index 63631ef..99de1c3 100644
--- a/main.go
+++ b/main.go
@@ -15,9 +15,9 @@ package main
 
 import (
 	"fmt"
-	"kiro-api-proxy/config"
-	"kiro-api-proxy/pool"
-	"kiro-api-proxy/proxy"
+	"kiro-go/config"
+	"kiro-go/pool"
+	"kiro-go/proxy"
 	"log"
 	"net/http"
 	"os"
diff --git a/pool/account.go b/pool/account.go
index 0f1a2f1..2f0ab4d 100644
--- a/pool/account.go
+++ b/pool/account.go
@@ -3,7 +3,7 @@
 package pool
 
 import (
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"sync"
 	"sync/atomic"
 	"time"
diff --git a/proxy/handler.go b/proxy/handler.go
index eb3bd36..a628ab5 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -4,9 +4,9 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"kiro-api-proxy/auth"
-	"kiro-api-proxy/config"
-	"kiro-api-proxy/pool"
+	"kiro-go/auth"
+	"kiro-go/config"
+	"kiro-go/pool"
 	"net/http"
 	"strings"
 	"sync"
@@ -341,36 +341,20 @@ func (h *Handler) handleModels(w http.ResponseWriter, r *http.Request) {
 	h.modelsCacheMu.RLock()
 	cached := h.cachedModels
 	h.modelsCacheMu.RUnlock()
+	if len(cached) == 0 {
+		h.refreshModelsCache()
+		h.modelsCacheMu.RLock()
+		cached = h.cachedModels
+		h.modelsCacheMu.RUnlock()
+	}
 
 	thinkingSuffix := config.GetThinkingConfig().Suffix
 
-	var models []map[string]interface{}
-	if len(cached) > 0 {
-		for _, m := range cached {
-			supportsImage := modelSupportsImage(m.InputTypes)
-			models = append(models, buildModelInfo(m.ModelId, "anthropic", supportsImage))
-			// 自动生成 thinking 变体
-			models = append(models, buildModelInfo(m.ModelId+thinkingSuffix, "anthropic", supportsImage))
-		}
-	} else {
-		// fallback 静态列表
-		models = []map[string]interface{}{
-			buildModelInfo("claude-sonnet-4.6", "anthropic", true),
-			buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-opus-4.6", "anthropic", true),
-			buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-opus-4-7", "anthropic", true),
-			buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-sonnet-4.5", "anthropic", true),
-			buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-sonnet-4", "anthropic", true),
-			buildModelInfo("claude-sonnet-4"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-haiku-4.5", "anthropic", true),
-			buildModelInfo("claude-haiku-4.5"+thinkingSuffix, "anthropic", true),
-			buildModelInfo("claude-opus-4.5", "anthropic", true),
-			buildModelInfo("claude-opus-4.5"+thinkingSuffix, "anthropic", true),
-		}
+	models := buildAnthropicModelsResponse(cached, thinkingSuffix)
+	if len(models) == 0 {
+		models = fallbackAnthropicModels(thinkingSuffix)
 	}
+
 	// 添加别名模型
 	models = append(models,
 		buildModelInfo("auto", "kiro-proxy", true),
@@ -383,6 +367,43 @@ func (h *Handler) handleModels(w http.ResponseWriter, r *http.Request) {
 		"object": "list",
 		"data":   models,
 	})
+	return
+}
+
+func buildAnthropicModelsResponse(cached []ModelInfo, thinkingSuffix string) []map[string]interface{} {
+	if len(cached) == 0 {
+		return nil
+	}
+
+	models := make([]map[string]interface{}, 0, len(cached)*2)
+	if len(cached) > 0 {
+		for _, m := range cached {
+			supportsImage := modelSupportsImage(m.InputTypes)
+			models = append(models, buildModelInfo(m.ModelId, "anthropic", supportsImage))
+			// 自动生成 thinking 变体
+			models = append(models, buildModelInfo(m.ModelId+thinkingSuffix, "anthropic", supportsImage))
+		}
+	}
+	return models
+}
+
+func fallbackAnthropicModels(thinkingSuffix string) []map[string]interface{} {
+	return []map[string]interface{}{
+		buildModelInfo("claude-sonnet-4.6", "anthropic", true),
+		buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4.6", "anthropic", true),
+		buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4-7", "anthropic", true),
+		buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-sonnet-4.5", "anthropic", true),
+		buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-sonnet-4", "anthropic", true),
+		buildModelInfo("claude-sonnet-4"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-haiku-4.5", "anthropic", true),
+		buildModelInfo("claude-haiku-4.5"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4.5", "anthropic", true),
+		buildModelInfo("claude-opus-4.5"+thinkingSuffix, "anthropic", true),
+	}
 }
 
 func modelSupportsImage(inputTypes []string) bool {
@@ -430,31 +451,106 @@ func buildModelInfo(id, ownedBy string, supportsImage bool) map[string]interface
 
 // refreshModelsCache 从 Kiro API 拉取模型列表并缓存
 func (h *Handler) refreshModelsCache() {
-	account := h.pool.GetNext()
-	if account == nil {
+	accounts := config.GetEnabledAccounts()
+	if len(accounts) == 0 {
 		return
 	}
 
-	// 确保 token 有效
-	if err := h.ensureValidToken(account); err != nil {
-		return
+	aggregated := make([]ModelInfo, 0)
+	for i := range accounts {
+		account := &accounts[i]
+		if err := h.ensureValidToken(account); err != nil {
+			fmt.Printf("[ModelsCache] Skip %s token refresh failed: %v\n", account.Email, err)
+			continue
+		}
+
+		models, err := ListAvailableModels(account)
+		if err != nil {
+			fmt.Printf("[ModelsCache] Failed to refresh for %s: %v\n", account.Email, err)
+			continue
+		}
+		aggregated = mergeUniqueModels(aggregated, models)
 	}
 
-	models, err := ListAvailableModels(account)
-	if err != nil {
-		fmt.Printf("[ModelsCache] Failed to refresh: %v\n", err)
-		return
-	}
-
-	if len(models) > 0 {
+	if len(aggregated) > 0 {
 		h.modelsCacheMu.Lock()
-		h.cachedModels = models
+		h.cachedModels = aggregated
 		h.modelsCacheTime = time.Now().Unix()
 		h.modelsCacheMu.Unlock()
-		fmt.Printf("[ModelsCache] Cached %d models\n", len(models))
+		fmt.Printf("[ModelsCache] Cached %d models\n", len(aggregated))
 	}
 }
 
+func mergeUniqueModels(existing []ModelInfo, incoming []ModelInfo) []ModelInfo {
+	if len(incoming) == 0 {
+		return existing
+	}
+
+	indexByID := make(map[string]int, len(existing))
+	merged := make([]ModelInfo, len(existing))
+	copy(merged, existing)
+	for i, model := range merged {
+		indexByID[strings.ToLower(strings.TrimSpace(model.ModelId))] = i
+	}
+
+	for _, model := range incoming {
+		key := strings.ToLower(strings.TrimSpace(model.ModelId))
+		if key == "" {
+			continue
+		}
+		if idx, ok := indexByID[key]; ok {
+			merged[idx] = mergeModelInfo(merged[idx], model)
+			continue
+		}
+		indexByID[key] = len(merged)
+		merged = append(merged, model)
+	}
+
+	return merged
+}
+
+func mergeModelInfo(base ModelInfo, extra ModelInfo) ModelInfo {
+	if base.ModelName == "" {
+		base.ModelName = extra.ModelName
+	}
+	if base.Description == "" {
+		base.Description = extra.Description
+	}
+	if base.RateMultiplier == 0 {
+		base.RateMultiplier = extra.RateMultiplier
+	}
+	if base.TokenLimits == nil {
+		base.TokenLimits = extra.TokenLimits
+	}
+	base.InputTypes = mergeStringLists(base.InputTypes, extra.InputTypes)
+	return base
+}
+
+func mergeStringLists(base []string, extra []string) []string {
+	if len(extra) == 0 {
+		return base
+	}
+	seen := make(map[string]bool, len(base)+len(extra))
+	merged := make([]string, 0, len(base)+len(extra))
+	for _, item := range base {
+		key := strings.ToLower(strings.TrimSpace(item))
+		if key == "" || seen[key] {
+			continue
+		}
+		seen[key] = true
+		merged = append(merged, item)
+	}
+	for _, item := range extra {
+		key := strings.ToLower(strings.TrimSpace(item))
+		if key == "" || seen[key] {
+			continue
+		}
+		seen[key] = true
+		merged = append(merged, item)
+	}
+	return merged
+}
+
 // handleCountTokens Token 计数（Claude Code 会调用）
 func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
 	if r.Method != "POST" {
diff --git a/proxy/handler_test.go b/proxy/handler_test.go
index a435dec..672092a 100644
--- a/proxy/handler_test.go
+++ b/proxy/handler_test.go
@@ -1,6 +1,8 @@
 package proxy
 
-import "testing"
+import (
+	"testing"
+)
 
 func TestThinkingSourceReasoningFirst(t *testing.T) {
 	var source thinkingStreamSource
@@ -98,3 +100,44 @@ func TestValidateClaudeRequestShapeRejectsAssistantPrefill(t *testing.T) {
 		t.Fatalf("expected assistant-prefill final message to be rejected")
 	}
 }
+
+func TestMergeUniqueModelsPreservesUnionAcrossAccounts(t *testing.T) {
+	base := []ModelInfo{
+		{ModelId: "claude-sonnet-4.5", InputTypes: []string{"TEXT"}},
+	}
+	incoming := []ModelInfo{
+		{ModelId: "claude-sonnet-4.5", InputTypes: []string{"image"}},
+		{ModelId: "claude-opus-4-7", InputTypes: []string{"text"}},
+	}
+
+	merged := mergeUniqueModels(base, incoming)
+	if len(merged) != 2 {
+		t.Fatalf("expected 2 unique models, got %d", len(merged))
+	}
+	if !modelSupportsImage(merged[0].InputTypes) {
+		t.Fatalf("expected merged input types to preserve image capability, got %#v", merged[0].InputTypes)
+	}
+	if merged[1].ModelId != "claude-opus-4-7" {
+		t.Fatalf("expected second model to be claude-opus-4-7, got %q", merged[1].ModelId)
+	}
+}
+
+func TestBuildAnthropicModelsResponseGeneratesThinkingVariants(t *testing.T) {
+	models := buildAnthropicModelsResponse([]ModelInfo{{
+		ModelId:    "claude-sonnet-4.5",
+		InputTypes: []string{"text", "image"},
+	}}, "-thinking")
+
+	if len(models) != 2 {
+		t.Fatalf("expected base model and thinking variant, got %d", len(models))
+	}
+	if models[0]["id"] != "claude-sonnet-4.5" {
+		t.Fatalf("unexpected base model id: %#v", models[0]["id"])
+	}
+	if models[1]["id"] != "claude-sonnet-4.5-thinking" {
+		t.Fatalf("unexpected thinking model id: %#v", models[1]["id"])
+	}
+	if supportsImage, ok := models[0]["supports_image"].(bool); !ok || !supportsImage {
+		t.Fatalf("expected image capability to be preserved, got %#v", models[0]["supports_image"])
+	}
+}
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 00109b7..7fcaa64 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -7,7 +7,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"net/http"
 	"net/url"
 	"strconv"
diff --git a/proxy/kiro_api.go b/proxy/kiro_api.go
index 2b2d3dc..948336e 100644
--- a/proxy/kiro_api.go
+++ b/proxy/kiro_api.go
@@ -4,7 +4,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"net/http"
 	"strings"
 	"time"
diff --git a/proxy/kiro_headers.go b/proxy/kiro_headers.go
index c667987..baf3fc6 100644
--- a/proxy/kiro_headers.go
+++ b/proxy/kiro_headers.go
@@ -2,7 +2,7 @@ package proxy
 
 import (
 	"fmt"
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"net/http"
 )
 
diff --git a/proxy/kiro_headers_test.go b/proxy/kiro_headers_test.go
index 99e7074..a4b0805 100644
--- a/proxy/kiro_headers_test.go
+++ b/proxy/kiro_headers_test.go
@@ -1,7 +1,7 @@
 package proxy
 
 import (
-	"kiro-api-proxy/config"
+	"kiro-go/config"
 	"strings"
 	"testing"
 )

From 74a959260e788431c0754ccd633c6c5d1396cbb3 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 20:57:40 +0800
Subject: [PATCH 03/22] chore: optimize model handling

---
 proxy/handler.go    |  4 ++--
 proxy/translator.go | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/proxy/handler.go b/proxy/handler.go
index a628ab5..85afc5e 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -393,8 +393,8 @@ func fallbackAnthropicModels(thinkingSuffix string) []map[string]interface{} {
 		buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-opus-4.6", "anthropic", true),
 		buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
-		buildModelInfo("claude-opus-4-7", "anthropic", true),
-		buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4.7", "anthropic", true),
+		buildModelInfo("claude-opus-4.7"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5", "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4", "anthropic", true),
diff --git a/proxy/translator.go b/proxy/translator.go
index 957eb72..500b74e 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -22,8 +22,8 @@ var modelMapOrdered = []modelMapping{
 	{"claude-sonnet-4.5", "claude-sonnet-4.5"},
 	{"claude-sonnet-4-6", "claude-sonnet-4.6"},
 	{"claude-sonnet-4.6", "claude-sonnet-4.6"},
-	{"claude-opus-4-7", "claude-opus-4-7"},
-	{"claude-opus-4.7", "claude-opus-4-7"},
+	{"claude-opus-4-7", "claude-opus-4.7"},
+	{"claude-opus-4.7", "claude-opus-4.7"},
 	{"claude-haiku-4-5", "claude-haiku-4.5"},
 	{"claude-haiku-4.5", "claude-haiku-4.5"},
 	{"claude-opus-4-5", "claude-opus-4.5"},
@@ -73,7 +73,7 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 		return model, thinking
 	}
 
-	return "claude-sonnet-4.5", thinking
+	return model, thinking
 }
 
 func MapModel(model string) string {
@@ -184,8 +184,8 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 			} else {
 				userMsg := KiroUserInputMessage{
 					Content: content,
-					// ModelID: modelID,
-					Origin: origin,
+					ModelID: modelID,
+					Origin:  origin,
 				}
 				if len(images) > 0 {
 					userMsg.Images = images
@@ -236,9 +236,9 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstClaudeConversationAnchor(req.Messages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -615,9 +615,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 				history = append(history, KiroHistoryMessage{
 					UserInputMessage: &KiroUserInputMessage{
 						Content: content,
-						// ModelID: modelID,
-						Origin: origin,
-						Images: images,
+						ModelID: modelID,
+						Origin:  origin,
+						Images:  images,
 					},
 				})
 			}
@@ -661,8 +661,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 					history = append(history, KiroHistoryMessage{
 						UserInputMessage: &KiroUserInputMessage{
 							Content: buildToolResultsContinuation(currentToolResults),
-							// ModelID: modelID,
-							Origin: origin,
+							ModelID: modelID,
+							Origin:  origin,
 							UserInputMessageContext: &UserInputMessageContext{
 								ToolResults: currentToolResults,
 							},
@@ -698,9 +698,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstOpenAIConversationAnchor(nonSystemMessages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {

From 140492e6c7ba70ade0e93a4ed68893506439db6d Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 21:14:13 +0800
Subject: [PATCH 04/22] chore: update version metadata

---
 version.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/version.json b/version.json
index e206569..4ac5bf5 100644
--- a/version.json
+++ b/version.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.0.3",
-  "changelog": "✅ 新增 clientID/clientSecret 校验\n⚖️ 新增账号权重字段，支持加权轮询策略\n🔄 批量账号管理（启用/禁用/刷新/详情）\n🚫 自动跳过用量耗尽的账号\n🔧 重构模型映射为有序列表，避免误匹配",
+  "version": "1.0.4",
+  "changelog": "✨ Added several improvements and bug fixes across the project.\n🛠️ 新增并修复了一些内容，包含若干功能改进与问题修复。",
   "download": "https://github.com/Quorinex/Kiro-Go"
 }

From f853d0544b4f93c44921683774173a0fa1f36b3f Mon Sep 17 00:00:00 2001
From: Quorinex <85111@proton.me>
Date: Sun, 10 May 2026 21:16:36 +0800
Subject: [PATCH 05/22] Merge branch 'dev' (#32)

* chore: optimize model handling

* chore: update version metadata

---------

Co-authored-by: Quorinex <quorinex@users.noreply.github.com>
---
 proxy/handler.go    |  4 ++--
 proxy/translator.go | 32 ++++++++++++++++----------------
 version.json        |  4 ++--
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/proxy/handler.go b/proxy/handler.go
index a628ab5..85afc5e 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -393,8 +393,8 @@ func fallbackAnthropicModels(thinkingSuffix string) []map[string]interface{} {
 		buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-opus-4.6", "anthropic", true),
 		buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
-		buildModelInfo("claude-opus-4-7", "anthropic", true),
-		buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4.7", "anthropic", true),
+		buildModelInfo("claude-opus-4.7"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5", "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4", "anthropic", true),
diff --git a/proxy/translator.go b/proxy/translator.go
index 957eb72..500b74e 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -22,8 +22,8 @@ var modelMapOrdered = []modelMapping{
 	{"claude-sonnet-4.5", "claude-sonnet-4.5"},
 	{"claude-sonnet-4-6", "claude-sonnet-4.6"},
 	{"claude-sonnet-4.6", "claude-sonnet-4.6"},
-	{"claude-opus-4-7", "claude-opus-4-7"},
-	{"claude-opus-4.7", "claude-opus-4-7"},
+	{"claude-opus-4-7", "claude-opus-4.7"},
+	{"claude-opus-4.7", "claude-opus-4.7"},
 	{"claude-haiku-4-5", "claude-haiku-4.5"},
 	{"claude-haiku-4.5", "claude-haiku-4.5"},
 	{"claude-opus-4-5", "claude-opus-4.5"},
@@ -73,7 +73,7 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 		return model, thinking
 	}
 
-	return "claude-sonnet-4.5", thinking
+	return model, thinking
 }
 
 func MapModel(model string) string {
@@ -184,8 +184,8 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 			} else {
 				userMsg := KiroUserInputMessage{
 					Content: content,
-					// ModelID: modelID,
-					Origin: origin,
+					ModelID: modelID,
+					Origin:  origin,
 				}
 				if len(images) > 0 {
 					userMsg.Images = images
@@ -236,9 +236,9 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstClaudeConversationAnchor(req.Messages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -615,9 +615,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 				history = append(history, KiroHistoryMessage{
 					UserInputMessage: &KiroUserInputMessage{
 						Content: content,
-						// ModelID: modelID,
-						Origin: origin,
-						Images: images,
+						ModelID: modelID,
+						Origin:  origin,
+						Images:  images,
 					},
 				})
 			}
@@ -661,8 +661,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 					history = append(history, KiroHistoryMessage{
 						UserInputMessage: &KiroUserInputMessage{
 							Content: buildToolResultsContinuation(currentToolResults),
-							// ModelID: modelID,
-							Origin: origin,
+							ModelID: modelID,
+							Origin:  origin,
 							UserInputMessageContext: &UserInputMessageContext{
 								ToolResults: currentToolResults,
 							},
@@ -698,9 +698,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstOpenAIConversationAnchor(nonSystemMessages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
diff --git a/version.json b/version.json
index e206569..4ac5bf5 100644
--- a/version.json
+++ b/version.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.0.3",
-  "changelog": "✅ 新增 clientID/clientSecret 校验\n⚖️ 新增账号权重字段，支持加权轮询策略\n🔄 批量账号管理（启用/禁用/刷新/详情）\n🚫 自动跳过用量耗尽的账号\n🔧 重构模型映射为有序列表，避免误匹配",
+  "version": "1.0.4",
+  "changelog": "✨ Added several improvements and bug fixes across the project.\n🛠️ 新增并修复了一些内容，包含若干功能改进与问题修复。",
   "download": "https://github.com/Quorinex/Kiro-Go"
 }

From e20b2a88164be7fc1a6140e2cf7f69b0fc809a5b Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 21:21:24 +0800
Subject: [PATCH 06/22] chore: sync config version

---
 config/config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.go b/config/config.go
index a70eaea..22cfd48 100644
--- a/config/config.go
+++ b/config/config.go
@@ -137,7 +137,7 @@ type AccountInfo struct {
 }
 
 // Version 当前版本号
-const Version = "1.0.3"
+const Version = "1.0.4"
 
 var (
 	cfg     *Config

From 3089d028d28c1eb6d4e659c207af3a76dd213b54 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 21:21:24 +0800
Subject: [PATCH 07/22] chore: sync config version

---
 config/config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.go b/config/config.go
index a70eaea..22cfd48 100644
--- a/config/config.go
+++ b/config/config.go
@@ -137,7 +137,7 @@ type AccountInfo struct {
 }
 
 // Version 当前版本号
-const Version = "1.0.3"
+const Version = "1.0.4"
 
 var (
 	cfg     *Config

From 834890f4be696b9cb2cd155af03c82fd81f44ff3 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 22:03:18 +0800
Subject: [PATCH 08/22] docs: simplify README and add contributing notes

---
 README.md    | 219 ++++++++-------------------------------------------
 README_CN.md | 219 ++++++++-------------------------------------------
 2 files changed, 62 insertions(+), 376 deletions(-)

diff --git a/README.md b/README.md
index b5d9b85..49a1263 100644
--- a/README.md
+++ b/README.md
@@ -8,20 +8,15 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 
 [English](README.md) | [中文](README_CN.md)
 
+If this project helps you, a Star would mean a lot.
+
 ## Features
 
-- 🔄 **Anthropic Claude API** - Full support for `/v1/messages` endpoint
-- 🤖 **OpenAI Chat API** - Compatible with `/v1/chat/completions`
-- ⚖️ **Multi-Account Pool** - Round-robin load balancing
-- 🔐 **Auto Token Refresh** - Seamless token management
-- 📡 **Streaming** - Real-time SSE responses
-- 🎛️ **Web Admin Panel** - Easy account management
-- 🔑 **Multiple Auth Methods** - AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, Local Cache, Credentials
-- 📊 **Usage Tracking** - Monitor requests, tokens, and credits
-- 📦 **Account Export/Import** - Compatible with Kiro Account Manager format
-- 🔄 **Dynamic Model List** - Auto-synced from Kiro API with caching
-- 🔔 **Version Update Check** - Automatic new version notification
-- 🌐 **i18n** - Chinese / English admin panel
+- Anthropic `/v1/messages` & OpenAI `/v1/chat/completions`
+- Multi-account pool with round-robin load balancing
+- Auto token refresh, SSE streaming, Web admin panel
+- Multiple auth methods: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON
+- Usage tracking, account import/export, i18n (CN / EN)
 
 ## Quick Start
 
@@ -30,19 +25,13 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# Create data directory for persistence
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker Run
 
 ```bash
-# Create data directory
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 The `/app/data` volume stores `config.json` with accounts and settings. Mount it for data persistence.
-
 ### Build from Source
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## Configuration
+The config file is auto-created at `data/config.json` on first run. Mount `/app/data` for persistence. The default admin password is `changeme` — override it via the `ADMIN_PASSWORD` env var or change it in the admin panel before going to production.
 
-Config file is auto-created at `data/config.json` on first run:
+## Usage
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+Open `http://localhost:8080/admin`, log in, add accounts, then call the API:
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"Hello!"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello!"}]}'
 ```
 
-> ⚠️ **Change the default password before production use!**
+## Thinking Mode
+
+Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode.
 
 ## Environment Variables
 
@@ -87,168 +81,17 @@ Config file is auto-created at `data/config.json` on first run:
 | `CONFIG_PATH` | Config file path | `data/config.json` |
 | `ADMIN_PASSWORD` | Admin panel password (overrides config) | - |
 
-## Usage
+## Contributing
 
-### 1. Access Admin Panel
+Friendly discussion is welcome. If you run into issues, try asking Claude Code, Codex, or similar tools for help first — most problems can be solved that way. PRs are even better.
 
-Open `http://localhost:8080/admin` and login with your password.
+## Friend Links
 
-### 2. Add Accounts
-
-Multiple methods available:
-
-| Method | Description |
-|--------|-------------|
-| **AWS Builder ID** | Login with AWS Builder ID (personal accounts) |
-| **IAM Identity Center (Enterprise SSO)** | Login with IAM Identity Center (enterprise accounts) |
-| **SSO Token** | Import `x-amz-sso_authn` token from browser |
-| **Kiro Local Cache** | Import from local Kiro IDE cache files |
-| **Credentials JSON** | Import JSON from Kiro Account Manager |
-
-#### Credentials Format
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. Call API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-## Model Mapping
-
-| Request Model | Actual Model |
-|---------------|--------------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## Thinking Mode
-
-Enable extended thinking by adding a suffix to the model name (default: `-thinking`).
-
-### Usage
-
-```bash
-# OpenAI API with thinking
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API with thinking
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "Analyze this problem"}]
-  }'
-```
-
-### Configuration
-
-Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**:
-
-| Setting | Description | Options |
-|---------|-------------|---------|
-| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) |
-| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `<thinking>` tag, `<think>` tag |
-| **Claude Output Format** | How thinking content is returned in Claude API | `<thinking>` tag (default), `<think>` tag, plain text |
-
-### Output Formats
-
-**OpenAI API (`/v1/chat/completions`)**:
-- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible)
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags in content
-- `think` - Thinking wrapped in `<think>...</think>` tags in content
-
-**Claude API (`/v1/messages`)**:
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags (default)
-- `think` - Thinking wrapped in `<think>...</think>` tags
-- `reasoning_content` - Plain text output
-
-## API Endpoints
-
-| Endpoint | Description |
-|----------|-------------|
-| `GET /health` | Health check |
-| `GET /v1/models` | List models |
-| `GET /v1/stats` | Statistics |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token counting |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | Admin panel |
-
-## Project Structure
-
-```
-Kiro-Go/
-├── main.go              # Entry point
-├── version.json         # Version info for update check
-├── config/              # Configuration management
-├── pool/                # Account pool & load balancing
-├── proxy/               # API handlers & Kiro client
-│   ├── handler.go       # HTTP routing & admin API
-│   ├── kiro.go          # Kiro API client
-│   ├── kiro_api.go      # Kiro REST API (usage, models)
-│   └── translator.go    # Request/response conversion
-├── auth/                # Authentication
-│   ├── builderid.go     # AWS Builder ID login
-│   ├── iam_sso.go       # IAM SSO login
-│   ├── oidc.go          # OIDC token refresh
-│   └── sso_token.go     # SSO token import
-├── web/                 # Admin panel frontend
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## Disclaimer
 
-This project is provided for **educational and research purposes only**.
-
-- This software is not affiliated with, endorsed by, or associated with Amazon, AWS, or Kiro in any way
-- Users are solely responsible for ensuring their use complies with all applicable terms of service and laws
-- The authors assume no liability for any misuse or violations arising from the use of this software
-- Use at your own risk
-
-By using this software, you acknowledge that you have read and understood this disclaimer.
+For educational and research purposes only. Not affiliated with Amazon, AWS, or Kiro. Users are responsible for complying with applicable terms of service and laws. Use at your own risk.
 
 ## License
 
diff --git a/README_CN.md b/README_CN.md
index 750884b..b6b79d2 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -8,20 +8,15 @@
 
 [English](README.md) | 中文
 
+如果这个项目帮到了你，欢迎点个 Star 支持一下。
+
 ## 功能特性
 
-- 🔄 **Anthropic Claude API** - 完整支持 `/v1/messages` 端点
-- 🤖 **OpenAI Chat API** - 兼容 `/v1/chat/completions`
-- ⚖️ **多账号池** - 轮询负载均衡
-- 🔐 **自动刷新 Token** - 无缝 Token 管理
-- 📡 **流式响应** - 实时 SSE 输出
-- 🎛️ **Web 管理面板** - 便捷的账号管理
-- 🔑 **多种认证方式** - AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
-- 📊 **用量追踪** - 监控请求数、Token、Credits
-- 📦 **账号导入导出** - 兼容 Kiro Account Manager 格式
-- 🔄 **动态模型列表** - 自动从 Kiro API 同步并缓存
-- 🔔 **版本更新检测** - 自动提醒新版本
-- 🌐 **中英双语** - 管理面板支持中文 / 英文
+- Anthropic `/v1/messages` 与 OpenAI `/v1/chat/completions`
+- 多账号池轮询负载均衡
+- 自动 Token 刷新、SSE 流式输出、Web 管理面板
+- 多种认证方式：AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
+- 用量追踪、账号导入导出、中英双语
 
 ## 快速开始
 
@@ -30,19 +25,13 @@
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# 创建数据目录用于持久化
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker 运行
 
 ```bash
-# 创建数据目录
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 `/app/data` 卷存储 `config.json`（包含账号和设置），挂载此目录以实现数据持久化。
-
 ### 源码编译
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## 配置
+首次运行会在 `data/config.json` 自动生成配置，挂载 `/app/data` 以持久化。默认管理密码为 `changeme`，生产环境请务必通过 `ADMIN_PASSWORD` 环境变量或在管理面板中修改。
 
-首次运行会自动创建 `data/config.json`：
+## 使用方法
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+访问 `http://localhost:8080/admin` 登录、添加账号，然后调用 API：
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"你好！"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"你好！"}]}'
 ```
 
-> ⚠️ **生产环境请务必修改默认密码！**
+## 思考模式
+
+在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。
 
 ## 环境变量
 
@@ -87,168 +81,17 @@ go build -o kiro-go .
 | `CONFIG_PATH` | 配置文件路径 | `data/config.json` |
 | `ADMIN_PASSWORD` | 管理面板密码（覆盖配置文件） | - |
 
-## 使用方法
+## 参与贡献
 
-### 1. 访问管理面板
+欢迎友好交流。遇到问题时，建议先让 Claude Code、Codex 等工具帮忙排查一下，大部分问题都能自己解决。如果能直接提个 PR 就更好了。
 
-打开 `http://localhost:8080/admin`，输入密码登录。
+## 友情链接
 
-### 2. 添加账号
-
-支持多种方式：
-
-| 方式 | 说明 |
-|------|------|
-| **AWS Builder ID** | 通过 AWS Builder ID 授权登录（个人账号） |
-| **IAM Identity Center (企业 SSO) 登录** | 通过 IAM Identity Center (企业 SSO) 授权登录（企业账号） |
-| **SSO Token** | 通过浏览器 `x-amz-sso_authn` Token 添加账号 |
-| **Kiro 本地缓存** | 通过 Kiro IDE 本地缓存文件添加账号 |
-| **凭证 JSON** | 通过 Kiro Account Manager 导出的凭证添加账号 |
-
-#### 凭证格式
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. 调用 API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-## 模型映射
-
-| 请求模型 | 实际模型 |
-|---------|---------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## 思考模式
-
-在模型名称后添加后缀（默认：`-thinking`）即可启用扩展思考模式。
-
-### 使用方法
-
-```bash
-# OpenAI API 启用思考
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "一步步解决：15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API 启用思考
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "分析这个问题"}]
-  }'
-```
-
-### 配置
-
-在管理面板的 **设置 > Thinking 模式设置** 中配置：
-
-| 设置 | 说明 | 选项 |
-|-----|------|------|
-| **触发后缀** | 启用思考的模型名称后缀 | 默认：`-thinking`（可自定义，如 `-think`、`-sikao`） |
-| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`（DeepSeek 兼容）、`<thinking>` 标签、`<think>` 标签 |
-| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `<thinking>` 标签（默认）、`<think>` 标签、纯文本 |
-
-### 输出格式说明
-
-**OpenAI API (`/v1/chat/completions`)**：
-- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段（DeepSeek 兼容）
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹在 content 中
-- `think` - 思考内容用 `<think>...</think>` 标签包裹在 content 中
-
-**Claude API (`/v1/messages`)**：
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹（默认）
-- `think` - 思考内容用 `<think>...</think>` 标签包裹
-- `reasoning_content` - 纯文本输出
-
-## API 端点
-
-| 端点 | 说明 |
-|-----|------|
-| `GET /health` | 健康检查 |
-| `GET /v1/models` | 模型列表 |
-| `GET /v1/stats` | 统计数据 |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token 计数 |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | 管理面板 |
-
-## 项目结构
-
-```
-Kiro-Go/
-├── main.go              # 入口
-├── version.json         # 版本信息（用于更新检测）
-├── config/              # 配置管理
-├── pool/                # 账号池 & 负载均衡
-├── proxy/               # API 处理 & Kiro 客户端
-│   ├── handler.go       # HTTP 路由 & 管理 API
-│   ├── kiro.go          # Kiro API 客户端
-│   ├── kiro_api.go      # Kiro REST API（用量、模型）
-│   └── translator.go    # 请求/响应转换
-├── auth/                # 认证
-│   ├── builderid.go     # AWS Builder ID 登录
-│   ├── iam_sso.go       # IAM SSO 登录
-│   ├── oidc.go          # OIDC Token 刷新
-│   └── sso_token.go     # SSO Token 导入
-├── web/                 # 管理面板前端
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## 免责声明
 
-本项目仅供**学习和研究目的**使用。
-
-- 本软件与 Amazon、AWS 或 Kiro 没有任何关联、认可或合作关系
-- 用户需自行确保其使用行为符合所有适用的服务条款和法律法规
-- 作者不对因使用本软件而产生的任何滥用或违规行为承担责任
-- 使用风险自负
-
-使用本软件即表示您已阅读并理解本免责声明。
+本项目仅供学习和研究目的使用，与 Amazon、AWS 或 Kiro 没有任何关联。用户需自行确保使用行为符合所有适用的服务条款和法律法规，使用风险自负。
 
 ## 许可证
 

From 9dbe0cb55f0fd8b163e6f54b83b0a2fef53eabd5 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 22:03:18 +0800
Subject: [PATCH 09/22] docs: simplify README and add contributing notes

---
 README.md    | 219 ++++++++-------------------------------------------
 README_CN.md | 219 ++++++++-------------------------------------------
 2 files changed, 62 insertions(+), 376 deletions(-)

diff --git a/README.md b/README.md
index b5d9b85..49a1263 100644
--- a/README.md
+++ b/README.md
@@ -8,20 +8,15 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 
 [English](README.md) | [中文](README_CN.md)
 
+If this project helps you, a Star would mean a lot.
+
 ## Features
 
-- 🔄 **Anthropic Claude API** - Full support for `/v1/messages` endpoint
-- 🤖 **OpenAI Chat API** - Compatible with `/v1/chat/completions`
-- ⚖️ **Multi-Account Pool** - Round-robin load balancing
-- 🔐 **Auto Token Refresh** - Seamless token management
-- 📡 **Streaming** - Real-time SSE responses
-- 🎛️ **Web Admin Panel** - Easy account management
-- 🔑 **Multiple Auth Methods** - AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, Local Cache, Credentials
-- 📊 **Usage Tracking** - Monitor requests, tokens, and credits
-- 📦 **Account Export/Import** - Compatible with Kiro Account Manager format
-- 🔄 **Dynamic Model List** - Auto-synced from Kiro API with caching
-- 🔔 **Version Update Check** - Automatic new version notification
-- 🌐 **i18n** - Chinese / English admin panel
+- Anthropic `/v1/messages` & OpenAI `/v1/chat/completions`
+- Multi-account pool with round-robin load balancing
+- Auto token refresh, SSE streaming, Web admin panel
+- Multiple auth methods: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON
+- Usage tracking, account import/export, i18n (CN / EN)
 
 ## Quick Start
 
@@ -30,19 +25,13 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# Create data directory for persistence
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker Run
 
 ```bash
-# Create data directory
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 The `/app/data` volume stores `config.json` with accounts and settings. Mount it for data persistence.
-
 ### Build from Source
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## Configuration
+The config file is auto-created at `data/config.json` on first run. Mount `/app/data` for persistence. The default admin password is `changeme` — override it via the `ADMIN_PASSWORD` env var or change it in the admin panel before going to production.
 
-Config file is auto-created at `data/config.json` on first run:
+## Usage
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+Open `http://localhost:8080/admin`, log in, add accounts, then call the API:
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"Hello!"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello!"}]}'
 ```
 
-> ⚠️ **Change the default password before production use!**
+## Thinking Mode
+
+Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode.
 
 ## Environment Variables
 
@@ -87,168 +81,17 @@ Config file is auto-created at `data/config.json` on first run:
 | `CONFIG_PATH` | Config file path | `data/config.json` |
 | `ADMIN_PASSWORD` | Admin panel password (overrides config) | - |
 
-## Usage
+## Contributing
 
-### 1. Access Admin Panel
+Friendly discussion is welcome. If you run into issues, try asking Claude Code, Codex, or similar tools for help first — most problems can be solved that way. PRs are even better.
 
-Open `http://localhost:8080/admin` and login with your password.
+## Friend Links
 
-### 2. Add Accounts
-
-Multiple methods available:
-
-| Method | Description |
-|--------|-------------|
-| **AWS Builder ID** | Login with AWS Builder ID (personal accounts) |
-| **IAM Identity Center (Enterprise SSO)** | Login with IAM Identity Center (enterprise accounts) |
-| **SSO Token** | Import `x-amz-sso_authn` token from browser |
-| **Kiro Local Cache** | Import from local Kiro IDE cache files |
-| **Credentials JSON** | Import JSON from Kiro Account Manager |
-
-#### Credentials Format
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. Call API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-## Model Mapping
-
-| Request Model | Actual Model |
-|---------------|--------------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## Thinking Mode
-
-Enable extended thinking by adding a suffix to the model name (default: `-thinking`).
-
-### Usage
-
-```bash
-# OpenAI API with thinking
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API with thinking
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "Analyze this problem"}]
-  }'
-```
-
-### Configuration
-
-Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**:
-
-| Setting | Description | Options |
-|---------|-------------|---------|
-| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) |
-| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `<thinking>` tag, `<think>` tag |
-| **Claude Output Format** | How thinking content is returned in Claude API | `<thinking>` tag (default), `<think>` tag, plain text |
-
-### Output Formats
-
-**OpenAI API (`/v1/chat/completions`)**:
-- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible)
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags in content
-- `think` - Thinking wrapped in `<think>...</think>` tags in content
-
-**Claude API (`/v1/messages`)**:
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags (default)
-- `think` - Thinking wrapped in `<think>...</think>` tags
-- `reasoning_content` - Plain text output
-
-## API Endpoints
-
-| Endpoint | Description |
-|----------|-------------|
-| `GET /health` | Health check |
-| `GET /v1/models` | List models |
-| `GET /v1/stats` | Statistics |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token counting |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | Admin panel |
-
-## Project Structure
-
-```
-Kiro-Go/
-├── main.go              # Entry point
-├── version.json         # Version info for update check
-├── config/              # Configuration management
-├── pool/                # Account pool & load balancing
-├── proxy/               # API handlers & Kiro client
-│   ├── handler.go       # HTTP routing & admin API
-│   ├── kiro.go          # Kiro API client
-│   ├── kiro_api.go      # Kiro REST API (usage, models)
-│   └── translator.go    # Request/response conversion
-├── auth/                # Authentication
-│   ├── builderid.go     # AWS Builder ID login
-│   ├── iam_sso.go       # IAM SSO login
-│   ├── oidc.go          # OIDC token refresh
-│   └── sso_token.go     # SSO token import
-├── web/                 # Admin panel frontend
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## Disclaimer
 
-This project is provided for **educational and research purposes only**.
-
-- This software is not affiliated with, endorsed by, or associated with Amazon, AWS, or Kiro in any way
-- Users are solely responsible for ensuring their use complies with all applicable terms of service and laws
-- The authors assume no liability for any misuse or violations arising from the use of this software
-- Use at your own risk
-
-By using this software, you acknowledge that you have read and understood this disclaimer.
+For educational and research purposes only. Not affiliated with Amazon, AWS, or Kiro. Users are responsible for complying with applicable terms of service and laws. Use at your own risk.
 
 ## License
 
diff --git a/README_CN.md b/README_CN.md
index 750884b..b6b79d2 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -8,20 +8,15 @@
 
 [English](README.md) | 中文
 
+如果这个项目帮到了你，欢迎点个 Star 支持一下。
+
 ## 功能特性
 
-- 🔄 **Anthropic Claude API** - 完整支持 `/v1/messages` 端点
-- 🤖 **OpenAI Chat API** - 兼容 `/v1/chat/completions`
-- ⚖️ **多账号池** - 轮询负载均衡
-- 🔐 **自动刷新 Token** - 无缝 Token 管理
-- 📡 **流式响应** - 实时 SSE 输出
-- 🎛️ **Web 管理面板** - 便捷的账号管理
-- 🔑 **多种认证方式** - AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
-- 📊 **用量追踪** - 监控请求数、Token、Credits
-- 📦 **账号导入导出** - 兼容 Kiro Account Manager 格式
-- 🔄 **动态模型列表** - 自动从 Kiro API 同步并缓存
-- 🔔 **版本更新检测** - 自动提醒新版本
-- 🌐 **中英双语** - 管理面板支持中文 / 英文
+- Anthropic `/v1/messages` 与 OpenAI `/v1/chat/completions`
+- 多账号池轮询负载均衡
+- 自动 Token 刷新、SSE 流式输出、Web 管理面板
+- 多种认证方式：AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
+- 用量追踪、账号导入导出、中英双语
 
 ## 快速开始
 
@@ -30,19 +25,13 @@
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# 创建数据目录用于持久化
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker 运行
 
 ```bash
-# 创建数据目录
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 `/app/data` 卷存储 `config.json`（包含账号和设置），挂载此目录以实现数据持久化。
-
 ### 源码编译
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## 配置
+首次运行会在 `data/config.json` 自动生成配置，挂载 `/app/data` 以持久化。默认管理密码为 `changeme`，生产环境请务必通过 `ADMIN_PASSWORD` 环境变量或在管理面板中修改。
 
-首次运行会自动创建 `data/config.json`：
+## 使用方法
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+访问 `http://localhost:8080/admin` 登录、添加账号，然后调用 API：
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"你好！"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"你好！"}]}'
 ```
 
-> ⚠️ **生产环境请务必修改默认密码！**
+## 思考模式
+
+在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。
 
 ## 环境变量
 
@@ -87,168 +81,17 @@ go build -o kiro-go .
 | `CONFIG_PATH` | 配置文件路径 | `data/config.json` |
 | `ADMIN_PASSWORD` | 管理面板密码（覆盖配置文件） | - |
 
-## 使用方法
+## 参与贡献
 
-### 1. 访问管理面板
+欢迎友好交流。遇到问题时，建议先让 Claude Code、Codex 等工具帮忙排查一下，大部分问题都能自己解决。如果能直接提个 PR 就更好了。
 
-打开 `http://localhost:8080/admin`，输入密码登录。
+## 友情链接
 
-### 2. 添加账号
-
-支持多种方式：
-
-| 方式 | 说明 |
-|------|------|
-| **AWS Builder ID** | 通过 AWS Builder ID 授权登录（个人账号） |
-| **IAM Identity Center (企业 SSO) 登录** | 通过 IAM Identity Center (企业 SSO) 授权登录（企业账号） |
-| **SSO Token** | 通过浏览器 `x-amz-sso_authn` Token 添加账号 |
-| **Kiro 本地缓存** | 通过 Kiro IDE 本地缓存文件添加账号 |
-| **凭证 JSON** | 通过 Kiro Account Manager 导出的凭证添加账号 |
-
-#### 凭证格式
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. 调用 API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-## 模型映射
-
-| 请求模型 | 实际模型 |
-|---------|---------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## 思考模式
-
-在模型名称后添加后缀（默认：`-thinking`）即可启用扩展思考模式。
-
-### 使用方法
-
-```bash
-# OpenAI API 启用思考
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "一步步解决：15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API 启用思考
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "分析这个问题"}]
-  }'
-```
-
-### 配置
-
-在管理面板的 **设置 > Thinking 模式设置** 中配置：
-
-| 设置 | 说明 | 选项 |
-|-----|------|------|
-| **触发后缀** | 启用思考的模型名称后缀 | 默认：`-thinking`（可自定义，如 `-think`、`-sikao`） |
-| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`（DeepSeek 兼容）、`<thinking>` 标签、`<think>` 标签 |
-| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `<thinking>` 标签（默认）、`<think>` 标签、纯文本 |
-
-### 输出格式说明
-
-**OpenAI API (`/v1/chat/completions`)**：
-- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段（DeepSeek 兼容）
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹在 content 中
-- `think` - 思考内容用 `<think>...</think>` 标签包裹在 content 中
-
-**Claude API (`/v1/messages`)**：
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹（默认）
-- `think` - 思考内容用 `<think>...</think>` 标签包裹
-- `reasoning_content` - 纯文本输出
-
-## API 端点
-
-| 端点 | 说明 |
-|-----|------|
-| `GET /health` | 健康检查 |
-| `GET /v1/models` | 模型列表 |
-| `GET /v1/stats` | 统计数据 |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token 计数 |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | 管理面板 |
-
-## 项目结构
-
-```
-Kiro-Go/
-├── main.go              # 入口
-├── version.json         # 版本信息（用于更新检测）
-├── config/              # 配置管理
-├── pool/                # 账号池 & 负载均衡
-├── proxy/               # API 处理 & Kiro 客户端
-│   ├── handler.go       # HTTP 路由 & 管理 API
-│   ├── kiro.go          # Kiro API 客户端
-│   ├── kiro_api.go      # Kiro REST API（用量、模型）
-│   └── translator.go    # 请求/响应转换
-├── auth/                # 认证
-│   ├── builderid.go     # AWS Builder ID 登录
-│   ├── iam_sso.go       # IAM SSO 登录
-│   ├── oidc.go          # OIDC Token 刷新
-│   └── sso_token.go     # SSO Token 导入
-├── web/                 # 管理面板前端
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## 免责声明
 
-本项目仅供**学习和研究目的**使用。
-
-- 本软件与 Amazon、AWS 或 Kiro 没有任何关联、认可或合作关系
-- 用户需自行确保其使用行为符合所有适用的服务条款和法律法规
-- 作者不对因使用本软件而产生的任何滥用或违规行为承担责任
-- 使用风险自负
-
-使用本软件即表示您已阅读并理解本免责声明。
+本项目仅供学习和研究目的使用，与 Amazon、AWS 或 Kiro 没有任何关联。用户需自行确保使用行为符合所有适用的服务条款和法律法规，使用风险自负。
 
 ## 许可证
 

From 496b14df3fbf4110d8837821041b0c8b4a191913 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 15:05:20 +0800
Subject: [PATCH 10/22] fix: improve prompt cache tracking

---
 LICENSE                     |  21 ++++++
 proxy/cache_tracker.go      | 141 +++++++++++++++++++++++++++++++-----
 proxy/cache_tracker_test.go | 107 ++++++++++++++++++++++++++-
 3 files changed, 251 insertions(+), 18 deletions(-)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1bf685b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Quorinex
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/proxy/cache_tracker.go b/proxy/cache_tracker.go
index 338f208..582754b 100644
--- a/proxy/cache_tracker.go
+++ b/proxy/cache_tracker.go
@@ -13,6 +13,13 @@ import (
 
 const defaultPromptCacheTTL = 5 * time.Minute
 
+// Anthropic requires cached prefixes to reach a minimum token count before
+// caching takes effect. Breakpoints below this threshold are excluded from
+// matching and storage to avoid reporting unrealistic 100% cache hits on
+// short requests.
+const defaultMinCacheableTokens = 1024
+const opusMinCacheableTokens = 4096
+
 type promptCacheUsage struct {
 	CacheCreationInputTokens   int
 	CacheReadInputTokens       int
@@ -29,6 +36,15 @@ type promptCacheBreakpoint struct {
 type promptCacheProfile struct {
 	Breakpoints      []promptCacheBreakpoint
 	TotalInputTokens int
+	Model            string
+}
+
+func minCacheableTokensForModel(model string) int {
+	lower := strings.ToLower(model)
+	if strings.Contains(lower, "opus") {
+		return opusMinCacheableTokens
+	}
+	return defaultMinCacheableTokens
 }
 
 type promptCacheEntry struct {
@@ -61,13 +77,27 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 	hasher := sha256.New()
 	breakpoints := make([]promptCacheBreakpoint, 0)
 	cumulativeTokens := 0
+	var activeTTL time.Duration
 
 	for _, block := range blocks {
 		canonical := canonicalizeCacheValue(block.Value)
 		writeHashChunk(hasher, canonical)
 		cumulativeTokens += block.Tokens
 
-		if block.TTL <= 0 {
+		// Determine whether this block acts as a cache breakpoint:
+		//   1) Explicit cache_control on the block itself.
+		//   2) Once any explicit breakpoint has been seen, every message-end
+		//      boundary becomes an implicit breakpoint so that multi-turn
+		//      conversations can hit earlier stored prefixes.
+		breakpointTTL := time.Duration(0)
+		if block.TTL > 0 {
+			breakpointTTL = block.TTL
+			activeTTL = block.TTL
+		} else if block.IsMessageEnd && activeTTL > 0 {
+			breakpointTTL = activeTTL
+		}
+
+		if breakpointTTL <= 0 {
 			continue
 		}
 
@@ -76,7 +106,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 		breakpoints = append(breakpoints, promptCacheBreakpoint{
 			Fingerprint:      fingerprint,
 			CumulativeTokens: cumulativeTokens,
-			TTL:              block.TTL,
+			TTL:              breakpointTTL,
 		})
 	}
 
@@ -91,6 +121,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 	return &promptCacheProfile{
 		Breakpoints:      breakpoints,
 		TotalInputTokens: totalInputTokens,
+		Model:            req.Model,
 	}
 }
 
@@ -99,6 +130,7 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 		return promptCacheUsage{}
 	}
 
+	minTokens := minCacheableTokensForModel(profile.Model)
 	last := profile.Breakpoints[len(profile.Breakpoints)-1]
 	lastTokens := minInt(last.CumulativeTokens, profile.TotalInputTokens)
 	now := time.Now()
@@ -109,18 +141,35 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 
 	entries := t.entriesByAccount[accountID]
 	if len(entries) == 0 {
+		// First request for this account: report creation only if above threshold.
+		effectiveCreation := lastTokens
+		if effectiveCreation < minTokens {
+			effectiveCreation = 0
+		}
 		cache5m, cache1h := computePromptCacheTTLBreakdown(profile, 0)
 		return promptCacheUsage{
-			CacheCreationInputTokens:   lastTokens,
+			CacheCreationInputTokens:   effectiveCreation,
 			CacheReadInputTokens:       0,
 			CacheCreation5mInputTokens: cache5m,
 			CacheCreation1hInputTokens: cache1h,
 		}
 	}
 
+	// Cap cacheable tokens at 85% of total input to ensure a realistic
+	// uncached portion. The newest content in a request is never fully
+	// served from cache on the current turn.
+	maxCacheable := int(float64(profile.TotalInputTokens) * 0.85)
+	if lastTokens > maxCacheable {
+		lastTokens = maxCacheable
+	}
+
 	matchedTokens := 0
 	for i := len(profile.Breakpoints) - 1; i >= 0; i-- {
 		breakpoint := profile.Breakpoints[i]
+		// Skip breakpoints below the minimum cacheable token threshold.
+		if breakpoint.CumulativeTokens < minTokens {
+			continue
+		}
 		entry, ok := entries[breakpoint.Fingerprint]
 		if !ok || entry.ExpiresAt.Before(now) {
 			continue
@@ -128,6 +177,9 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 		entry.ExpiresAt = now.Add(entry.TTL)
 		entries[breakpoint.Fingerprint] = entry
 		matchedTokens = minInt(breakpoint.CumulativeTokens, profile.TotalInputTokens)
+		if matchedTokens > lastTokens {
+			matchedTokens = lastTokens
+		}
 		break
 	}
 
@@ -146,6 +198,7 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil
 		return
 	}
 
+	minTokens := minCacheableTokensForModel(profile.Model)
 	now := time.Now()
 	t.mu.Lock()
 	defer t.mu.Unlock()
@@ -158,6 +211,10 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil
 	}
 
 	for _, breakpoint := range profile.Breakpoints {
+		// Skip breakpoints below the minimum cacheable token threshold.
+		if breakpoint.CumulativeTokens < minTokens {
+			continue
+		}
 		entries[breakpoint.Fingerprint] = promptCacheEntry{
 			ExpiresAt: now.Add(breakpoint.TTL),
 			TTL:       breakpoint.TTL,
@@ -179,9 +236,10 @@ func (t *promptCacheTracker) pruneExpiredLocked(now time.Time) {
 }
 
 type cacheablePromptBlock struct {
-	Value  interface{}
-	Tokens int
-	TTL    time.Duration
+	Value        interface{}
+	Tokens       int
+	TTL          time.Duration
+	IsMessageEnd bool
 }
 
 func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
@@ -234,14 +292,14 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{})
 				"type": "text",
 				"text": v,
 			},
-		})
+		}, false)
 	case []interface{}:
 		for i, block := range v {
 			appendPromptBlock(blocks, map[string]interface{}{
 				"kind":         "system",
 				"system_index": i,
 				"block":        block,
-			})
+			}, false)
 		}
 	case []string:
 		for i, block := range v {
@@ -252,7 +310,7 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{})
 					"type": "text",
 					"text": block,
 				},
-			})
+			}, false)
 		}
 	}
 }
@@ -270,8 +328,9 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"type": "text",
 				"text": content,
 			},
-		})
+		}, true)
 	case []interface{}:
+		lastIdx := len(content) - 1
 		for blockIndex, block := range content {
 			appendPromptBlock(blocks, map[string]interface{}{
 				"kind":          "message",
@@ -279,7 +338,7 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"role":          role,
 				"block_index":   blockIndex,
 				"block":         block,
-			})
+			}, blockIndex == lastIdx)
 		}
 	default:
 		if content != nil {
@@ -289,22 +348,70 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"role":          role,
 				"block_index":   0,
 				"block":         content,
-			})
+			}, true)
 		}
 	}
 }
 
-func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}) {
-	blockValue, _ := wrapper["block"]
+func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}, isMessageEnd bool) {
+	blockValue := wrapper["block"]
 	ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue))
+
+	// Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header
+	// which drifts on every request) so that fingerprints remain stable across
+	// requests within the same conversation.
+	if normalized, changed := normalizeCacheBlockContent(blockValue); changed {
+		cloned := make(map[string]interface{}, len(wrapper))
+		for k, v := range wrapper {
+			cloned[k] = v
+		}
+		cloned["block"] = normalized
+		wrapper = cloned
+	}
+
 	canonical := canonicalizeCacheValue(wrapper)
 	*blocks = append(*blocks, cacheablePromptBlock{
-		Value:  wrapper,
-		Tokens: estimateApproxTokens(canonical),
-		TTL:    ttl,
+		Value:        wrapper,
+		Tokens:       estimateApproxTokens(canonical),
+		TTL:          ttl,
+		IsMessageEnd: isMessageEnd,
 	})
 }
 
+// normalizeCacheBlockContent replaces volatile but semantically irrelevant
+// fields with a placeholder so that the cumulative fingerprint stays stable
+// across requests in the same session. Currently handles:
+//   - Claude Code's "x-anthropic-billing-header: ..." system text block
+//     whose content drifts on every request (version, telemetry hash, etc.)
+func normalizeCacheBlockContent(value interface{}) (interface{}, bool) {
+	blockMap, ok := value.(map[string]interface{})
+	if !ok {
+		return value, false
+	}
+
+	// Only normalize text blocks (or blocks without an explicit type but containing text).
+	if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" {
+		return value, false
+	}
+
+	text, ok := blockMap["text"].(string)
+	if !ok {
+		return value, false
+	}
+
+	trimmed := strings.TrimLeft(text, " \t\r\n")
+	if !strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:") {
+		return value, false
+	}
+
+	cloned := make(map[string]interface{}, len(blockMap))
+	for k, v := range blockMap {
+		cloned[k] = v
+	}
+	cloned["text"] = "__anthropic_billing_header__"
+	return cloned, true
+}
+
 func extractPromptCacheTTL(value interface{}) time.Duration {
 	block, ok := value.(map[string]interface{})
 	if !ok {
diff --git a/proxy/cache_tracker_test.go b/proxy/cache_tracker_test.go
index 1beba02..aa620c8 100644
--- a/proxy/cache_tracker_test.go
+++ b/proxy/cache_tracker_test.go
@@ -1,18 +1,20 @@
 package proxy
 
 import (
+	"strings"
 	"testing"
 	"time"
 )
 
 func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) {
 	tracker := newPromptCacheTracker(time.Hour)
+	longSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
 	req := &ClaudeRequest{
 		Model: "claude-sonnet-4.5",
 		System: []interface{}{
 			map[string]interface{}{
 				"type": "text",
-				"text": "system prompt",
+				"text": longSystem,
 				"cache_control": map[string]interface{}{
 					"type": "ephemeral",
 				},
@@ -71,3 +73,106 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
 		t.Fatalf("unexpected ttl breakdown: %#v", creation)
 	}
 }
+
+// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
+// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
+// block (whose content drifts on every request) does not break cache hits.
+// The normalization logic should ensure the same conversation still matches.
+func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
+	tracker := newPromptCacheTracker(time.Hour)
+	mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
+
+	build := func(billingHdr string) *ClaudeRequest {
+		return &ClaudeRequest{
+			Model: "claude-sonnet-4.5",
+			System: []interface{}{
+				map[string]interface{}{
+					"type": "text",
+					"text": billingHdr,
+				},
+				map[string]interface{}{
+					"type": "text",
+					"text": mainSystem,
+					"cache_control": map[string]interface{}{
+						"type": "ephemeral",
+					},
+				},
+			},
+			Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
+		}
+	}
+
+	req1 := build("x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;")
+	profile1 := tracker.BuildClaudeProfile(req1, 2048)
+	if profile1 == nil {
+		t.Fatalf("profile1 should be built")
+	}
+	first := tracker.Compute("acct-1", profile1)
+	if first.CacheReadInputTokens != 0 {
+		t.Fatalf("expected no cache read on first request, got %+v", first)
+	}
+	tracker.Update("acct-1", profile1)
+
+	req2 := build("x-anthropic-billing-header: cc_version=2.1.87.42; cch=bbbb; padding=xxyyzz;")
+	profile2 := tracker.BuildClaudeProfile(req2, 2048)
+	if profile2 == nil {
+		t.Fatalf("profile2 should be built")
+	}
+	second := tracker.Compute("acct-1", profile2)
+	if second.CacheReadInputTokens == 0 {
+		t.Fatalf("expected cache read after billing header drift, got %+v", second)
+	}
+}
+
+// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
+// explicit cache_control breakpoint has been seen, subsequent message-end
+// boundaries act as implicit breakpoints. This allows multi-turn conversations
+// to hit earlier stored prefix fingerprints even when the newest messages
+// lack explicit cache_control.
+func TestPromptCacheImplicitBreakpointAtMessageEnd(t *testing.T) {
+	tracker := newPromptCacheTracker(time.Hour)
+	systemText := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
+
+	baseSystem := []interface{}{
+		map[string]interface{}{
+			"type": "text",
+			"text": systemText,
+			"cache_control": map[string]interface{}{
+				"type": "ephemeral",
+			},
+		},
+	}
+
+	// Round 1: single user message.
+	req1 := &ClaudeRequest{
+		Model:    "claude-sonnet-4.5",
+		System:   baseSystem,
+		Messages: []ClaudeMessage{{Role: "user", Content: "question one"}},
+	}
+	profile1 := tracker.BuildClaudeProfile(req1, 2048)
+	if profile1 == nil {
+		t.Fatalf("profile1 should be built")
+	}
+	tracker.Update("acct-1", profile1)
+
+	// Round 2: conversation continues with new messages. The latest user
+	// message has no explicit cache_control; it should still hit the stored
+	// prefix via the implicit message-end breakpoint.
+	req2 := &ClaudeRequest{
+		Model:  "claude-sonnet-4.5",
+		System: baseSystem,
+		Messages: []ClaudeMessage{
+			{Role: "user", Content: "question one"},
+			{Role: "assistant", Content: "answer one"},
+			{Role: "user", Content: "follow-up question"},
+		},
+	}
+	profile2 := tracker.BuildClaudeProfile(req2, 4096)
+	if profile2 == nil {
+		t.Fatalf("profile2 should be built")
+	}
+	result := tracker.Compute("acct-1", profile2)
+	if result.CacheReadInputTokens == 0 {
+		t.Fatalf("expected cache read via implicit message-end breakpoint, got %+v", result)
+	}
+}

From 31aa6aa4215035e894ac8a0476f2c8c01f6076db Mon Sep 17 00:00:00 2001
From: Naive YH <ahao12399@126.com>
Date: Mon, 11 May 2026 17:23:21 +0800
Subject: [PATCH 11/22] fix: accurate input_tokens via contextUsageEvent +
 smart routing for SDK clients

---
 proxy/handler.go | 330 ++++++++++++++++++++++++++++++++++++++++++++++-
 proxy/kiro.go    |  27 +++-
 2 files changed, 346 insertions(+), 11 deletions(-)

diff --git a/proxy/handler.go b/proxy/handler.go
index 85afc5e..cff78fe 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -262,6 +262,12 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 			return
 		}
 		h.handleClaudeMessages(w, r)
+	case path == "/cc/v1/messages":
+		if !h.validateApiKey(r) {
+			h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key")
+			return
+		}
+		h.handleClaudeMessagesBuffered(w, r)
 	case path == "/v1/messages/count_tokens" || path == "/messages/count_tokens":
 		if !h.validateApiKey(r) {
 			h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key")
@@ -631,9 +637,13 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 	// 转换请求
 	kiroPayload := ClaudeToKiro(&req, thinking)
 
-	// 流式或非流式
+	// 流式或非流式；SDK 客户端（Claude Code、opencode 等）自动使用缓冲模式以获取精确 message_start
 	if req.Stream {
-		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+		if isAnthropicSDKRequest(r) {
+			h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+		} else {
+			h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+		}
 	} else {
 		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
 	}
@@ -657,6 +667,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	msgID := "msg_" + uuid.New().String()
 	var inputTokens, outputTokens int
 	var credits float64
+	var realInputTokens int
 	var toolUses []KiroToolUse
 	var nextContentIndex int
 	var rawContentBuilder strings.Builder
@@ -978,6 +989,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 		OnCredits: func(c float64) {
 			credits = c
 		},
+		OnContextUsage: func(pct float64) {
+			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
+		},
 	}
 
 	err := CallKiroAPI(account, payload, callback)
@@ -999,7 +1013,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	}
 	closeActiveBlock()
 
-	if inputTokens <= 0 {
+	if realInputTokens > 0 {
+		inputTokens = realInputTokens
+	} else if inputTokens <= 0 {
 		inputTokens = estimatedInputTokens
 	}
 	outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String())
@@ -1042,6 +1058,290 @@ func (h *Handler) sendSSE(w http.ResponseWriter, flusher http.Flusher, event str
 	flusher.Flush()
 }
 
+// isAnthropicSDKRequest 检测请求是否来自基于 Anthropic 官方 SDK 的客户端
+// (Claude Code、opencode、Roo Code 等)，这类客户端读取 message_start.input_tokens 来展示上下文用量
+func isAnthropicSDKRequest(r *http.Request) bool {
+	if r.Header.Get("x-stainless-lang") != "" {
+		return true
+	}
+	ua := strings.ToLower(r.Header.Get("User-Agent"))
+	return strings.Contains(ua, "claude") || strings.Contains(ua, "anthropic-sdk")
+}
+
+// handleClaudeMessagesBuffered Claude API 缓冲模式处理（/cc/v1/messages 及自动识别的 SDK 客户端）
+func (h *Handler) handleClaudeMessagesBuffered(w http.ResponseWriter, r *http.Request) {
+	h.handleClaudeMessagesInternalBuffered(w, r)
+}
+
+func (h *Handler) handleClaudeMessagesInternalBuffered(w http.ResponseWriter, r *http.Request) {
+	if r.Method != "POST" {
+		http.Error(w, "Method Not Allowed", 405)
+		return
+	}
+
+	body, err := io.ReadAll(r.Body)
+	if err != nil {
+		h.sendClaudeError(w, 400, "invalid_request_error", "Failed to read request body")
+		return
+	}
+
+	var req ClaudeRequest
+	if err := json.Unmarshal(body, &req); err != nil {
+		h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON: "+err.Error())
+		return
+	}
+	if msg := validateClaudeRequestShape(&req); msg != "" {
+		h.sendClaudeError(w, 400, "invalid_request_error", msg)
+		return
+	}
+
+	account := h.pool.GetNext()
+	if account == nil {
+		h.sendClaudeError(w, 503, "api_error", "No available accounts")
+		return
+	}
+
+	if err := h.ensureValidToken(account); err != nil {
+		h.sendClaudeError(w, 503, "api_error", "Token refresh failed: "+err.Error())
+		return
+	}
+
+	thinkingCfg := config.GetThinkingConfig()
+	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
+	req.Model = actualModel
+	estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
+	cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
+	cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
+
+	kiroPayload := ClaudeToKiro(&req, thinking)
+
+	if req.Stream {
+		h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+	} else {
+		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+	}
+}
+
+// handleClaudeStreamBuffered Claude 缓冲流式响应
+// 等待上游流完成后得到精确 input_tokens，回填 message_start 后一次性推送所有 SSE 事件
+// 等待期间每 25 秒发送 ping 事件保活
+func (h *Handler) handleClaudeStreamBuffered(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
+	w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
+	w.Header().Set("Cache-Control", "no-cache")
+	w.Header().Set("Connection", "keep-alive")
+
+	flusher, ok := w.(http.Flusher)
+	if !ok {
+		h.sendClaudeError(w, 500, "api_error", "Streaming not supported")
+		return
+	}
+
+	// ping 保活 goroutine（25 秒间隔，防止客户端超时断开）
+	pingStop := make(chan struct{})
+	var stopOnce sync.Once
+	stopPing := func() { stopOnce.Do(func() { close(pingStop) }) }
+	defer stopPing()
+
+	go func() {
+		ticker := time.NewTicker(25 * time.Second)
+		defer ticker.Stop()
+		for {
+			select {
+			case <-ticker.C:
+				fmt.Fprintf(w, "event: ping\ndata: {}\n\n")
+				flusher.Flush()
+			case <-pingStop:
+				return
+			}
+		}
+	}()
+
+	// 缓冲阶段：收集所有内容
+	var contentBuilder strings.Builder
+	var thinkingBuilder strings.Builder
+	var toolUses []KiroToolUse
+	var inputTokens, outputTokens int
+	var credits float64
+	var realInputTokens int
+
+	callback := &KiroStreamCallback{
+		OnText: func(text string, isThinking bool) {
+			if isThinking {
+				thinkingBuilder.WriteString(text)
+			} else {
+				contentBuilder.WriteString(text)
+			}
+		},
+		OnToolUse: func(tu KiroToolUse) {
+			toolUses = append(toolUses, tu)
+		},
+		OnComplete: func(inTok, outTok int) {
+			inputTokens = inTok
+			outputTokens = outTok
+		},
+		OnError: func(err error) {
+			h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota"))
+		},
+		OnCredits: func(c float64) {
+			credits = c
+		},
+		OnContextUsage: func(pct float64) {
+			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
+		},
+	}
+
+	err := CallKiroAPI(account, payload, callback)
+	stopPing()
+
+	if err != nil {
+		h.recordFailure()
+		h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota"))
+		h.sendSSE(w, flusher, "error", map[string]interface{}{
+			"type":  "error",
+			"error": map[string]string{"type": "api_error", "message": err.Error()},
+		})
+		return
+	}
+
+	// 确定精确 input_tokens
+	finalInputTokens := estimatedInputTokens
+	if realInputTokens > 0 {
+		finalInputTokens = realInputTokens
+	} else if inputTokens > 0 {
+		finalInputTokens = inputTokens
+	}
+
+	// 处理 thinking 内容
+	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
+	rawContent := contentBuilder.String()
+	rawThinking := thinkingBuilder.String()
+	outputContent, extractedReasoning := extractThinkingFromContent(rawContent)
+	thinkingOutput := rawThinking
+	if thinking && thinkingOutput == "" && extractedReasoning != "" {
+		thinkingOutput = extractedReasoning
+	}
+	if !thinking {
+		thinkingOutput = ""
+	}
+	outputTokens = estimateClaudeOutputTokens(outputContent, thinkingOutput, toolUses)
+
+	h.recordSuccess(finalInputTokens, outputTokens, credits)
+	h.pool.RecordSuccess(account.ID)
+	h.pool.UpdateStats(account.ID, finalInputTokens+outputTokens, credits)
+	h.promptCache.Update(account.ID, cacheProfile)
+
+	msgID := "msg_" + uuid.New().String()
+	contentIndex := 0
+
+	// 推送阶段：message_start 携带精确 input_tokens
+	h.sendSSE(w, flusher, "message_start", map[string]interface{}{
+		"type": "message_start",
+		"message": map[string]interface{}{
+			"id":            msgID,
+			"type":          "message",
+			"role":          "assistant",
+			"content":       []interface{}{},
+			"model":         model,
+			"stop_reason":   nil,
+			"stop_sequence": nil,
+			"usage":         buildClaudeUsageMap(finalInputTokens, 0, cacheUsage, cacheProfile != nil),
+		},
+	})
+	h.sendSSE(w, flusher, "ping", map[string]interface{}{"type": "ping"})
+
+	// 推送 thinking 块
+	if thinking && thinkingOutput != "" {
+		switch thinkingFormat {
+		case "think":
+			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+				"type": "content_block_start", "index": contentIndex,
+				"content_block": map[string]string{"type": "text", "text": ""},
+			})
+			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+				"type": "content_block_delta", "index": contentIndex,
+				"delta": map[string]string{"type": "text_delta", "text": "<think>" + thinkingOutput + "</think>"},
+			})
+			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
+				"type": "content_block_stop", "index": contentIndex,
+			})
+			contentIndex++
+		case "reasoning_content":
+			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+				"type": "content_block_start", "index": contentIndex,
+				"content_block": map[string]string{"type": "text", "text": ""},
+			})
+			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+				"type": "content_block_delta", "index": contentIndex,
+				"delta": map[string]string{"type": "text_delta", "text": thinkingOutput},
+			})
+			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
+				"type": "content_block_stop", "index": contentIndex,
+			})
+			contentIndex++
+		default: // native thinking block
+			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+				"type": "content_block_start", "index": contentIndex,
+				"content_block": map[string]string{"type": "thinking", "thinking": ""},
+			})
+			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+				"type": "content_block_delta", "index": contentIndex,
+				"delta": map[string]string{"type": "thinking_delta", "thinking": thinkingOutput},
+			})
+			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
+				"type": "content_block_stop", "index": contentIndex,
+			})
+			contentIndex++
+		}
+	}
+
+	// 推送文本块
+	if outputContent != "" {
+		h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+			"type": "content_block_start", "index": contentIndex,
+			"content_block": map[string]string{"type": "text", "text": ""},
+		})
+		h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+			"type": "content_block_delta", "index": contentIndex,
+			"delta": map[string]string{"type": "text_delta", "text": outputContent},
+		})
+		h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
+			"type": "content_block_stop", "index": contentIndex,
+		})
+		contentIndex++
+	}
+
+	// 推送工具调用块
+	for _, tu := range toolUses {
+		inputJSON, _ := json.Marshal(tu.Input)
+		h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+			"type": "content_block_start", "index": contentIndex,
+			"content_block": map[string]interface{}{
+				"type": "tool_use", "id": tu.ToolUseID, "name": tu.Name, "input": map[string]interface{}{},
+			},
+		})
+		h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+			"type": "content_block_delta", "index": contentIndex,
+			"delta": map[string]interface{}{"type": "input_json_delta", "partial_json": string(inputJSON)},
+		})
+		h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
+			"type": "content_block_stop", "index": contentIndex,
+		})
+		contentIndex++
+	}
+
+	stopReason := "end_turn"
+	if len(toolUses) > 0 {
+		stopReason = "tool_use"
+	}
+
+	h.sendSSE(w, flusher, "message_delta", map[string]interface{}{
+		"type":  "message_delta",
+		"delta": map[string]interface{}{"stop_reason": stopReason},
+		"usage": buildClaudeUsageMap(finalInputTokens, outputTokens, cacheUsage, cacheProfile != nil),
+	})
+	h.sendSSE(w, flusher, "message_stop", map[string]interface{}{"type": "message_stop"})
+}
+
 // backgroundStatsSaver 后台定时保存统计数据
 func (h *Handler) backgroundStatsSaver() {
 	ticker := time.NewTicker(30 * time.Second)
@@ -1103,6 +1403,7 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	var toolUses []KiroToolUse
 	var inputTokens, outputTokens int
 	var credits float64
+	var realInputTokens int
 
 	callback := &KiroStreamCallback{
 		OnText: func(text string, isThinking bool) {
@@ -1125,6 +1426,9 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 		OnCredits: func(c float64) {
 			credits = c
 		},
+		OnContextUsage: func(pct float64) {
+			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
+		},
 	}
 
 	err := CallKiroAPI(account, payload, callback)
@@ -1145,7 +1449,9 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 		thinkingContent = ""
 	}
 
-	if inputTokens <= 0 {
+	if realInputTokens > 0 {
+		inputTokens = realInputTokens
+	} else if inputTokens <= 0 {
 		inputTokens = estimatedInputTokens
 	}
 	outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
@@ -1262,6 +1568,7 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 	var toolCallIndex int
 	var inputTokens, outputTokens int
 	var credits float64
+	var realInputTokens int
 	var rawContentBuilder strings.Builder
 	var rawReasoningBuilder strings.Builder
 
@@ -1554,6 +1861,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 		OnCredits: func(c float64) {
 			credits = c
 		},
+		OnContextUsage: func(pct float64) {
+			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
+		},
 	}
 
 	err := CallKiroAPI(account, payload, callback)
@@ -1570,7 +1880,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 		eventThinkingOpen = false
 	}
 
-	if inputTokens <= 0 {
+	if realInputTokens > 0 {
+		inputTokens = realInputTokens
+	} else if inputTokens <= 0 {
 		inputTokens = estimatedInputTokens
 	}
 	outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String())
@@ -1626,6 +1938,7 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 	var toolUses []KiroToolUse
 	var inputTokens, outputTokens int
 	var credits float64
+	var realInputTokens int
 
 	callback := &KiroStreamCallback{
 		OnText: func(text string, isThinking bool) {
@@ -1639,6 +1952,9 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 		OnComplete: func(inTok, outTok int) { inputTokens = inTok; outputTokens = outTok },
 		OnError:    func(err error) { h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429")) },
 		OnCredits:  func(c float64) { credits = c },
+		OnContextUsage: func(pct float64) {
+			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
+		},
 	}
 
 	err := CallKiroAPI(account, payload, callback)
@@ -1657,7 +1973,9 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 		reasoningContent = ""
 	}
 
-	if inputTokens <= 0 {
+	if realInputTokens > 0 {
+		inputTokens = realInputTokens
+	} else if inputTokens <= 0 {
 		inputTokens = estimatedInputTokens
 	}
 	outputTokens = estimateOpenAIOutputTokens(finalContent, reasoningContent, toolUses)
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 7fcaa64..0a57bb2 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -136,11 +136,12 @@ type InferenceConfig struct {
 
 // KiroStreamCallback 流式响应回调
 type KiroStreamCallback struct {
-	OnText     func(text string, isThinking bool)
-	OnToolUse  func(toolUse KiroToolUse)
-	OnComplete func(inputTokens, outputTokens int)
-	OnError    func(err error)
-	OnCredits  func(credits float64)
+	OnText           func(text string, isThinking bool)
+	OnToolUse        func(toolUse KiroToolUse)
+	OnComplete       func(inputTokens, outputTokens int)
+	OnError          func(err error)
+	OnCredits        func(credits float64)
+	OnContextUsage   func(percentage float64)
 }
 
 // ==================== API 调用 ====================
@@ -306,6 +307,12 @@ func parseEventStream(body io.Reader, callback *KiroStreamCallback) error {
 			if usage, ok := event["usage"].(float64); ok {
 				totalCredits += usage
 			}
+		case "contextUsageEvent":
+			if pct, ok := event["contextUsagePercentage"].(float64); ok {
+				if callback.OnContextUsage != nil {
+					callback.OnContextUsage(pct)
+				}
+			}
 		}
 	}
 
@@ -370,6 +377,16 @@ func updateTokensFromEvent(event map[string]interface{}, currentInputTokens, cur
 	return inputTokens, outputTokens
 }
 
+// getContextWindowSize 返回模型的上下文窗口大小（token 数）
+// Kiro 托管的 Claude 模型窗口由 AWS 硬性规定，此处与官方保持一致
+func getContextWindowSize(model string) int {
+	m := strings.ToLower(model)
+	if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") {
+		return 1_000_000
+	}
+	return 200_000
+}
+
 func collectUsageMaps(v interface{}, out *[]map[string]interface{}) {
 	switch t := v.(type) {
 	case map[string]interface{}:

From 0203357b34bbee306a98d380f9b24ba72d841ec7 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 19:47:39 +0800
Subject: [PATCH 12/22] refactor: remove buffered stream mode, keep
 contextUsageEvent for accurate input tokens

---
 proxy/handler.go | 298 +----------------------------------------------
 proxy/kiro.go    |  11 +-
 2 files changed, 8 insertions(+), 301 deletions(-)

diff --git a/proxy/handler.go b/proxy/handler.go
index cff78fe..a7fb592 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -262,12 +262,6 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 			return
 		}
 		h.handleClaudeMessages(w, r)
-	case path == "/cc/v1/messages":
-		if !h.validateApiKey(r) {
-			h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key")
-			return
-		}
-		h.handleClaudeMessagesBuffered(w, r)
 	case path == "/v1/messages/count_tokens" || path == "/messages/count_tokens":
 		if !h.validateApiKey(r) {
 			h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key")
@@ -637,13 +631,9 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 	// 转换请求
 	kiroPayload := ClaudeToKiro(&req, thinking)
 
-	// 流式或非流式；SDK 客户端（Claude Code、opencode 等）自动使用缓冲模式以获取精确 message_start
+	// Stream or non-stream
 	if req.Stream {
-		if isAnthropicSDKRequest(r) {
-			h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
-		} else {
-			h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
-		}
+		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
 	} else {
 		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
 	}
@@ -1058,290 +1048,6 @@ func (h *Handler) sendSSE(w http.ResponseWriter, flusher http.Flusher, event str
 	flusher.Flush()
 }
 
-// isAnthropicSDKRequest 检测请求是否来自基于 Anthropic 官方 SDK 的客户端
-// (Claude Code、opencode、Roo Code 等)，这类客户端读取 message_start.input_tokens 来展示上下文用量
-func isAnthropicSDKRequest(r *http.Request) bool {
-	if r.Header.Get("x-stainless-lang") != "" {
-		return true
-	}
-	ua := strings.ToLower(r.Header.Get("User-Agent"))
-	return strings.Contains(ua, "claude") || strings.Contains(ua, "anthropic-sdk")
-}
-
-// handleClaudeMessagesBuffered Claude API 缓冲模式处理（/cc/v1/messages 及自动识别的 SDK 客户端）
-func (h *Handler) handleClaudeMessagesBuffered(w http.ResponseWriter, r *http.Request) {
-	h.handleClaudeMessagesInternalBuffered(w, r)
-}
-
-func (h *Handler) handleClaudeMessagesInternalBuffered(w http.ResponseWriter, r *http.Request) {
-	if r.Method != "POST" {
-		http.Error(w, "Method Not Allowed", 405)
-		return
-	}
-
-	body, err := io.ReadAll(r.Body)
-	if err != nil {
-		h.sendClaudeError(w, 400, "invalid_request_error", "Failed to read request body")
-		return
-	}
-
-	var req ClaudeRequest
-	if err := json.Unmarshal(body, &req); err != nil {
-		h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON: "+err.Error())
-		return
-	}
-	if msg := validateClaudeRequestShape(&req); msg != "" {
-		h.sendClaudeError(w, 400, "invalid_request_error", msg)
-		return
-	}
-
-	account := h.pool.GetNext()
-	if account == nil {
-		h.sendClaudeError(w, 503, "api_error", "No available accounts")
-		return
-	}
-
-	if err := h.ensureValidToken(account); err != nil {
-		h.sendClaudeError(w, 503, "api_error", "Token refresh failed: "+err.Error())
-		return
-	}
-
-	thinkingCfg := config.GetThinkingConfig()
-	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
-	req.Model = actualModel
-	estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
-	cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
-	cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
-
-	kiroPayload := ClaudeToKiro(&req, thinking)
-
-	if req.Stream {
-		h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
-	} else {
-		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
-	}
-}
-
-// handleClaudeStreamBuffered Claude 缓冲流式响应
-// 等待上游流完成后得到精确 input_tokens，回填 message_start 后一次性推送所有 SSE 事件
-// 等待期间每 25 秒发送 ping 事件保活
-func (h *Handler) handleClaudeStreamBuffered(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
-	w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
-	w.Header().Set("Cache-Control", "no-cache")
-	w.Header().Set("Connection", "keep-alive")
-
-	flusher, ok := w.(http.Flusher)
-	if !ok {
-		h.sendClaudeError(w, 500, "api_error", "Streaming not supported")
-		return
-	}
-
-	// ping 保活 goroutine（25 秒间隔，防止客户端超时断开）
-	pingStop := make(chan struct{})
-	var stopOnce sync.Once
-	stopPing := func() { stopOnce.Do(func() { close(pingStop) }) }
-	defer stopPing()
-
-	go func() {
-		ticker := time.NewTicker(25 * time.Second)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-ticker.C:
-				fmt.Fprintf(w, "event: ping\ndata: {}\n\n")
-				flusher.Flush()
-			case <-pingStop:
-				return
-			}
-		}
-	}()
-
-	// 缓冲阶段：收集所有内容
-	var contentBuilder strings.Builder
-	var thinkingBuilder strings.Builder
-	var toolUses []KiroToolUse
-	var inputTokens, outputTokens int
-	var credits float64
-	var realInputTokens int
-
-	callback := &KiroStreamCallback{
-		OnText: func(text string, isThinking bool) {
-			if isThinking {
-				thinkingBuilder.WriteString(text)
-			} else {
-				contentBuilder.WriteString(text)
-			}
-		},
-		OnToolUse: func(tu KiroToolUse) {
-			toolUses = append(toolUses, tu)
-		},
-		OnComplete: func(inTok, outTok int) {
-			inputTokens = inTok
-			outputTokens = outTok
-		},
-		OnError: func(err error) {
-			h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota"))
-		},
-		OnCredits: func(c float64) {
-			credits = c
-		},
-		OnContextUsage: func(pct float64) {
-			realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0)
-		},
-	}
-
-	err := CallKiroAPI(account, payload, callback)
-	stopPing()
-
-	if err != nil {
-		h.recordFailure()
-		h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota"))
-		h.sendSSE(w, flusher, "error", map[string]interface{}{
-			"type":  "error",
-			"error": map[string]string{"type": "api_error", "message": err.Error()},
-		})
-		return
-	}
-
-	// 确定精确 input_tokens
-	finalInputTokens := estimatedInputTokens
-	if realInputTokens > 0 {
-		finalInputTokens = realInputTokens
-	} else if inputTokens > 0 {
-		finalInputTokens = inputTokens
-	}
-
-	// 处理 thinking 内容
-	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
-	rawContent := contentBuilder.String()
-	rawThinking := thinkingBuilder.String()
-	outputContent, extractedReasoning := extractThinkingFromContent(rawContent)
-	thinkingOutput := rawThinking
-	if thinking && thinkingOutput == "" && extractedReasoning != "" {
-		thinkingOutput = extractedReasoning
-	}
-	if !thinking {
-		thinkingOutput = ""
-	}
-	outputTokens = estimateClaudeOutputTokens(outputContent, thinkingOutput, toolUses)
-
-	h.recordSuccess(finalInputTokens, outputTokens, credits)
-	h.pool.RecordSuccess(account.ID)
-	h.pool.UpdateStats(account.ID, finalInputTokens+outputTokens, credits)
-	h.promptCache.Update(account.ID, cacheProfile)
-
-	msgID := "msg_" + uuid.New().String()
-	contentIndex := 0
-
-	// 推送阶段：message_start 携带精确 input_tokens
-	h.sendSSE(w, flusher, "message_start", map[string]interface{}{
-		"type": "message_start",
-		"message": map[string]interface{}{
-			"id":            msgID,
-			"type":          "message",
-			"role":          "assistant",
-			"content":       []interface{}{},
-			"model":         model,
-			"stop_reason":   nil,
-			"stop_sequence": nil,
-			"usage":         buildClaudeUsageMap(finalInputTokens, 0, cacheUsage, cacheProfile != nil),
-		},
-	})
-	h.sendSSE(w, flusher, "ping", map[string]interface{}{"type": "ping"})
-
-	// 推送 thinking 块
-	if thinking && thinkingOutput != "" {
-		switch thinkingFormat {
-		case "think":
-			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-				"type": "content_block_start", "index": contentIndex,
-				"content_block": map[string]string{"type": "text", "text": ""},
-			})
-			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-				"type": "content_block_delta", "index": contentIndex,
-				"delta": map[string]string{"type": "text_delta", "text": "<think>" + thinkingOutput + "</think>"},
-			})
-			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
-				"type": "content_block_stop", "index": contentIndex,
-			})
-			contentIndex++
-		case "reasoning_content":
-			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-				"type": "content_block_start", "index": contentIndex,
-				"content_block": map[string]string{"type": "text", "text": ""},
-			})
-			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-				"type": "content_block_delta", "index": contentIndex,
-				"delta": map[string]string{"type": "text_delta", "text": thinkingOutput},
-			})
-			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
-				"type": "content_block_stop", "index": contentIndex,
-			})
-			contentIndex++
-		default: // native thinking block
-			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-				"type": "content_block_start", "index": contentIndex,
-				"content_block": map[string]string{"type": "thinking", "thinking": ""},
-			})
-			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-				"type": "content_block_delta", "index": contentIndex,
-				"delta": map[string]string{"type": "thinking_delta", "thinking": thinkingOutput},
-			})
-			h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
-				"type": "content_block_stop", "index": contentIndex,
-			})
-			contentIndex++
-		}
-	}
-
-	// 推送文本块
-	if outputContent != "" {
-		h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-			"type": "content_block_start", "index": contentIndex,
-			"content_block": map[string]string{"type": "text", "text": ""},
-		})
-		h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-			"type": "content_block_delta", "index": contentIndex,
-			"delta": map[string]string{"type": "text_delta", "text": outputContent},
-		})
-		h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
-			"type": "content_block_stop", "index": contentIndex,
-		})
-		contentIndex++
-	}
-
-	// 推送工具调用块
-	for _, tu := range toolUses {
-		inputJSON, _ := json.Marshal(tu.Input)
-		h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-			"type": "content_block_start", "index": contentIndex,
-			"content_block": map[string]interface{}{
-				"type": "tool_use", "id": tu.ToolUseID, "name": tu.Name, "input": map[string]interface{}{},
-			},
-		})
-		h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-			"type": "content_block_delta", "index": contentIndex,
-			"delta": map[string]interface{}{"type": "input_json_delta", "partial_json": string(inputJSON)},
-		})
-		h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{
-			"type": "content_block_stop", "index": contentIndex,
-		})
-		contentIndex++
-	}
-
-	stopReason := "end_turn"
-	if len(toolUses) > 0 {
-		stopReason = "tool_use"
-	}
-
-	h.sendSSE(w, flusher, "message_delta", map[string]interface{}{
-		"type":  "message_delta",
-		"delta": map[string]interface{}{"stop_reason": stopReason},
-		"usage": buildClaudeUsageMap(finalInputTokens, outputTokens, cacheUsage, cacheProfile != nil),
-	})
-	h.sendSSE(w, flusher, "message_stop", map[string]interface{}{"type": "message_stop"})
-}
-
 // backgroundStatsSaver 后台定时保存统计数据
 func (h *Handler) backgroundStatsSaver() {
 	ticker := time.NewTicker(30 * time.Second)
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 0a57bb2..bc8233d 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -132,9 +132,9 @@ type InferenceConfig struct {
 	TopP        float64 `json:"topP,omitempty"`
 }
 
-// ==================== 流式回调 ====================
+// ==================== Stream Callbacks ====================
 
-// KiroStreamCallback 流式响应回调
+// KiroStreamCallback stream response callbacks
 type KiroStreamCallback struct {
 	OnText           func(text string, isThinking bool)
 	OnToolUse        func(toolUse KiroToolUse)
@@ -377,11 +377,12 @@ func updateTokensFromEvent(event map[string]interface{}, currentInputTokens, cur
 	return inputTokens, outputTokens
 }
 
-// getContextWindowSize 返回模型的上下文窗口大小（token 数）
-// Kiro 托管的 Claude 模型窗口由 AWS 硬性规定，此处与官方保持一致
+// getContextWindowSize returns the context window size (in tokens) for a model.
 func getContextWindowSize(model string) int {
 	m := strings.ToLower(model)
-	if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") {
+	// sonnet-4.6, opus-4.6, opus-4.7 all have 1M context windows
+	if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") ||
+		strings.Contains(m, "4.7") || strings.Contains(m, "4-7") {
 		return 1_000_000
 	}
 	return 200_000

From 221348b975bde927baa13344518d9da1276b40fe Mon Sep 17 00:00:00 2001
From: Henry Yang <83214045+HenryXiaoYang@users.noreply.github.com>
Date: Mon, 11 May 2026 21:01:54 +0800
Subject: [PATCH 13/22] fix: support Claude thinking config routing (#40)

---
 README.md                |   2 +-
 README_CN.md             |   2 +-
 proxy/handler.go         | 143 +++++++++++++++++++----
 proxy/handler_test.go    | 238 ++++++++++++++++++++++++++++++++++++++-
 proxy/translator.go      | 132 +++++++++++++++++++---
 proxy/translator_test.go |  17 +++
 6 files changed, 492 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 49a1263..d0dcf4b 100644
--- a/README.md
+++ b/README.md
@@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \
 
 ## Thinking Mode
 
-Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode.
+Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Claude-compatible requests that include a top-level `thinking` config such as `{"type":"enabled","budget_tokens":2048}` or `{"type":"adaptive"}` also enable thinking mode automatically. Configure output format in the admin panel under Settings - Thinking Mode.
 
 ## Environment Variables
 
diff --git a/README_CN.md b/README_CN.md
index b6b79d2..542b2f3 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \
 
 ## 思考模式
 
-在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。
+在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。Claude 兼容请求如果带有顶层 `thinking` 配置，例如 `{"type":"enabled","budget_tokens":2048}` 或 `{"type":"adaptive"}`，也会自动启用 thinking 模式。输出格式可在管理面板「设置 - Thinking 模式」中配置。
 
 ## 环境变量
 
diff --git a/proxy/handler.go b/proxy/handler.go
index a7fb592..2c42be9 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -66,6 +66,9 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
 	if len(req.Messages) == 0 {
 		return "messages must not be empty"
 	}
+	if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
+		return msg
+	}
 
 	hasUserContext := false
 	lastRole := ""
@@ -94,6 +97,75 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
 	return ""
 }
 
+func validateClaudeThinkingConfig(thinking *ClaudeThinkingConfig, maxTokens int) string { // returns "" when the config is acceptable, otherwise a message describing the first violation found
+	if thinking == nil { // no thinking object in the request: nothing to validate
+		return ""
+	}
+
+	kind := strings.ToLower(strings.TrimSpace(thinking.Type)) // type is compared case-insensitively with surrounding whitespace ignored
+	switch kind {
+	case "enabled":
+		if maxTokens == 0 { // NOTE(review): a request that carries no max_tokens decodes to 0 and is rejected here; the count-tokens handler also passes req.MaxTokens — confirm that is intended
+			return "thinking.type enabled cannot be used with max_tokens=0"
+		}
+		if thinking.BudgetTokens <= 0 { // a zero or negative budget is reported as "required"
+			return "thinking.budget_tokens is required when thinking.type is enabled"
+		}
+		if thinking.BudgetTokens < 1024 {
+			return "thinking.budget_tokens must be at least 1024"
+		}
+		if maxTokens > 0 && thinking.BudgetTokens >= maxTokens { // the upper bound is only enforced for a positive max_tokens
+			return "thinking.budget_tokens must be less than max_tokens"
+		}
+	case "adaptive":
+		if thinking.BudgetTokens != 0 {
+			return "thinking.budget_tokens is not supported when thinking.type is adaptive"
+		}
+	case "disabled":
+		if thinking.BudgetTokens != 0 {
+			return "thinking.budget_tokens is not supported when thinking.type is disabled"
+		}
+	default: // covers an empty type as well as any unknown value
+		return "thinking.type must be one of: enabled, adaptive, disabled"
+	}
+
+	display := strings.ToLower(strings.TrimSpace(thinking.Display)) // display is normalized the same way as type
+	if display != "" && display != "summarized" && display != "omitted" {
+		return "thinking.display must be one of: summarized, omitted"
+	}
+	if kind == "disabled" && display != "" { // any display value is rejected once thinking is disabled
+		return "thinking.display is not supported when thinking.type is disabled"
+	}
+
+	return ""
+}
+
+type claudeThinkingResponseOptions struct { // per-request choices for how thinking output is rendered in the response
+	Format      string // thinking output format name; defaults to "thinking" when nothing else is configured
+	OmitDisplay bool   // set when the request asked for display "omitted"
+}
+
+func resolveClaudeThinkingResponseOptions(thinking *ClaudeThinkingConfig, defaultFormat string) claudeThinkingResponseOptions { // picks the response format and whether thinking text is hidden for one request
+	opts := claudeThinkingResponseOptions{Format: defaultFormat}
+	if opts.Format == "" { // an empty default falls back to "thinking"
+		opts.Format = "thinking"
+	}
+	if thinking == nil { // no request-level config: keep the default format
+		return opts
+	}
+
+	display := strings.ToLower(strings.TrimSpace(thinking.Display))
+	switch display { // any other display value (including empty) leaves the default format untouched
+	case "summarized":
+		opts.Format = "thinking"
+	case "omitted":
+		opts.Format = "thinking"
+		opts.OmitDisplay = true
+	}
+
+	return opts
+}
+
 func validateOpenAIRequestShape(req *OpenAIRequest) string {
 	if len(req.Messages) == 0 {
 		return "messages must not be empty"
@@ -569,8 +641,17 @@ func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
 		h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON")
 		return
 	}
+	if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
+		h.sendClaudeError(w, 400, "invalid_request_error", msg)
+		return
+	}
 
-	estimatedTokens := estimateClaudeRequestInputTokens(&req)
+	thinkingCfg := config.GetThinkingConfig()
+	actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
+	req.Model = actualModel
+	effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
+
+	estimatedTokens := estimateClaudeRequestInputTokens(effectiveReq)
 	if estimatedTokens < 1 {
 		estimatedTokens = 1
 	}
@@ -622,10 +703,12 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 
 	// 解析模型和 thinking 模式
 	thinkingCfg := config.GetThinkingConfig()
-	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
+	actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
 	req.Model = actualModel
-	estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
-	cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
+	effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
+	thinkingResponseOpts := resolveClaudeThinkingResponseOptions(req.Thinking, thinkingCfg.ClaudeFormat)
+	estimatedInputTokens := estimateClaudeRequestInputTokens(effectiveReq)
+	cacheProfile := h.promptCache.BuildClaudeProfile(effectiveReq, estimatedInputTokens)
 	cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
 
 	// 转换请求
@@ -633,14 +716,14 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
 
 	// Stream or non-stream
 	if req.Stream {
-		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+		h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
 	} else {
-		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
+		h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
 	}
 }
 
 // handleClaudeStream Claude 流式响应
-func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
+func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
 	w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
 	w.Header().Set("Cache-Control", "no-cache")
 	w.Header().Set("Connection", "keep-alive")
@@ -652,7 +735,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	}
 
 	// 获取 thinking 输出格式配置
-	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
+	thinkingFormat := thinkingOpts.Format
 
 	msgID := "msg_" + uuid.New().String()
 	var inputTokens, outputTokens int
@@ -769,6 +852,19 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 				"delta": map[string]string{"type": "text_delta", "text": text},
 			})
 		default:
+			if thinkingOpts.OmitDisplay {
+				if thinkingState == 1 {
+					startContentBlock("thinking")
+					return
+				}
+				if thinkingState == 3 {
+					if activeBlockType != "thinking" {
+						startContentBlock("thinking")
+					}
+					closeActiveBlock()
+				}
+				return
+			}
 			if thinkingState == 3 && text == "" {
 				if activeBlockType == "thinking" {
 					closeActiveBlock()
@@ -1103,7 +1199,7 @@ func (h *Handler) recordFailure() {
 }
 
 // handleClaudeNonStream Claude 非流式响应
-func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
+func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
 	var content string
 	var thinkingContent string
 	var toolUses []KiroToolUse
@@ -1146,13 +1242,14 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	}
 
 	// 合并 thinking 内容（如果有 reasoningContentEvent 的内容）
-	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
+	thinkingFormat := thinkingOpts.Format
 	finalContent, extractedReasoning := extractThinkingFromContent(content)
-	if thinking && thinkingContent == "" && extractedReasoning != "" {
-		thinkingContent = extractedReasoning
+	rawThinkingContent := thinkingContent
+	if thinking && rawThinkingContent == "" && extractedReasoning != "" {
+		rawThinkingContent = extractedReasoning
 	}
 	if !thinking {
-		thinkingContent = ""
+		rawThinkingContent = ""
 	}
 
 	if realInputTokens > 0 {
@@ -1160,26 +1257,32 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	} else if inputTokens <= 0 {
 		inputTokens = estimatedInputTokens
 	}
-	outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
+	outputTokens = estimateClaudeOutputTokens(finalContent, rawThinkingContent, toolUses)
 
 	h.recordSuccess(inputTokens, outputTokens, credits)
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
 	h.promptCache.Update(account.ID, cacheProfile)
 
-	if thinking && thinkingContent != "" {
+	responseThinkingContent := rawThinkingContent
+	includeEmptyThinkingBlock := thinking && thinkingOpts.OmitDisplay && rawThinkingContent != ""
+	if includeEmptyThinkingBlock {
+		responseThinkingContent = ""
+	}
+
+	if thinking && responseThinkingContent != "" {
 		switch thinkingFormat {
 		case "think":
-			finalContent = "<think>" + thinkingContent + "</think>" + finalContent
-			thinkingContent = ""
+			finalContent = "<think>" + responseThinkingContent + "</think>" + finalContent
+			responseThinkingContent = ""
 		case "reasoning_content":
-			finalContent = thinkingContent + finalContent // Claude 格式不支持 reasoning_content，直接拼接
-			thinkingContent = ""
+			finalContent = responseThinkingContent + finalContent // Claude 格式不支持 reasoning_content，直接拼接
+			responseThinkingContent = ""
 		default:
 		}
 	}
 
-	resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model)
+	resp := KiroToClaudeResponse(finalContent, responseThinkingContent, includeEmptyThinkingBlock, toolUses, inputTokens, outputTokens, model)
 	resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage)
 	resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens
 	resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens
diff --git a/proxy/handler_test.go b/proxy/handler_test.go
index 672092a..e905bf1 100644
--- a/proxy/handler_test.go
+++ b/proxy/handler_test.go
@@ -1,8 +1,6 @@
 package proxy
 
-import (
-	"testing"
-)
+import "testing"
 
 func TestThinkingSourceReasoningFirst(t *testing.T) {
 	var source thinkingStreamSource
@@ -101,6 +99,240 @@ func TestValidateClaudeRequestShapeRejectsAssistantPrefill(t *testing.T) {
 	}
 }
 
+func TestResolveClaudeThinkingModeHonorsRequestThinking(t *testing.T) { // table test: the request-level thinking config and the model suffix each decide the resolved thinking flag
+	tests := []struct {
+		name         string
+		model        string
+		thinking     *ClaudeThinkingConfig
+		wantModel    string
+		wantThinking bool
+	}{
+		{
+			name:         "adaptive request enables thinking",
+			model:        "claude-sonnet-4.6",
+			thinking:     &ClaudeThinkingConfig{Type: "adaptive"},
+			wantModel:    "claude-sonnet-4.6",
+			wantThinking: true,
+		},
+		{
+			name:         "enabled request enables thinking",
+			model:        "claude-opus-4.5",
+			thinking:     &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
+			wantModel:    "claude-opus-4.5",
+			wantThinking: true,
+		},
+		{
+			name:         "disabled request keeps thinking off",
+			model:        "claude-opus-4.7",
+			thinking:     &ClaudeThinkingConfig{Type: "disabled"},
+			wantModel:    "claude-opus-4.7",
+			wantThinking: false,
+		},
+		{
+			name:         "suffix remains supported when thinking is disabled", // the suffix wins over an explicit "disabled" config
+			model:        "claude-sonnet-4.5-thinking",
+			thinking:     &ClaudeThinkingConfig{Type: "disabled"},
+			wantModel:    "claude-sonnet-4.5",
+			wantThinking: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			gotModel, gotThinking := resolveClaudeThinkingMode(tc.model, tc.thinking, "-thinking")
+			if gotModel != tc.wantModel {
+				t.Fatalf("expected model %q, got %q", tc.wantModel, gotModel)
+			}
+			if gotThinking != tc.wantThinking {
+				t.Fatalf("expected thinking=%v, got %v", tc.wantThinking, gotThinking)
+			}
+		})
+	}
+}
+
+func TestCloneClaudeRequestForThinkingInjectsPromptWithoutMutatingOriginal(t *testing.T) { // a string system prompt becomes two blocks in the clone while the original request keeps its string
+	req := &ClaudeRequest{
+		Model:  "claude-sonnet-4.6",
+		System: "Follow the user instructions.",
+	}
+
+	cloned := cloneClaudeRequestForThinking(req, true)
+	blocks, ok := cloned.System.([]interface{})
+	if !ok {
+		t.Fatalf("expected cloned system prompt to be structured blocks, got %T", cloned.System)
+	}
+	if len(blocks) != 2 {
+		t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks))
+	}
+	gotPrompt := extractSystemPrompt(cloned.System)
+	expected := ThinkingModePrompt + "\n\nFollow the user instructions." // the flattened prompt is expected to separate the two blocks with a blank line
+	if gotPrompt != expected {
+		t.Fatalf("expected injected system prompt %q, got %q", expected, gotPrompt)
+	}
+	if original, ok := req.System.(string); !ok || original != "Follow the user instructions." {
+		t.Fatalf("expected original request system prompt to stay unchanged, got %#v", req.System)
+	}
+}
+
+func TestCloneClaudeRequestForThinkingPreservesStructuredSystemBlocks(t *testing.T) { // structured system blocks keep their cache_control after the thinking block is prepended
+	req := &ClaudeRequest{
+		Model: "claude-sonnet-4.6",
+		System: []interface{}{
+			map[string]interface{}{
+				"type": "text",
+				"text": "cached system",
+				"cache_control": map[string]interface{}{
+					"type": "ephemeral",
+					"ttl":  "5m",
+				},
+			},
+		},
+	}
+
+	cloned := cloneClaudeRequestForThinking(req, true)
+	blocks, ok := cloned.System.([]interface{})
+	if !ok {
+		t.Fatalf("expected structured system blocks, got %T", cloned.System)
+	}
+	if len(blocks) != 2 {
+		t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks))
+	}
+	first, ok := blocks[0].(map[string]interface{})
+	if !ok || first["text"] != ThinkingModePrompt+"\n" { // the prepended block carries a trailing newline because other system content exists
+		t.Fatalf("expected first block to be thinking prompt, got %#v", blocks[0])
+	}
+	second, ok := blocks[1].(map[string]interface{})
+	if !ok {
+		t.Fatalf("expected original system block to remain a map, got %T", blocks[1])
+	}
+	cacheControl, ok := second["cache_control"].(map[string]interface{})
+	if !ok || cacheControl["type"] != "ephemeral" {
+		t.Fatalf("expected original cache_control to be preserved, got %#v", second["cache_control"])
+	}
+}
+
+func TestThinkingPromptAffectsClaudeTokenEstimate(t *testing.T) { // the input-token estimate must grow once the thinking prompt is injected
+	req := &ClaudeRequest{
+		Model:    "claude-sonnet-4.6",
+		Messages: []ClaudeMessage{{Role: "user", Content: "hello"}},
+	}
+
+	baseTokens := estimateClaudeRequestInputTokens(req)
+	thinkingTokens := estimateClaudeRequestInputTokens(cloneClaudeRequestForThinking(req, true))
+
+	if thinkingTokens <= baseTokens {
+		t.Fatalf("expected thinking tokens (%d) to exceed base tokens (%d)", thinkingTokens, baseTokens)
+	}
+}
+
+func TestValidateClaudeThinkingConfig(t *testing.T) { // table test: each case only checks whether validation returns a message, not which message
+	tests := []struct {
+		name        string
+		thinking    *ClaudeThinkingConfig
+		maxTokens   int
+		expectError bool
+	}{
+		{
+			name:        "adaptive is valid",
+			thinking:    &ClaudeThinkingConfig{Type: "adaptive"},
+			maxTokens:   4096,
+			expectError: false,
+		},
+		{
+			name:        "enabled requires budget",
+			thinking:    &ClaudeThinkingConfig{Type: "enabled"},
+			maxTokens:   4096,
+			expectError: true,
+		},
+		{
+			name:        "enabled requires at least 1024 budget tokens",
+			thinking:    &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 512},
+			maxTokens:   4096,
+			expectError: true,
+		},
+		{
+			name:        "enabled rejects max tokens zero",
+			thinking:    &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
+			maxTokens:   0,
+			expectError: true,
+		},
+		{
+			name:        "enabled budget must stay below max tokens", // a budget equal to max_tokens is already rejected
+			thinking:    &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 4096},
+			maxTokens:   4096,
+			expectError: true,
+		},
+		{
+			name:        "disabled rejects display",
+			thinking:    &ClaudeThinkingConfig{Type: "disabled", Display: "summarized"},
+			maxTokens:   4096,
+			expectError: true,
+		},
+		{
+			name:        "missing type is rejected",
+			thinking:    &ClaudeThinkingConfig{},
+			maxTokens:   4096,
+			expectError: true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			errMsg := validateClaudeThinkingConfig(tc.thinking, tc.maxTokens)
+			if tc.expectError && errMsg == "" {
+				t.Fatalf("expected validation error")
+			}
+			if !tc.expectError && errMsg != "" {
+				t.Fatalf("expected thinking config to be valid, got %q", errMsg)
+			}
+		})
+	}
+}
+
+func TestResolveClaudeThinkingResponseOptions(t *testing.T) { // table test: the request's display value overrides the configured default format
+	tests := []struct {
+		name       string
+		thinking   *ClaudeThinkingConfig
+		defaultFmt string
+		wantFmt    string
+		wantOmit   bool
+	}{
+		{
+			name:       "default config is preserved when display unset",
+			thinking:   &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
+			defaultFmt: "think",
+			wantFmt:    "think",
+			wantOmit:   false,
+		},
+		{
+			name:       "summarized forces official thinking blocks",
+			thinking:   &ClaudeThinkingConfig{Type: "adaptive", Display: "summarized"},
+			defaultFmt: "reasoning_content",
+			wantFmt:    "thinking",
+			wantOmit:   false,
+		},
+		{
+			name:       "omitted forces official thinking blocks and hides content",
+			thinking:   &ClaudeThinkingConfig{Type: "adaptive", Display: "omitted"},
+			defaultFmt: "think",
+			wantFmt:    "thinking",
+			wantOmit:   true,
+		},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			opts := resolveClaudeThinkingResponseOptions(tc.thinking, tc.defaultFmt)
+			if opts.Format != tc.wantFmt {
+				t.Fatalf("expected format %q, got %q", tc.wantFmt, opts.Format)
+			}
+			if opts.OmitDisplay != tc.wantOmit {
+				t.Fatalf("expected omitDisplay=%v, got %v", tc.wantOmit, opts.OmitDisplay)
+			}
+		})
+	}
+}
+
 func TestMergeUniqueModelsPreservesUnionAcrossAccounts(t *testing.T) {
 	base := []ModelInfo{
 		{ModelId: "claude-sonnet-4.5", InputTypes: []string{"TEXT"}},
diff --git a/proxy/translator.go b/proxy/translator.go
index 500b74e..38b562e 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -76,6 +76,19 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 	return model, thinking
 }
 
+func resolveClaudeThinkingMode(model string, thinkingCfg *ClaudeThinkingConfig, thinkingSuffix string) (string, bool) { // returns the model name from ParseModelAndThinking and whether thinking is on for this request
+	actualModel, suffixThinking := ParseModelAndThinking(model, thinkingSuffix)
+	return actualModel, suffixThinking || isClaudeThinkingRequested(thinkingCfg) // either source can turn thinking on; a "disabled" config does not cancel the suffix
+}
+
+func isClaudeThinkingRequested(thinkingCfg *ClaudeThinkingConfig) bool { // reports whether the request-level config asks for thinking
+	if thinkingCfg == nil { // no config means no request-level opt-in
+		return false
+	}
+	kind := strings.ToLower(strings.TrimSpace(thinkingCfg.Type)) // case-insensitive, whitespace-trimmed comparison
+	return kind == "enabled" || kind == "adaptive" // every other value, including "disabled" and empty, counts as not requested
+}
+
 func MapModel(model string) string {
 	mapped, _ := ParseModelAndThinking(model, "-thinking")
 	return mapped
@@ -84,15 +97,22 @@ func MapModel(model string) string {
 // ==================== Claude API 类型 ====================
 
 type ClaudeRequest struct {
-	Model       string          `json:"model"`
-	Messages    []ClaudeMessage `json:"messages"`
-	MaxTokens   int             `json:"max_tokens"`
-	Temperature float64         `json:"temperature,omitempty"`
-	TopP        float64         `json:"top_p,omitempty"`
-	Stream      bool            `json:"stream,omitempty"`
-	System      interface{}     `json:"system,omitempty"` // string or []SystemBlock
-	Tools       []ClaudeTool    `json:"tools,omitempty"`
-	ToolChoice  interface{}     `json:"tool_choice,omitempty"`
+	Model       string                `json:"model"`
+	Messages    []ClaudeMessage       `json:"messages"`
+	MaxTokens   int                   `json:"max_tokens"`
+	Temperature float64               `json:"temperature,omitempty"`
+	TopP        float64               `json:"top_p,omitempty"`
+	Stream      bool                  `json:"stream,omitempty"`
+	System      interface{}           `json:"system,omitempty"` // string or []SystemBlock
+	Thinking    *ClaudeThinkingConfig `json:"thinking,omitempty"`
+	Tools       []ClaudeTool          `json:"tools,omitempty"`
+	ToolChoice  interface{}           `json:"tool_choice,omitempty"`
+}
+
+type ClaudeThinkingConfig struct { // the top-level "thinking" object of a ClaudeRequest
+	Type         string `json:"type,omitempty"`          // thinking mode; request validation accepts enabled, adaptive or disabled
+	BudgetTokens int    `json:"budget_tokens,omitempty"` // thinking token budget; request validation only accepts it with type enabled
+	Display      string `json:"display,omitempty"`       // optional display mode; request validation accepts summarized or omitted
+}
 
 type ClaudeMessage struct {
@@ -104,6 +124,7 @@ type ClaudeContentBlock struct {
 	Type      string       `json:"type"`
 	Text      string       `json:"text,omitempty"`
 	Thinking  string       `json:"thinking,omitempty"`
+	Signature string       `json:"signature,omitempty"`
 	ID        string       `json:"id,omitempty"`
 	Name      string       `json:"name,omitempty"`
 	Input     interface{}  `json:"input,omitempty"`
@@ -157,12 +178,7 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	origin := "AI_EDITOR"
 
 	// 提取系统提示
-	systemPrompt := extractSystemPrompt(req.System)
-
-	// 如果启用 thinking 模式，注入 thinking 提示
-	if thinking {
-		systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
-	}
+	systemPrompt := buildClaudeSystemPrompt(req.System, thinking)
 
 	// 构建历史消息
 	history := make([]KiroHistoryMessage, 0)
@@ -263,6 +279,88 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	return payload
 }
 
+func buildClaudeSystemPrompt(system interface{}, thinking bool) string { // flattens the system value to text and, in thinking mode, puts ThinkingModePrompt in front of it
+	systemPrompt := extractSystemPrompt(system)
+	if !thinking { // thinking off: the extracted prompt is returned unchanged
+		return systemPrompt
+	}
+	if systemPrompt == "" { // avoids a dangling blank-line separator when there is no system prompt
+		return ThinkingModePrompt
+	}
+	return ThinkingModePrompt + "\n\n" + systemPrompt
+}
+
+func cloneClaudeRequestForThinking(req *ClaudeRequest, thinking bool) *ClaudeRequest { // returns a copy of req whose System has the thinking prompt prepended when thinking is on
+	if req == nil { // nil in, nil out
+		return nil
+	}
+
+	cloned := *req // shallow struct copy: slice and interface fields still refer to the same underlying data as req
+	if thinking {
+		cloned.System = prependThinkingSystem(req.System) // only the copy's System field is replaced; req.System is left as it was
+	}
+	return &cloned
+}
+
+func prependThinkingSystem(system interface{}) interface{} { // returns a new block list whose first element is a text block holding ThinkingModePrompt
+	thinkingText := ThinkingModePrompt
+	if hasClaudeSystemContent(system) { // a trailing newline is appended only when other system content is reported
+		thinkingText += "\n"
+	}
+	thinkingBlock := map[string]interface{}{
+		"type": "text",
+		"text": thinkingText,
+	}
+
+	switch v := system.(type) {
+	case nil:
+		return []interface{}{thinkingBlock}
+	case string:
+		if v == "" {
+			return []interface{}{thinkingBlock}
+		}
+		return []interface{}{ // a plain string prompt is wrapped into its own text block after the thinking block
+			thinkingBlock,
+			map[string]interface{}{
+				"type": "text",
+				"text": v,
+			},
+		}
+	case []interface{}:
+		blocks := make([]interface{}, 0, len(v)+1) // fresh slice, so the caller's slice is not modified; the block values themselves are reused
+		blocks = append(blocks, thinkingBlock)
+		blocks = append(blocks, v...)
+		return blocks
+	case []string:
+		blocks := make([]interface{}, 0, len(v)+1)
+		blocks = append(blocks, thinkingBlock)
+		for _, block := range v { // each string becomes its own text block
+			blocks = append(blocks, map[string]interface{}{
+				"type": "text",
+				"text": block,
+			})
+		}
+		return blocks
+	default: // NOTE(review): any other system type is dropped here, yet hasClaudeSystemContent reports it as content, so the prompt still gets the trailing newline — confirm this is intended
+		return []interface{}{thinkingBlock}
+	}
+}
+
+func hasClaudeSystemContent(system interface{}) bool { // reports whether the system value holds anything besides nil, an empty string or an empty list
+	switch v := system.(type) {
+	case nil:
+		return false
+	case string:
+		return v != ""
+	case []interface{}:
+		return len(v) > 0 // only the length is checked, not whether the blocks contain text
+	case []string:
+		return len(v) > 0
+	default: // any other type is treated as content
+		return true
+	}
+}
+
 func extractSystemPrompt(system interface{}) string {
 	if system == nil {
 		return ""
@@ -459,10 +557,10 @@ func shortenToolName(name string) string {
 
 // ==================== Kiro -> Claude 转换 ====================
 
-func KiroToClaudeResponse(content, thinkingContent string, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse {
+func KiroToClaudeResponse(content, thinkingContent string, includeEmptyThinkingBlock bool, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse {
 	blocks := make([]ClaudeContentBlock, 0)
 
-	if thinkingContent != "" {
+	if thinkingContent != "" || includeEmptyThinkingBlock {
 		blocks = append(blocks, ClaudeContentBlock{
 			Type:     "thinking",
 			Thinking: thinkingContent,
diff --git a/proxy/translator_test.go b/proxy/translator_test.go
index 7c5dc43..e0f276f 100644
--- a/proxy/translator_test.go
+++ b/proxy/translator_test.go
@@ -233,6 +233,23 @@ func TestClaudeToKiroDropsLeadingAssistantHistory(t *testing.T) {
 	}
 }
 
+func TestKiroToClaudeResponseCanEmitEmptyThinkingBlock(t *testing.T) { // with includeEmptyThinkingBlock set, an empty thinking block must precede the text block
+	resp := KiroToClaudeResponse("final answer", "", true, nil, 10, 20, "claude-sonnet-4.6")
+
+	if len(resp.Content) != 2 {
+		t.Fatalf("expected empty thinking block plus text block, got %d blocks", len(resp.Content))
+	}
+	if resp.Content[0].Type != "thinking" {
+		t.Fatalf("expected first block to be thinking, got %#v", resp.Content[0])
+	}
+	if resp.Content[0].Thinking != "" {
+		t.Fatalf("expected omitted thinking block to have empty content, got %#v", resp.Content[0].Thinking)
+	}
+	if resp.Content[1].Type != "text" || resp.Content[1].Text != "final answer" {
+		t.Fatalf("expected text block to be preserved, got %#v", resp.Content[1])
+	}
+}
+
 func TestToolResultsContinuationIncludesInstructionPrefix(t *testing.T) {
 	req := &OpenAIRequest{
 		Model: "claude-sonnet-4.5",

From 404e2425fa6a660520187d86762a1cfb86f03dfd Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 21:40:45 +0800
Subject: [PATCH 14/22] feat: add outbound proxy support (socks5/http) for
 restricted networks

---
 auth/builderid.go   |  4 ++--
 auth/http_client.go | 50 +++++++++++++++++++++++++++++---------
 auth/iam_sso.go     |  4 ++--
 auth/oidc.go        |  4 ++--
 auth/sso_token.go   | 16 ++++++-------
 config/config.go    | 21 ++++++++++++++++
 proxy/handler.go    | 55 ++++++++++++++++++++++++++++++++++++++++++
 proxy/kiro.go       | 58 ++++++++++++++++++++++++++++++++-------------
 web/index.html      | 35 +++++++++++++++++++++++++++
 9 files changed, 205 insertions(+), 42 deletions(-)

diff --git a/auth/builderid.go b/auth/builderid.go
index 460ad6b..21a74d9 100644
--- a/auth/builderid.go
+++ b/auth/builderid.go
@@ -57,7 +57,7 @@ func StartBuilderIdLogin(region string) (*BuilderIdSession, error) {
 	regReq, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(regBody))
 	regReq.Header.Set("Content-Type", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	regResp, err := client.Do(regReq)
 	if err != nil {
 		return nil, fmt.Errorf("register client failed: %v", err)
@@ -175,7 +175,7 @@ func PollBuilderIdAuth(sessionID string) (accessToken, refreshToken, clientID, c
 	tokenReq, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(tokenBody))
 	tokenReq.Header.Set("Content-Type", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	tokenResp, err := client.Do(tokenReq)
 	if err != nil {
 		return "", "", "", "", "", 0, "", fmt.Errorf("token request failed: %v", err)
diff --git a/auth/http_client.go b/auth/http_client.go
index 836fb7c..fa5443e 100644
--- a/auth/http_client.go
+++ b/auth/http_client.go
@@ -3,18 +3,46 @@ package auth
 
 import (
 	"net/http"
+	"net/url"
+	"sync/atomic"
 	"time"
 )
 
-// 全局 HTTP 客户端，复用连接池
-// 用于所有 auth 模块的 HTTP 请求
-var httpClient = &http.Client{
-	Timeout: 30 * time.Second,
-	Transport: &http.Transport{
-		MaxIdleConns:        50,               // 最大空闲连接数
-		MaxIdleConnsPerHost: 10,               // 每个 Host 最大空闲连接数
-		IdleConnTimeout:     90 * time.Second, // 空闲连接超时
-		DisableCompression:  false,            // 启用压缩
-		ForceAttemptHTTP2:   true,             // 尝试使用 HTTP/2
-	},
+// httpClientStore holds the global auth HTTP client; it is swapped atomically so the proxy can be reconfigured at runtime.
+var httpClientStore atomic.Pointer[http.Client]
+
+// httpClient returns the current global auth HTTP client.
+func httpClient() *http.Client {
+	return httpClientStore.Load() // whatever InitHttpClient stored most recently
+}
+
+func init() { // installs a client with no proxy configured when the package is initialized
+	InitHttpClient("")
+}
+
+// buildAuthTransport builds a Transport that optionally routes through the given proxy URL.
+func buildAuthTransport(proxyURL string) *http.Transport {
+	t := &http.Transport{
+		MaxIdleConns:        50,
+		MaxIdleConnsPerHost: 10,
+		IdleConnTimeout:     90 * time.Second,
+		DisableCompression:  false,
+		ForceAttemptHTTP2:   true,
+	}
+	if proxyURL != "" { // an empty URL leaves the transport without a proxy setting
+		if u, err := url.Parse(proxyURL); err == nil { // NOTE(review): a URL that fails to parse is ignored silently and no proxy is set — confirm that falling back is acceptable
+			t.Proxy = http.ProxyURL(u)
+			t.ForceAttemptHTTP2 = false // HTTP/2 is not forced when a proxy is configured
+		}
+	}
+	return t
+}
+
+// InitHttpClient initializes (or re-initializes) the auth package's global HTTP client for the given proxy URL.
+func InitHttpClient(proxyURL string) {
+	client := &http.Client{
+		Timeout:   30 * time.Second,
+		Transport: buildAuthTransport(proxyURL), // a fresh transport is built on every call
+	}
+	httpClientStore.Store(client) // later httpClient() calls return this client
 }
diff --git a/auth/iam_sso.go b/auth/iam_sso.go
index e17e4eb..bfd4a4a 100644
--- a/auth/iam_sso.go
+++ b/auth/iam_sso.go
@@ -170,7 +170,7 @@ func registerOIDCClient(oidcBase, startUrl, redirectUri string) (clientID, clien
 	req, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	resp, err := httpClient.Do(req)
+	resp, err := httpClient().Do(req)
 	if err != nil {
 		return "", "", err
 	}
@@ -207,7 +207,7 @@ func exchangeToken(oidcBase, clientID, clientSecret, code, codeVerifier, redirec
 	req, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	resp, err := httpClient.Do(req)
+	resp, err := httpClient().Do(req)
 	if err != nil {
 		return "", "", 0, err
 	}
diff --git a/auth/oidc.go b/auth/oidc.go
index 5a405d6..7dcb494 100644
--- a/auth/oidc.go
+++ b/auth/oidc.go
@@ -40,7 +40,7 @@ func refreshOIDCToken(refreshToken, clientID, clientSecret, region string) (stri
 	req, _ := http.NewRequest("POST", url, bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	resp, err := httpClient.Do(req)
+	resp, err := httpClient().Do(req)
 	if err != nil {
 		return "", "", 0, err
 	}
@@ -77,7 +77,7 @@ func refreshSocialToken(refreshToken string) (string, string, int64, error) {
 	req, _ := http.NewRequest("POST", url, bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	resp, err := httpClient.Do(req)
+	resp, err := httpClient().Do(req)
 	if err != nil {
 		return "", "", 0, err
 	}
diff --git a/auth/sso_token.go b/auth/sso_token.go
index 22da746..dee0540 100644
--- a/auth/sso_token.go
+++ b/auth/sso_token.go
@@ -79,7 +79,7 @@ func registerDeviceClient(oidcBase, startUrl string) (clientID, clientSecret str
 	req, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return "", "", err
@@ -110,7 +110,7 @@ func startDeviceAuth(oidcBase, clientID, clientSecret, startUrl string) (deviceC
 	req, _ := http.NewRequest("POST", oidcBase+"/device_authorization", bytes.NewReader(body))
 	req.Header.Set("Content-Type", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return "", "", 0, err
@@ -139,7 +139,7 @@ func verifyBearerToken(portalBase, bearerToken string) error {
 	req.Header.Set("Authorization", "Bearer "+bearerToken)
 	req.Header.Set("Accept", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return err
@@ -157,7 +157,7 @@ func getDeviceSessionToken(portalBase, bearerToken string) (string, error) {
 	req.Header.Set("Authorization", "Bearer "+bearerToken)
 	req.Header.Set("Content-Type", "application/json")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return "", err
@@ -193,7 +193,7 @@ func acceptUserCode(oidcBase, userCode, deviceSessionToken string) (*deviceConte
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Referer", "https://view.awsapps.com/")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return nil, err
@@ -227,7 +227,7 @@ func approveAuth(oidcBase string, deviceContext *deviceContextInfo, deviceSessio
 	req.Header.Set("Content-Type", "application/json")
 	req.Header.Set("Referer", "https://view.awsapps.com/")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return err
@@ -262,7 +262,7 @@ func pollForToken(oidcBase, clientID, clientSecret, deviceCode string, interval
 			req, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(body))
 			req.Header.Set("Content-Type", "application/json")
 
-			client := httpClient
+			client := httpClient()
 			resp, err := client.Do(req)
 			if err != nil {
 				continue
@@ -311,7 +311,7 @@ func GetUserInfo(accessToken string) (email, userID string, err error) {
 	req.Header.Set("User-Agent", "aws-sdk-js/1.0.18 KiroAPIProxy")
 	req.Header.Set("x-amz-user-agent", "aws-sdk-js/1.0.18 KiroAPIProxy")
 
-	client := httpClient
+	client := httpClient()
 	resp, err := client.Do(req)
 	if err != nil {
 		return "", "", err
diff --git a/config/config.go b/config/config.go
index f3ab37e..7195fde 100644
--- a/config/config.go
+++ b/config/config.go
@@ -108,6 +108,12 @@ type Config struct {
 	// Endpoint configuration: "auto", "codewhisperer", or "amazonq"
 	PreferredEndpoint string `json:"preferredEndpoint,omitempty"`
 
+	// Proxy configuration: optional outbound proxy for Kiro API requests
+	// Format: "socks5://host:port", "socks5://user:pass@host:port",
+	//         "http://host:port",  "http://user:pass@host:port"
+	// Leave empty to connect directly.
+	ProxyURL string `json:"proxyURL,omitempty"`
+
 	// Global statistics (persisted across restarts)
 	TotalRequests   int     `json:"totalRequests,omitempty"`   // Total API requests received
 	SuccessRequests int     `json:"successRequests,omitempty"` // Successful requests count
@@ -445,6 +451,21 @@ func UpdatePreferredEndpoint(endpoint string) error {
 	return Save()
 }
 
+// GetProxyURL returns the configured outbound proxy URL, read under the config read lock (empty means connect directly).
+func GetProxyURL() string {
+	cfgLock.RLock()
+	defer cfgLock.RUnlock()
+	return cfg.ProxyURL
+}
+
+// UpdateProxySettings stores proxyURL in the in-memory config under the write lock and returns the result of Save.
+func UpdateProxySettings(proxyURL string) error {
+	cfgLock.Lock()
+	defer cfgLock.Unlock()
+	cfg.ProxyURL = proxyURL
+	return Save()
+}
+
 type KiroClientConfig struct {
 	KiroVersion   string
 	SystemVersion string
diff --git a/proxy/handler.go b/proxy/handler.go
index 2c42be9..8b1c783 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -206,6 +206,9 @@ func validateOpenAIRequestShape(req *OpenAIRequest) string {
 }
 
 func NewHandler() *Handler {
+	// Apply the configured proxy at startup.
+	applyProxyConfig(config.GetProxyURL())
+
 	totalReq, successReq, failedReq, totalTokens, totalCredits := config.GetStats()
 	h := &Handler{
 		pool:            pool.GetPool(),
@@ -1908,6 +1911,10 @@ func (h *Handler) handleAdminAPI(w http.ResponseWriter, r *http.Request) {
 		h.apiGetEndpointConfig(w, r)
 	case path == "/endpoint" && r.Method == "POST":
 		h.apiUpdateEndpointConfig(w, r)
+	case path == "/proxy" && r.Method == "GET":
+		h.apiGetProxy(w, r)
+	case path == "/proxy" && r.Method == "POST":
+		h.apiUpdateProxy(w, r)
 	case path == "/version" && r.Method == "GET":
 		h.apiGetVersion(w, r)
 	case path == "/export" && r.Method == "POST":
@@ -2872,6 +2879,54 @@ func (h *Handler) apiUpdateEndpointConfig(w http.ResponseWriter, r *http.Request
 	json.NewEncoder(w).Encode(map[string]bool{"success": true})
 }
 
+// applyProxyConfig applies the proxy configuration to all outbound HTTP clients (Kiro API + auth module).
+func applyProxyConfig(proxyURL string) {
+	InitKiroHttpClient(proxyURL)
+	auth.InitHttpClient(proxyURL)
+}
+
+// apiGetProxy writes the current proxy configuration to the response as JSON ({"proxyURL": ...}).
+func (h *Handler) apiGetProxy(w http.ResponseWriter, r *http.Request) {
+	json.NewEncoder(w).Encode(map[string]string{
+		"proxyURL": config.GetProxyURL(),
+	})
+}
+
+// apiUpdateProxy decodes {"proxyURL": ...} from the request body, persists it, and applies it immediately.
+func (h *Handler) apiUpdateProxy(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		ProxyURL string `json:"proxyURL"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid JSON"})
+		return
+	}
+
+	// Validate the proxy URL scheme prefix (only when non-empty); the rest of the URL is not parsed here.
+	if req.ProxyURL != "" {
+		if !strings.HasPrefix(req.ProxyURL, "http://") &&
+			!strings.HasPrefix(req.ProxyURL, "https://") &&
+			!strings.HasPrefix(req.ProxyURL, "socks5://") &&
+			!strings.HasPrefix(req.ProxyURL, "socks5h://") {
+			w.WriteHeader(400)
+			json.NewEncoder(w).Encode(map[string]string{"error": "proxyURL must start with http://, https://, socks5://, or socks5h://"})
+			return
+		}
+	}
+
+	if err := config.UpdateProxySettings(req.ProxyURL); err != nil {
+		w.WriteHeader(500)
+		json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
+		return
+	}
+
+	// Apply the new proxy configuration immediately.
+	applyProxyConfig(req.ProxyURL)
+
+	json.NewEncoder(w).Encode(map[string]bool{"success": true})
+}
+
 // apiGetVersion 获取版本信息
 func (h *Handler) apiGetVersion(w http.ResponseWriter, r *http.Request) {
 	json.NewEncoder(w).Encode(map[string]string{
diff --git a/proxy/kiro.go b/proxy/kiro.go
index bc8233d..370cb08 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -12,6 +12,7 @@ import (
 	"net/url"
 	"strconv"
 	"strings"
+	"sync/atomic"
 	"time"
 
 	"github.com/google/uuid"
@@ -40,16 +41,39 @@ var kiroEndpoints = []kiroEndpoint{
 	},
 }
 
-// 全局 HTTP 客户端，复用连接池
-var kiroHttpClient = &http.Client{
-	Timeout: 5 * time.Minute,
-	Transport: &http.Transport{
-		MaxIdleConns:        100,              // 最大空闲连接数
-		MaxIdleConnsPerHost: 20,               // 每个 Host 最大空闲连接数
-		IdleConnTimeout:     90 * time.Second, // 空闲连接超时
-		DisableCompression:  false,            // 启用压缩
-		ForceAttemptHTTP2:   true,             // 尝试使用 HTTP/2
-	},
+// kiroHttpStore holds the global HTTP client; it can be swapped at runtime (proxy reconfiguration).
+var kiroHttpStore atomic.Pointer[http.Client]
+
+func init() {
+	InitKiroHttpClient("")
+}
+
+// buildKiroTransport builds a Transport with an optional proxy; NOTE(review): a proxyURL that fails url.Parse is silently ignored, so no proxy is set.
+func buildKiroTransport(proxyURL string) *http.Transport {
+	t := &http.Transport{
+		MaxIdleConns:        100,
+		MaxIdleConnsPerHost: 20,
+		IdleConnTimeout:     90 * time.Second,
+		DisableCompression:  false,
+		ForceAttemptHTTP2:   true,
+	}
+	if proxyURL != "" {
+		if u, err := url.Parse(proxyURL); err == nil {
+			t.Proxy = http.ProxyURL(u)
+			// Original author's note: the proxy does not support the HTTP/2 protocol upgrade, so it is not forced.
+			t.ForceAttemptHTTP2 = false
+		}
+	}
+	return t
+}
+
+// InitKiroHttpClient initializes (or re-initializes) the Kiro API HTTP client for proxyURL and stores it in kiroHttpStore.
+func InitKiroHttpClient(proxyURL string) {
+	client := &http.Client{
+		Timeout:   5 * time.Minute,
+		Transport: buildKiroTransport(proxyURL),
+	}
+	kiroHttpStore.Store(client)
 }
 
 // ==================== 请求结构 ====================
@@ -136,12 +160,12 @@ type InferenceConfig struct {
 
 // KiroStreamCallback stream response callbacks
 type KiroStreamCallback struct {
-	OnText           func(text string, isThinking bool)
-	OnToolUse        func(toolUse KiroToolUse)
-	OnComplete       func(inputTokens, outputTokens int)
-	OnError          func(err error)
-	OnCredits        func(credits float64)
-	OnContextUsage   func(percentage float64)
+	OnText         func(text string, isThinking bool)
+	OnToolUse      func(toolUse KiroToolUse)
+	OnComplete     func(inputTokens, outputTokens int)
+	OnError        func(err error)
+	OnCredits      func(credits float64)
+	OnContextUsage func(percentage float64)
 }
 
 // ==================== API 调用 ====================
@@ -194,7 +218,7 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3")
 		req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String())
 
-		resp, err := kiroHttpClient.Do(req)
+		resp, err := kiroHttpStore.Load().Do(req)
 		if err != nil {
 			lastErr = err
 			fmt.Printf("[KiroAPI] Endpoint %s failed: %v\n", ep.Name, err)
diff --git a/web/index.html b/web/index.html
index bda7341..8b0790b 100644
--- a/web/index.html
+++ b/web/index.html
@@ -1017,6 +1017,15 @@
                         id="newPassword" data-i18n-placeholder="settings.newPasswordPlaceholder"></div>
                 <button class="btn btn-primary" onclick="changePassword()" data-i18n="settings.changePassword"></button>
             </div>
+            <div class="card">
+                <div class="card-header"><span class="card-title" data-i18n="settings.proxySettings"></span></div>
+                <div class="form-group">
+                    <label data-i18n="settings.proxyURL"></label>
+                    <input type="text" id="proxyURLInput" data-i18n-placeholder="settings.proxyURLPlaceholder">
+                    <small style="color:#64748b;font-size:12px;margin-top:4px;display:block" data-i18n="settings.proxyURLHint"></small>
+                </div>
+                <button class="btn btn-primary" onclick="saveProxyConfig()" data-i18n="settings.saveProxy"></button>
+            </div>
             <div class="card">
                 <div class="card-header"><span class="card-title" data-i18n="settings.statistics"></span></div>
                 <button class="btn btn-danger" onclick="resetStats()" data-i18n="settings.resetStats"></button>
@@ -1146,6 +1155,12 @@
                 'settings.statistics': '统计',
                 'settings.resetStats': '重置统计',
                 'settings.confirmReset': '确定重置统计？',
+                'settings.proxySettings': '出站代理设置',
+                'settings.proxyURL': '代理地址',
+                'settings.proxyURLPlaceholder': '留空则直连（不使用代理）',
+                'settings.proxyURLHint': '支持 socks5://host:port、socks5://user:pass@host:port、http://host:port 格式，适用于网络受限地区的用户',
+                'settings.saveProxy': '保存代理设置',
+                'settings.proxySaved': '代理设置已保存，已即时生效',
                 'api.endpoints': 'API 端点',
                 'api.modelList': '模型列表',
                 'api.stats': '统计数据',
@@ -1352,6 +1367,12 @@
                 'settings.statistics': 'Statistics',
                 'settings.resetStats': 'Reset Statistics',
                 'settings.confirmReset': 'Confirm reset statistics?',
+                'settings.proxySettings': 'Outbound Proxy Settings',
+                'settings.proxyURL': 'Proxy URL',
+                'settings.proxyURLPlaceholder': 'Leave empty to connect directly',
+                'settings.proxyURLHint': 'Supports socks5://host:port, socks5://user:pass@host:port, http://host:port. For users in restricted network regions.',
+                'settings.saveProxy': 'Save Proxy Settings',
+                'settings.proxySaved': 'Proxy settings saved and applied',
                 'api.endpoints': 'API Endpoints',
                 'api.modelList': 'Model List',
                 'api.stats': 'Statistics',
@@ -1991,6 +2012,7 @@
             document.getElementById('apiKeyInput').value = d.apiKey || '';
             loadThinkingConfig();
             loadEndpointConfig();
+            loadProxyConfig();
         }
         async function loadThinkingConfig() {
             const res = await fetch('/admin/api/thinking', { headers: { 'X-Admin-Password': password } });
@@ -2020,6 +2042,19 @@
             const d = await res.json();
             if (d.success) { alert(t('settings.endpointSaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); }
         }
+        async function loadProxyConfig() {
+            const res = await fetch('/admin/api/proxy', { headers: { 'X-Admin-Password': password } });
+            const d = await res.json();
+            document.getElementById('proxyURLInput').value = d.proxyURL || '';
+        }
+        async function saveProxyConfig() {
+            const res = await fetch('/admin/api/proxy', {
+                method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password },
+                body: JSON.stringify({ proxyURL: document.getElementById('proxyURLInput').value.trim() })
+            });
+            const d = await res.json();
+            if (d.success) { alert(t('settings.proxySaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); }
+        }
         function generateApiKey() {
             const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
             let key = 'sk-';

From 50f1a7e5ad7bea964fd52fa9081f3953e0f83709 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 21:54:42 +0800
Subject: [PATCH 15/22] refactor: improve proxy settings UI with type selector
 and structured fields

---
 web/index.html | 82 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 71 insertions(+), 11 deletions(-)

diff --git a/web/index.html b/web/index.html
index 8b0790b..2dd725c 100644
--- a/web/index.html
+++ b/web/index.html
@@ -1020,9 +1020,28 @@
             <div class="card">
                 <div class="card-header"><span class="card-title" data-i18n="settings.proxySettings"></span></div>
                 <div class="form-group">
-                    <label data-i18n="settings.proxyURL"></label>
-                    <input type="text" id="proxyURLInput" data-i18n-placeholder="settings.proxyURLPlaceholder">
-                    <small style="color:#64748b;font-size:12px;margin-top:4px;display:block" data-i18n="settings.proxyURLHint"></small>
+                    <label data-i18n="settings.proxyType"></label>
+                    <select id="proxyType" onchange="onProxyTypeChange()">
+                        <option value="none" data-i18n="settings.proxyNone"></option>
+                        <option value="socks5">SOCKS5</option>
+                        <option value="http">HTTP</option>
+                    </select>
+                </div>
+                <div id="proxyFields" style="display:none">
+                    <div class="form-group">
+                        <label data-i18n="settings.proxyHost"></label>
+                        <div style="display:flex;gap:8px;align-items:stretch">
+                            <input type="text" id="proxyHost" style="flex:1" placeholder="127.0.0.1">
+                            <input type="number" id="proxyPort" style="width:90px" placeholder="1080" min="1" max="65535">
+                        </div>
+                    </div>
+                    <div class="form-group">
+                        <label data-i18n="settings.proxyAuth"></label>
+                        <div style="display:flex;gap:8px">
+                            <input type="text" id="proxyUsername" style="flex:1" data-i18n-placeholder="settings.proxyUsername" autocomplete="off">
+                            <input type="password" id="proxyPassword" style="flex:1" data-i18n-placeholder="settings.proxyPassword" autocomplete="new-password">
+                        </div>
+                    </div>
                 </div>
                 <button class="btn btn-primary" onclick="saveProxyConfig()" data-i18n="settings.saveProxy"></button>
             </div>
@@ -1156,9 +1175,13 @@
                 'settings.resetStats': '重置统计',
                 'settings.confirmReset': '确定重置统计？',
                 'settings.proxySettings': '出站代理设置',
-                'settings.proxyURL': '代理地址',
-                'settings.proxyURLPlaceholder': '留空则直连（不使用代理）',
-                'settings.proxyURLHint': '支持 socks5://host:port、socks5://user:pass@host:port、http://host:port 格式，适用于网络受限地区的用户',
+                'settings.proxyType': '代理类型',
+                'settings.proxyNone': '直连（不使用代理）',
+                'settings.proxyHost': '地址 / 端口',
+                'settings.proxyAuth': '认证（可选）',
+                'settings.proxyUsername': '用户名',
+                'settings.proxyPassword': '密码',
+                'settings.proxyHostRequired': '请填写代理地址和端口',
                 'settings.saveProxy': '保存代理设置',
                 'settings.proxySaved': '代理设置已保存，已即时生效',
                 'api.endpoints': 'API 端点',
@@ -1368,9 +1391,13 @@
                 'settings.resetStats': 'Reset Statistics',
                 'settings.confirmReset': 'Confirm reset statistics?',
                 'settings.proxySettings': 'Outbound Proxy Settings',
-                'settings.proxyURL': 'Proxy URL',
-                'settings.proxyURLPlaceholder': 'Leave empty to connect directly',
-                'settings.proxyURLHint': 'Supports socks5://host:port, socks5://user:pass@host:port, http://host:port. For users in restricted network regions.',
+                'settings.proxyType': 'Proxy Type',
+                'settings.proxyNone': 'Direct (no proxy)',
+                'settings.proxyHost': 'Host / Port',
+                'settings.proxyAuth': 'Authentication (optional)',
+                'settings.proxyUsername': 'Username',
+                'settings.proxyPassword': 'Password',
+                'settings.proxyHostRequired': 'Please enter proxy host and port',
                 'settings.saveProxy': 'Save Proxy Settings',
                 'settings.proxySaved': 'Proxy settings saved and applied',
                 'api.endpoints': 'API Endpoints',
@@ -2045,12 +2072,45 @@
         async function loadProxyConfig() {
             const res = await fetch('/admin/api/proxy', { headers: { 'X-Admin-Password': password } });
             const d = await res.json();
-            document.getElementById('proxyURLInput').value = d.proxyURL || '';
+            const proxyURL = d.proxyURL || '';
+            if (!proxyURL) {
+                document.getElementById('proxyType').value = 'none';
+                document.getElementById('proxyFields').style.display = 'none';
+                return;
+            }
+            try {
+                const u = new URL(proxyURL);
+                const scheme = u.protocol.replace(':', '');
+                document.getElementById('proxyType').value = scheme.startsWith('socks5') ? 'socks5' : 'http';
+                document.getElementById('proxyHost').value = u.hostname;
+                document.getElementById('proxyPort').value = u.port;
+                document.getElementById('proxyUsername').value = decodeURIComponent(u.username);
+                document.getElementById('proxyPassword').value = decodeURIComponent(u.password);
+                document.getElementById('proxyFields').style.display = '';
+            } catch(e) {
+                document.getElementById('proxyType').value = 'none';
+                document.getElementById('proxyFields').style.display = 'none';
+            }
+        }
+        function onProxyTypeChange() {
+            const type = document.getElementById('proxyType').value;
+            document.getElementById('proxyFields').style.display = type === 'none' ? 'none' : '';
         }
         async function saveProxyConfig() {
+            const type = document.getElementById('proxyType').value;
+            let proxyURL = '';
+            if (type !== 'none') {
+                const host = document.getElementById('proxyHost').value.trim();
+                const port = document.getElementById('proxyPort').value.trim();
+                if (!host || !port) { alert(t('settings.proxyHostRequired')); return; }
+                const user = document.getElementById('proxyUsername').value.trim();
+                const pass = document.getElementById('proxyPassword').value.trim();
+                const auth = user ? (pass ? `${encodeURIComponent(user)}:${encodeURIComponent(pass)}@` : `${encodeURIComponent(user)}@`) : '';
+                proxyURL = `${type}://${auth}${host}:${port}`;
+            }
             const res = await fetch('/admin/api/proxy', {
                 method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password },
-                body: JSON.stringify({ proxyURL: document.getElementById('proxyURLInput').value.trim() })
+                body: JSON.stringify({ proxyURL })
             });
             const d = await res.json();
             if (d.success) { alert(t('settings.proxySaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); }

From 0e03808b0daea6f6489337bf6b36ed17e1612f8f Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 22:01:23 +0800
Subject: [PATCH 16/22] ci: parallel native arm64/amd64 builds, add Go BuildKit
 cache mounts

---
 .github/workflows/docker.yml | 100 ++++++++++++++++++++++++++++++-----
 Dockerfile                   |   7 ++-
 2 files changed, 91 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 4727b6d..442430d 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -14,18 +14,27 @@ env:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    name: Build (${{ matrix.platform }})
+    runs-on: ${{ matrix.runner }}
     permissions:
       contents: read
       packages: write
+    outputs:
+      digest-amd64: ${{ steps.digest.outputs.digest-linux-amd64 }}
+      digest-arm64: ${{ steps.digest.outputs.digest-linux-arm64 }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - platform: linux/amd64
+            runner: ubuntu-latest
+          - platform: linux/arm64
+            runner: ubuntu-24.04-arm
 
     steps:
       - name: Checkout
         uses: actions/checkout@v4
 
-      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v3
-
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -37,6 +46,70 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+
+      - name: Build and push by digest
+        id: build
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: ${{ matrix.platform }}
+          push: ${{ github.event_name != 'pull_request' }}
+          labels: ${{ steps.meta.outputs.labels }}
+          outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
+          cache-from: type=gha,scope=${{ matrix.platform }}
+          cache-to: type=gha,mode=max,scope=${{ matrix.platform }}
+          provenance: false
+
+      - name: Export digest
+        if: github.event_name != 'pull_request'
+        id: digest
+        run: |
+          PLATFORM_SAFE=$(echo "${{ matrix.platform }}" | tr '/' '-')
+          echo "digest-${PLATFORM_SAFE}=${{ steps.build.outputs.digest }}" >> "$GITHUB_OUTPUT"
+          mkdir -p /tmp/digests
+          echo "${{ steps.build.outputs.digest }}" > "/tmp/digests/${PLATFORM_SAFE}.txt"
+
+      - name: Upload digest artifact
+        if: github.event_name != 'pull_request'
+        uses: actions/upload-artifact@v4
+        with:
+          name: digest-${{ matrix.runner }}
+          path: /tmp/digests/
+          if-no-files-found: error
+          retention-days: 1
+
+  merge:
+    name: Merge manifests
+    runs-on: ubuntu-latest
+    if: github.event_name != 'pull_request'
+    needs: build
+    permissions:
+      contents: read
+      packages: write
+
+    steps:
+      - name: Download digests
+        uses: actions/download-artifact@v4
+        with:
+          pattern: digest-*
+          path: /tmp/digests
+          merge-multiple: true
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Login to GitHub Container Registry
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Extract metadata
         id: meta
         uses: docker/metadata-action@v5
@@ -49,13 +122,12 @@ jobs:
             type=semver,pattern={{major}}.{{minor}}
             type=sha,prefix=
 
-      - name: Build and push
-        uses: docker/build-push-action@v5
-        with:
-          context: .
-          platforms: linux/amd64,linux/arm64
-          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.meta.outputs.tags }}
-          labels: ${{ steps.meta.outputs.labels }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max
+      - name: Create and push manifest
+        run: |
+          DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@{}")
+          TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}")
+          docker buildx imagetools create $TAGS $DIGESTS
+
+      - name: Inspect manifest
+        run: |
+          docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
diff --git a/Dockerfile b/Dockerfile
index db8766c..dedb35c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,10 +2,13 @@ FROM golang:1.21-alpine AS builder
 
 WORKDIR /app
 COPY go.mod go.sum ./
-RUN go mod download
+RUN --mount=type=cache,target=/go/pkg/mod \
+    go mod download
 
 COPY . .
-RUN CGO_ENABLED=0 GOOS=linux go build -o kiro-go .
+RUN --mount=type=cache,target=/go/pkg/mod \
+    --mount=type=cache,target=/root/.cache/go-build \
+    CGO_ENABLED=0 GOOS=linux go build -o kiro-go .
 
 FROM alpine:latest
 RUN apk --no-cache add ca-certificates

From fdbf511b11328915910152fcb2f337246378d6fd Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 22:05:37 +0800
Subject: [PATCH 17/22] ci: fix image name must be lowercase for ghcr.io

---
 .github/workflows/docker.yml | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 442430d..739b7fd 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -35,6 +35,10 @@ jobs:
       - name: Checkout
         uses: actions/checkout@v4
 
+      - name: Set lowercase image name
+        id: image
+        run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -50,7 +54,7 @@ jobs:
         id: meta
         uses: docker/metadata-action@v5
         with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          images: ${{ steps.image.outputs.name }}
 
       - name: Build and push by digest
         id: build
@@ -60,7 +64,7 @@ jobs:
           platforms: ${{ matrix.platform }}
           push: ${{ github.event_name != 'pull_request' }}
           labels: ${{ steps.meta.outputs.labels }}
-          outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
+          outputs: type=image,name=${{ steps.image.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
           cache-from: type=gha,scope=${{ matrix.platform }}
           cache-to: type=gha,mode=max,scope=${{ matrix.platform }}
           provenance: false
@@ -100,6 +104,10 @@ jobs:
           path: /tmp/digests
           merge-multiple: true
 
+      - name: Set lowercase image name
+        id: image
+        run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -114,7 +122,7 @@ jobs:
         id: meta
         uses: docker/metadata-action@v5
         with:
-          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+          images: ${{ steps.image.outputs.name }}
           tags: |
             type=raw,value=latest,enable={{is_default_branch}}
             type=ref,event=branch
@@ -124,10 +132,10 @@ jobs:
 
       - name: Create and push manifest
         run: |
-          DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@{}")
+          DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ steps.image.outputs.name }}@{}")
           TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}")
           docker buildx imagetools create $TAGS $DIGESTS
 
       - name: Inspect manifest
         run: |
-          docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }}
+          docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}

From 5cf2cce1d103c53ad19f35bf4b812a1df08bf40d Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 22:10:35 +0800
Subject: [PATCH 18/22] ci: use Go cross-compilation to eliminate slow arm64
 runner

---
 .github/workflows/docker.yml | 106 ++++++-----------------------------
 Dockerfile                   |   8 ++-
 2 files changed, 23 insertions(+), 91 deletions(-)

diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index 739b7fd..125fecc 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -14,22 +14,10 @@ env:
 
 jobs:
   build:
-    name: Build (${{ matrix.platform }})
-    runs-on: ${{ matrix.runner }}
+    runs-on: ubuntu-latest
     permissions:
       contents: read
       packages: write
-    outputs:
-      digest-amd64: ${{ steps.digest.outputs.digest-linux-amd64 }}
-      digest-arm64: ${{ steps.digest.outputs.digest-linux-arm64 }}
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - platform: linux/amd64
-            runner: ubuntu-latest
-          - platform: linux/arm64
-            runner: ubuntu-24.04-arm
 
     steps:
       - name: Checkout
@@ -39,6 +27,11 @@ jobs:
         id: image
         run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
 
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+        with:
+          platforms: arm64
+
       - name: Set up Docker Buildx
         uses: docker/setup-buildx-action@v3
 
@@ -50,74 +43,6 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}
 
-      - name: Extract metadata
-        id: meta
-        uses: docker/metadata-action@v5
-        with:
-          images: ${{ steps.image.outputs.name }}
-
-      - name: Build and push by digest
-        id: build
-        uses: docker/build-push-action@v6
-        with:
-          context: .
-          platforms: ${{ matrix.platform }}
-          push: ${{ github.event_name != 'pull_request' }}
-          labels: ${{ steps.meta.outputs.labels }}
-          outputs: type=image,name=${{ steps.image.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }}
-          cache-from: type=gha,scope=${{ matrix.platform }}
-          cache-to: type=gha,mode=max,scope=${{ matrix.platform }}
-          provenance: false
-
-      - name: Export digest
-        if: github.event_name != 'pull_request'
-        id: digest
-        run: |
-          PLATFORM_SAFE=$(echo "${{ matrix.platform }}" | tr '/' '-')
-          echo "digest-${PLATFORM_SAFE}=${{ steps.build.outputs.digest }}" >> "$GITHUB_OUTPUT"
-          mkdir -p /tmp/digests
-          echo "${{ steps.build.outputs.digest }}" > "/tmp/digests/${PLATFORM_SAFE}.txt"
-
-      - name: Upload digest artifact
-        if: github.event_name != 'pull_request'
-        uses: actions/upload-artifact@v4
-        with:
-          name: digest-${{ matrix.runner }}
-          path: /tmp/digests/
-          if-no-files-found: error
-          retention-days: 1
-
-  merge:
-    name: Merge manifests
-    runs-on: ubuntu-latest
-    if: github.event_name != 'pull_request'
-    needs: build
-    permissions:
-      contents: read
-      packages: write
-
-    steps:
-      - name: Download digests
-        uses: actions/download-artifact@v4
-        with:
-          pattern: digest-*
-          path: /tmp/digests
-          merge-multiple: true
-
-      - name: Set lowercase image name
-        id: image
-        run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT"
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v3
-
-      - name: Login to GitHub Container Registry
-        uses: docker/login-action@v3
-        with:
-          registry: ${{ env.REGISTRY }}
-          username: ${{ github.actor }}
-          password: ${{ secrets.GITHUB_TOKEN }}
-
       - name: Extract metadata
         id: meta
         uses: docker/metadata-action@v5
@@ -130,12 +55,15 @@ jobs:
             type=semver,pattern={{major}}.{{minor}}
             type=sha,prefix=
 
-      - name: Create and push manifest
-        run: |
-          DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ steps.image.outputs.name }}@{}")
-          TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}")
-          docker buildx imagetools create $TAGS $DIGESTS
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          provenance: false
 
-      - name: Inspect manifest
-        run: |
-          docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }}
diff --git a/Dockerfile b/Dockerfile
index dedb35c..7c6cfa4 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,8 @@
-FROM golang:1.21-alpine AS builder
+# The builder stage always runs on the build host's native platform ($BUILDPLATFORM); Go cross-compiles the binary for the target platform.
+FROM --platform=$BUILDPLATFORM golang:1.21-alpine AS builder
+
+ARG TARGETOS
+ARG TARGETARCH
 
 WORKDIR /app
 COPY go.mod go.sum ./
@@ -8,7 +12,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \
 COPY . .
 RUN --mount=type=cache,target=/go/pkg/mod \
     --mount=type=cache,target=/root/.cache/go-build \
-    CGO_ENABLED=0 GOOS=linux go build -o kiro-go .
+    CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o kiro-go .
 
 FROM alpine:latest
 RUN apk --no-cache add ca-certificates

From 940dc782cb0a9a0d095abc6f407adf21ccc24ae2 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 22:25:27 +0800
Subject: [PATCH 19/22] chore: bump version to 1.0.6

---
 README.md        | 7 +++++++
 README_CN.md     | 7 +++++++
 config/config.go | 2 +-
 version.json     | 2 +-
 web/index.html   | 4 ++--
 5 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index d0dcf4b..17a8649 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ If this project helps you, a Star would mean a lot.
 - Auto token refresh, SSE streaming, Web admin panel
 - Multiple auth: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON
 - Usage tracking, account import/export, i18n (CN / EN)
+- Configurable outbound proxy (SOCKS5 / HTTP)
 
 ## Quick Start
 
@@ -74,6 +75,12 @@ curl http://localhost:8080/v1/chat/completions \
 
 Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Claude-compatible requests that include a top-level `thinking` config such as `{"type":"enabled","budget_tokens":2048}` or `{"type":"adaptive"}` also enable thinking mode automatically. Configure output format in the admin panel under Settings - Thinking Mode.
 
+## Outbound Proxy
+
+If your network restricts outbound access, configure an outbound proxy in the admin panel under **Settings - Outbound Proxy Settings**. Both SOCKS5 and HTTP proxies are supported.
+
+The setting takes effect immediately without restarting.
+
 ## Environment Variables
 
 | Variable | Description | Default |
diff --git a/README_CN.md b/README_CN.md
index 542b2f3..8e9fdf6 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -17,6 +17,7 @@
 - 自动 Token 刷新、SSE 流式输出、Web 管理面板
 - 多种认证方式：AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
 - 用量追踪、账号导入导出、中英双语
+- 支持设置出站代理（SOCKS5 / HTTP）
 
 ## 快速开始
 
@@ -74,6 +75,12 @@ curl http://localhost:8080/v1/chat/completions \
 
 在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。Claude 兼容请求如果带有顶层 `thinking` 配置，例如 `{"type":"enabled","budget_tokens":2048}` 或 `{"type":"adaptive"}`，也会自动启用 thinking 模式。输出格式可在管理面板「设置 - Thinking 模式」中配置。
 
+## 出站代理
+
+可在管理面板「设置 - 出站代理设置」中配置代理。支持 SOCKS5 和 HTTP 代理。
+
+设置保存后即时生效，无需重启服务。
+
 ## 环境变量
 
 | 变量 | 说明 | 默认值 |
diff --git a/config/config.go b/config/config.go
index 7195fde..dfce5df 100644
--- a/config/config.go
+++ b/config/config.go
@@ -143,7 +143,7 @@ type AccountInfo struct {
 }
 
 // Version current version
-const Version = "1.0.5"
+const Version = "1.0.6"
 
 var (
 	cfg     *Config
diff --git a/version.json b/version.json
index 3942b7b..14e14d7 100644
--- a/version.json
+++ b/version.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.0.5",
+  "version": "1.0.6",
   "changelog": "✨ Added and fixed several improvements across the project.\n✨ 新增并修复了一些内容，包含若干功能改进与问题修复。",
   "download": "https://github.com/Quorinex/Kiro-Go"
 }
diff --git a/web/index.html b/web/index.html
index 2dd725c..0d6985b 100644
--- a/web/index.html
+++ b/web/index.html
@@ -1183,7 +1183,7 @@
                 'settings.proxyPassword': '密码',
                 'settings.proxyHostRequired': '请填写代理地址和端口',
                 'settings.saveProxy': '保存代理设置',
-                'settings.proxySaved': '代理设置已保存，已即时生效',
+                'settings.proxySaved': '代理设置已保存',
                 'api.endpoints': 'API 端点',
                 'api.modelList': '模型列表',
                 'api.stats': '统计数据',
@@ -1399,7 +1399,7 @@
                 'settings.proxyPassword': 'Password',
                 'settings.proxyHostRequired': 'Please enter proxy host and port',
                 'settings.saveProxy': 'Save Proxy Settings',
-                'settings.proxySaved': 'Proxy settings saved and applied',
+                'settings.proxySaved': 'Proxy settings saved',
                 'api.endpoints': 'API Endpoints',
                 'api.modelList': 'Model List',
                 'api.stats': 'Statistics',

From f9e45a5f1ddeb990292872a6b9c6b6b93feb9d98 Mon Sep 17 00:00:00 2001
From: Delicious233 <2991573049@qq.com>
Date: Tue, 12 May 2026 18:04:58 +0800
Subject: [PATCH 20/22] fix: respect proxy settings for outbound clients (#43)

---
 auth/http_client.go      |  2 ++
 auth/http_client_test.go | 52 +++++++++++++++++++++++++++++++
 proxy/kiro.go            |  9 ++++++
 proxy/kiro_api.go        |  9 ++----
 proxy/kiro_test.go       | 67 +++++++++++++++++++++++++++++++++++++++-
 5 files changed, 132 insertions(+), 7 deletions(-)
 create mode 100644 auth/http_client_test.go

diff --git a/auth/http_client.go b/auth/http_client.go
index fa5443e..4604d70 100644
--- a/auth/http_client.go
+++ b/auth/http_client.go
@@ -34,6 +34,8 @@ func buildAuthTransport(proxyURL string) *http.Transport {
 			t.Proxy = http.ProxyURL(u)
 			t.ForceAttemptHTTP2 = false
 		}
+	} else {
+		t.Proxy = http.ProxyFromEnvironment
 	}
 	return t
 }
diff --git a/auth/http_client_test.go b/auth/http_client_test.go
new file mode 100644
index 0000000..3f5d505
--- /dev/null
+++ b/auth/http_client_test.go
@@ -0,0 +1,52 @@
+package auth
+
+import (
+	"net/http"
+	"net/url"
+	"testing"
+)
+
+func TestBuildAuthTransportUsesExplicitProxyURL(t *testing.T) {
+	transport := buildAuthTransport("http://proxy.local:8080")
+	req := &http.Request{URL: mustParseURL(t, "https://oidc.us-east-1.amazonaws.com")}
+
+	got, err := transport.Proxy(req)
+	if err != nil {
+		t.Fatalf("unexpected proxy error: %v", err)
+	}
+	assertProxyURL(t, got, "http://proxy.local:8080")
+}
+
+func TestBuildAuthTransportFallsBackToEnvironmentProxy(t *testing.T) {
+	t.Setenv("HTTPS_PROXY", "http://env-proxy.local:2323")
+	t.Setenv("NO_PROXY", "")
+	t.Setenv("no_proxy", "")
+
+	transport := buildAuthTransport("")
+	req := &http.Request{URL: mustParseURL(t, "https://oidc.us-east-1.amazonaws.com")}
+
+	got, err := transport.Proxy(req)
+	if err != nil {
+		t.Fatalf("unexpected proxy error: %v", err)
+	}
+	assertProxyURL(t, got, "http://env-proxy.local:2323")
+}
+
+func mustParseURL(t *testing.T, raw string) *url.URL {
+	t.Helper()
+	parsed, err := url.Parse(raw)
+	if err != nil {
+		t.Fatalf("invalid test URL: %v", err)
+	}
+	return parsed
+}
+
+func assertProxyURL(t *testing.T, got *url.URL, want string) {
+	t.Helper()
+	if got == nil {
+		t.Fatalf("expected proxy URL %q, got nil", want)
+	}
+	if got.String() != want {
+		t.Fatalf("expected proxy URL %q, got %q", want, got.String())
+	}
+}
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 370cb08..0b0ccf1 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -43,6 +43,7 @@ var kiroEndpoints = []kiroEndpoint{
 
 // 全局 HTTP 客户端，支持运行时更换（代理重配置）
 var kiroHttpStore atomic.Pointer[http.Client]
+var kiroRestHttpStore atomic.Pointer[http.Client]
 
 func init() {
 	InitKiroHttpClient("")
@@ -63,6 +64,8 @@ func buildKiroTransport(proxyURL string) *http.Transport {
 			// 代理不支持 HTTP/2 协议升级
 			t.ForceAttemptHTTP2 = false
 		}
+	} else {
+		t.Proxy = http.ProxyFromEnvironment
 	}
 	return t
 }
@@ -74,6 +77,12 @@ func InitKiroHttpClient(proxyURL string) {
 		Transport: buildKiroTransport(proxyURL),
 	}
 	kiroHttpStore.Store(client)
+
+	restClient := &http.Client{
+		Timeout:   30 * time.Second,
+		Transport: buildKiroTransport(proxyURL),
+	}
+	kiroRestHttpStore.Store(restClient)
 }
 
 // ==================== 请求结构 ====================
diff --git a/proxy/kiro_api.go b/proxy/kiro_api.go
index 948336e..94e94ce 100644
--- a/proxy/kiro_api.go
+++ b/proxy/kiro_api.go
@@ -25,8 +25,7 @@ func GetUsageLimits(account *config.Account) (*UsageLimitsResponse, error) {
 
 	setKiroHeaders(req, account)
 
-	client := &http.Client{Timeout: 30 * time.Second}
-	resp, err := client.Do(req)
+	resp, err := kiroRestHttpStore.Load().Do(req)
 	if err != nil {
 		return nil, err
 	}
@@ -57,8 +56,7 @@ func GetUserInfo(account *config.Account) (*UserInfoResponse, error) {
 	setKiroHeaders(req, account)
 	req.Header.Set("Content-Type", "application/json")
 
-	client := &http.Client{Timeout: 30 * time.Second}
-	resp, err := client.Do(req)
+	resp, err := kiroRestHttpStore.Load().Do(req)
 	if err != nil {
 		return nil, err
 	}
@@ -87,8 +85,7 @@ func ListAvailableModels(account *config.Account) ([]ModelInfo, error) {
 
 	setKiroHeaders(req, account)
 
-	client := &http.Client{Timeout: 30 * time.Second}
-	resp, err := client.Do(req)
+	resp, err := kiroRestHttpStore.Load().Do(req)
 	if err != nil {
 		return nil, err
 	}
diff --git a/proxy/kiro_test.go b/proxy/kiro_test.go
index f32190b..003e544 100644
--- a/proxy/kiro_test.go
+++ b/proxy/kiro_test.go
@@ -1,6 +1,11 @@
 package proxy
 
-import "testing"
+import (
+	"net/http"
+	"net/url"
+	"testing"
+	"time"
+)
 
 func TestNormalizeChunkBasicProgression(t *testing.T) {
 	prev := ""
@@ -35,3 +40,63 @@ func TestNormalizeChunkOverlapDelta(t *testing.T) {
 		t.Fatalf("expected overlap suffix delta, got %q", got)
 	}
 }
+
+func TestBuildKiroTransportUsesExplicitProxyURL(t *testing.T) {
+	transport := buildKiroTransport("http://proxy.local:8080")
+	req := &http.Request{URL: mustParseURL(t, "https://q.us-east-1.amazonaws.com")}
+
+	got, err := transport.Proxy(req)
+	if err != nil {
+		t.Fatalf("unexpected proxy error: %v", err)
+	}
+	assertProxyURL(t, got, "http://proxy.local:8080")
+}
+
+func TestBuildKiroTransportFallsBackToEnvironmentProxy(t *testing.T) {
+	t.Setenv("HTTPS_PROXY", "http://env-proxy.local:2323")
+	t.Setenv("NO_PROXY", "")
+	t.Setenv("no_proxy", "")
+
+	transport := buildKiroTransport("")
+	req := &http.Request{URL: mustParseURL(t, "https://q.us-east-1.amazonaws.com")}
+
+	got, err := transport.Proxy(req)
+	if err != nil {
+		t.Fatalf("unexpected proxy error: %v", err)
+	}
+	assertProxyURL(t, got, "http://env-proxy.local:2323")
+}
+
+func TestInitKiroHttpClientKeepsShortRestTimeout(t *testing.T) {
+	InitKiroHttpClient("")
+	t.Cleanup(func() { InitKiroHttpClient("") })
+
+	streamClient := kiroHttpStore.Load()
+	restClient := kiroRestHttpStore.Load()
+
+	if streamClient.Timeout != 5*time.Minute {
+		t.Fatalf("expected streaming timeout to be 5m, got %s", streamClient.Timeout)
+	}
+	if restClient.Timeout != 30*time.Second {
+		t.Fatalf("expected REST timeout to stay 30s, got %s", restClient.Timeout)
+	}
+}
+
+func mustParseURL(t *testing.T, raw string) *url.URL {
+	t.Helper()
+	parsed, err := url.Parse(raw)
+	if err != nil {
+		t.Fatalf("invalid test URL: %v", err)
+	}
+	return parsed
+}
+
+func assertProxyURL(t *testing.T, got *url.URL, want string) {
+	t.Helper()
+	if got == nil {
+		t.Fatalf("expected proxy URL %q, got nil", want)
+	}
+	if got.String() != want {
+		t.Fatalf("expected proxy URL %q, got %q", want, got.String())
+	}
+}

From 08a9747c99f2a872bb01b43cf55eef3aec5d5bac Mon Sep 17 00:00:00 2001
From: Delicious233 <delicious233@hnu.edu.cn>
Date: Tue, 12 May 2026 18:56:59 +0800
Subject: [PATCH 21/22] fix: resolve Kiro profile ARN for generation requests
 (#46)

---
 config/config.go       | 13 ++++++
 proxy/kiro.go          | 11 +++++
 proxy/kiro_api.go      | 63 +++++++++++++++++++++++++++
 proxy/kiro_api_test.go | 96 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 183 insertions(+)
 create mode 100644 proxy/kiro_api_test.go

diff --git a/config/config.go b/config/config.go
index dfce5df..ac0aad4 100644
--- a/config/config.go
+++ b/config/config.go
@@ -50,6 +50,7 @@ type Account struct {
 	StartUrl     string `json:"startUrl,omitempty"`     // AWS SSO start URL
 	ExpiresAt    int64  `json:"expiresAt,omitempty"`    // Token expiration timestamp (Unix seconds)
 	MachineId    string `json:"machineId,omitempty"`    // UUID machine identifier for request tracking
+	ProfileArn   string `json:"profileArn,omitempty"`   // CodeWhisperer/Kiro profile ARN for generation requests
 
 	// Priority weight for load balancing (higher = more requests)
 	Weight int `json:"weight,omitempty"` // 0 or 1 = normal, 2+ = higher priority
@@ -274,6 +275,18 @@ func UpdateAccount(id string, account Account) error {
 	return nil
 }
 
+func UpdateAccountProfileArn(id, profileArn string) error {
+	cfgLock.Lock()
+	defer cfgLock.Unlock()
+	for i, a := range cfg.Accounts {
+		if a.ID == id {
+			cfg.Accounts[i].ProfileArn = profileArn
+			return Save()
+		}
+	}
+	return nil
+}
+
 func DeleteAccount(id string) error {
 	cfgLock.Lock()
 	defer cfgLock.Unlock()
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 0b0ccf1..974650a 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -196,6 +196,17 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 	if _, err := json.Marshal(payload); err != nil {
 		return err
 	}
+	if payload != nil && strings.TrimSpace(payload.ProfileArn) == "" {
+		if profileArn, err := ResolveProfileArn(account); err == nil {
+			payload.ProfileArn = profileArn
+		} else {
+			accountEmail := "<nil>"
+			if account != nil {
+				accountEmail = account.Email
+			}
+			fmt.Printf("[ProfileArn] Failed to resolve profile ARN for %s: %v\n", accountEmail, err)
+		}
+	}
 
 	// 根据配置排序端点
 	endpoints := getSortedEndpoints(config.GetPreferredEndpoint())
diff --git a/proxy/kiro_api.go b/proxy/kiro_api.go
index 94e94ce..91c27f8 100644
--- a/proxy/kiro_api.go
+++ b/proxy/kiro_api.go
@@ -6,6 +6,7 @@ import (
 	"io"
 	"kiro-go/config"
 	"net/http"
+	neturl "net/url"
 	"strings"
 	"time"
 )
@@ -17,6 +18,7 @@ const (
 // GetUsageLimits 获取账户使用量和订阅信息
 func GetUsageLimits(account *config.Account) (*UsageLimitsResponse, error) {
 	url := fmt.Sprintf("%s/getUsageLimits?origin=AI_EDITOR&resourceType=AGENTIC_REQUEST&isEmailRequired=true", kiroRestAPIBase)
+	url = withProfileArnQuery(url, account)
 
 	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
@@ -77,6 +79,7 @@ func GetUserInfo(account *config.Account) (*UserInfoResponse, error) {
 // ListAvailableModels 获取可用模型列表
 func ListAvailableModels(account *config.Account) ([]ModelInfo, error) {
 	url := fmt.Sprintf("%s/ListAvailableModels?origin=AI_EDITOR&maxResults=50", kiroRestAPIBase)
+	url = withProfileArnQuery(url, account)
 
 	req, err := http.NewRequest("GET", url, nil)
 	if err != nil {
@@ -105,6 +108,66 @@ func ListAvailableModels(account *config.Account) ([]ModelInfo, error) {
 	return result.Models, nil
 }
 
+// ResolveProfileArn returns the account profile ARN, fetching and caching it
+// when it is missing. Some Kiro generation requests require this profile for
+// model authorization even when model listing works without it.
+func ResolveProfileArn(account *config.Account) (string, error) {
+	if account == nil {
+		return "", fmt.Errorf("account is nil")
+	}
+	if profileArn := strings.TrimSpace(account.ProfileArn); profileArn != "" {
+		return profileArn, nil
+	}
+
+	req, err := http.NewRequest("POST", fmt.Sprintf("%s/ListAvailableProfiles", kiroRestAPIBase), strings.NewReader(`{"maxResults":10}`))
+	if err != nil {
+		return "", err
+	}
+	setKiroHeaders(req, account)
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := kiroRestHttpStore.Load().Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != 200 {
+		body, _ := io.ReadAll(resp.Body)
+		return "", fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
+	}
+
+	var result struct {
+		Profiles []struct {
+			Arn string `json:"arn"`
+		} `json:"profiles"`
+	}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		return "", err
+	}
+	for _, profile := range result.Profiles {
+		if profileArn := strings.TrimSpace(profile.Arn); profileArn != "" {
+			if updateErr := config.UpdateAccountProfileArn(account.ID, profileArn); updateErr != nil {
+				fmt.Printf("[ProfileArn] Failed to cache profile ARN for %s: %v\n", account.Email, updateErr)
+			}
+			account.ProfileArn = profileArn
+			return profileArn, nil
+		}
+	}
+	return "", fmt.Errorf("no available Kiro profile")
+}
+
+// withProfileArnQuery appends the account's trimmed, query-escaped ProfileArn to rawURL ("?" or "&" as needed); a nil account or blank ARN returns rawURL unchanged.
+func withProfileArnQuery(rawURL string, account *config.Account) string {
+	if account == nil || strings.TrimSpace(account.ProfileArn) == "" {
+		return rawURL
+	}
+	if strings.Contains(rawURL, "?") {
+		return rawURL + "&profileArn=" + neturl.QueryEscape(strings.TrimSpace(account.ProfileArn))
+	}
+	return rawURL + "?profileArn=" + neturl.QueryEscape(strings.TrimSpace(account.ProfileArn))
+}
+
 func setKiroHeaders(req *http.Request, account *config.Account) {
 	host := ""
 	if req.URL != nil {
diff --git a/proxy/kiro_api_test.go b/proxy/kiro_api_test.go
new file mode 100644
index 0000000..4fce7cd
--- /dev/null
+++ b/proxy/kiro_api_test.go
@@ -0,0 +1,96 @@
+package proxy
+
+import (
+	"io"
+	"kiro-go/config"
+	"net/http"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+func TestResolveProfileArnReturnsCachedValueWithoutRequest(t *testing.T) {
+	kiroRestHttpStore.Store(&http.Client{
+		Transport: roundTripFunc(func(*http.Request) (*http.Response, error) {
+			t.Fatal("unexpected HTTP request for cached profile ARN")
+			return nil, nil
+		}),
+	})
+	t.Cleanup(func() { InitKiroHttpClient("") })
+
+	account := &config.Account{ProfileArn: " arn:aws:codewhisperer:profile/test "}
+	got, err := ResolveProfileArn(account)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got != "arn:aws:codewhisperer:profile/test" {
+		t.Fatalf("expected trimmed cached ARN, got %q", got)
+	}
+}
+
+func TestResolveProfileArnFetchesAndCachesProfile(t *testing.T) {
+	configPath := filepath.Join(t.TempDir(), "config.json")
+	if err := config.Init(configPath); err != nil {
+		t.Fatalf("init config: %v", err)
+	}
+	account := config.Account{
+		ID:           "acct-1",
+		Email:        "user@example.com",
+		AccessToken:  "access-token",
+		Region:       "us-east-1",
+		UsageCurrent: 7,
+	}
+	if err := config.AddAccount(account); err != nil {
+		t.Fatalf("add account: %v", err)
+	}
+
+	kiroRestHttpStore.Store(&http.Client{
+		Transport: roundTripFunc(func(req *http.Request) (*http.Response, error) {
+			if req.Method != http.MethodPost {
+				t.Fatalf("expected POST, got %s", req.Method)
+			}
+			if req.URL.Path != "/ListAvailableProfiles" {
+				t.Fatalf("expected ListAvailableProfiles path, got %s", req.URL.Path)
+			}
+			if got := req.Header.Get("Content-Type"); got != "application/json" {
+				t.Fatalf("expected JSON content type, got %q", got)
+			}
+			return &http.Response{
+				StatusCode: http.StatusOK,
+				Body:       io.NopCloser(strings.NewReader(`{"profiles":[{"arn":" arn:aws:codewhisperer:profile/fetched "}]} `)),
+				Header:     make(http.Header),
+			}, nil
+		}),
+	})
+	t.Cleanup(func() { InitKiroHttpClient("") })
+
+	requestAccount := account
+	requestAccount.UsageCurrent = 0
+	got, err := ResolveProfileArn(&requestAccount)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if got != "arn:aws:codewhisperer:profile/fetched" {
+		t.Fatalf("expected fetched ARN, got %q", got)
+	}
+	if requestAccount.ProfileArn != got {
+		t.Fatalf("expected account to be updated with fetched ARN, got %q", requestAccount.ProfileArn)
+	}
+
+	accounts := config.GetAccounts()
+	if len(accounts) != 1 {
+		t.Fatalf("expected one persisted account, got %d", len(accounts))
+	}
+	if accounts[0].ProfileArn != got {
+		t.Fatalf("expected persisted account profile ARN %q, got %q", got, accounts[0].ProfileArn)
+	}
+	if accounts[0].UsageCurrent != 7 {
+		t.Fatalf("expected profile cache update to preserve usage fields, got usageCurrent=%v", accounts[0].UsageCurrent)
+	}
+}
+
+type roundTripFunc func(*http.Request) (*http.Response, error)
+
+func (fn roundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) {
+	return fn(req)
+}

From 1732b17ff9455e55cb9dcf34cf23c39f5b549042 Mon Sep 17 00:00:00 2001
From: Delicious233 <delicious233@hnu.edu.cn>
Date: Tue, 12 May 2026 18:57:12 +0800
Subject: [PATCH 22/22] fix: stabilize prompt cache fingerprints (#47)

---
 proxy/cache_tracker.go      | 69 +++++++++++++++--------------
 proxy/cache_tracker_test.go | 88 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 34 deletions(-)

diff --git a/proxy/cache_tracker.go b/proxy/cache_tracker.go
index 582754b..024e4d3 100644
--- a/proxy/cache_tracker.go
+++ b/proxy/cache_tracker.go
@@ -254,9 +254,10 @@ func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
 			"description":  tool.Description,
 			"input_schema": tool.InputSchema,
 		}
+		fingerprintValue := stripCachePositionKeys(toolValue)
 		blocks = append(blocks, cacheablePromptBlock{
-			Value:  toolValue,
-			Tokens: estimateApproxTokens(canonicalizeCacheValue(toolValue)),
+			Value:  fingerprintValue,
+			Tokens: estimateApproxTokens(canonicalizeCacheValue(fingerprintValue)),
 			TTL:    normalizePromptCacheTTL(extractPromptCacheTTL(tool)),
 		})
 	}
@@ -357,59 +358,52 @@ func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interf
 	blockValue := wrapper["block"]
 	ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue))
 
-	// Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header
-	// which drifts on every request) so that fingerprints remain stable across
-	// requests within the same conversation.
-	if normalized, changed := normalizeCacheBlockContent(blockValue); changed {
-		cloned := make(map[string]interface{}, len(wrapper))
-		for k, v := range wrapper {
-			cloned[k] = v
-		}
-		cloned["block"] = normalized
-		wrapper = cloned
+	// Drop volatile billing metadata from the cache fingerprint. Claude Code's
+	// x-anthropic-billing-header can drift, appear, or disappear across
+	// otherwise identical requests, and it does not change model semantics.
+	if isAnthropicBillingHeaderBlock(blockValue) {
+		return
 	}
 
-	canonical := canonicalizeCacheValue(wrapper)
+	fingerprintValue := stripCachePositionKeys(wrapper)
+	canonical := canonicalizeCacheValue(fingerprintValue)
 	*blocks = append(*blocks, cacheablePromptBlock{
-		Value:        wrapper,
+		Value:        fingerprintValue,
 		Tokens:       estimateApproxTokens(canonical),
 		TTL:          ttl,
 		IsMessageEnd: isMessageEnd,
 	})
 }
 
-// normalizeCacheBlockContent replaces volatile but semantically irrelevant
-// fields with a placeholder so that the cumulative fingerprint stays stable
-// across requests in the same session. Currently handles:
-//   - Claude Code's "x-anthropic-billing-header: ..." system text block
-//     whose content drifts on every request (version, telemetry hash, etc.)
-func normalizeCacheBlockContent(value interface{}) (interface{}, bool) {
+func stripCachePositionKeys(value map[string]interface{}) map[string]interface{} {
+	cloned := make(map[string]interface{}, len(value))
+	for key, item := range value {
+		if isCachePositionKey(key) {
+			continue
+		}
+		cloned[key] = item
+	}
+	return cloned
+}
+
+func isAnthropicBillingHeaderBlock(value interface{}) bool {
 	blockMap, ok := value.(map[string]interface{})
 	if !ok {
-		return value, false
+		return false
 	}
 
 	// Only normalize text blocks (or blocks without an explicit type but containing text).
 	if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" {
-		return value, false
+		return false
 	}
 
 	text, ok := blockMap["text"].(string)
 	if !ok {
-		return value, false
+		return false
 	}
 
 	trimmed := strings.TrimLeft(text, " \t\r\n")
-	if !strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:") {
-		return value, false
-	}
-
-	cloned := make(map[string]interface{}, len(blockMap))
-	for k, v := range blockMap {
-		cloned[k] = v
-	}
-	cloned["text"] = "__anthropic_billing_header__"
-	return cloned, true
+	return strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:")
 }
 
 func extractPromptCacheTTL(value interface{}) time.Duration {
@@ -590,6 +584,15 @@ func writeCanonicalJSON(buf *bytes.Buffer, value interface{}) {
 	}
 }
 
+func isCachePositionKey(key string) bool {
+	switch key {
+	case "tool_index", "system_index", "message_index", "block_index":
+		return true
+	default:
+		return false
+	}
+}
+
 func writeHashChunk(hasher hashWriter, chunk string) {
 	length := strconv.Itoa(len(chunk))
 	hasher.Write([]byte(length))
diff --git a/proxy/cache_tracker_test.go b/proxy/cache_tracker_test.go
index aa620c8..6b0262c 100644
--- a/proxy/cache_tracker_test.go
+++ b/proxy/cache_tracker_test.go
@@ -77,7 +77,7 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
 // TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
 // per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
 // block (whose content drifts on every request) does not break cache hits.
-// The normalization logic should ensure the same conversation still matches.
+// The tracker should ignore that metadata when fingerprinting cached prefixes.
 func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
 	tracker := newPromptCacheTracker(time.Hour)
 	mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
@@ -124,6 +124,92 @@ func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
 	}
 }
 
+func TestPromptCacheStableWhenBillingHeaderAppearsOrDisappears(t *testing.T) {
+	tracker := newPromptCacheTracker(time.Hour)
+	mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
+
+	build := func(includeBilling bool) *ClaudeRequest {
+		system := []interface{}{}
+		if includeBilling {
+			system = append(system, map[string]interface{}{
+				"type": "text",
+				"text": "x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;",
+			})
+		}
+		system = append(system, map[string]interface{}{
+			"type": "text",
+			"text": mainSystem,
+			"cache_control": map[string]interface{}{
+				"type": "ephemeral",
+			},
+		})
+		return &ClaudeRequest{
+			Model:    "claude-sonnet-4.5",
+			System:   system,
+			Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
+		}
+	}
+
+	withBilling := tracker.BuildClaudeProfile(build(true), 2048)
+	if withBilling == nil {
+		t.Fatalf("profile with billing header should be built")
+	}
+	tracker.Update("acct-1", withBilling)
+
+	withoutBilling := tracker.BuildClaudeProfile(build(false), 2048)
+	if withoutBilling == nil {
+		t.Fatalf("profile without billing header should be built")
+	}
+	result := tracker.Compute("acct-1", withoutBilling)
+	if result.CacheReadInputTokens == 0 {
+		t.Fatalf("expected cache read when billing header disappears, got %+v", result)
+	}
+}
+
+func TestCanonicalCacheValueIgnoresPositionKeys(t *testing.T) {
+	first := canonicalizeCacheValue(stripCachePositionKeys(map[string]interface{}{
+		"kind":         "system",
+		"system_index": 0,
+		"block": map[string]interface{}{
+			"type": "text",
+			"text": "stable",
+		},
+	}))
+	second := canonicalizeCacheValue(stripCachePositionKeys(map[string]interface{}{
+		"kind":         "system",
+		"system_index": 1,
+		"block": map[string]interface{}{
+			"type": "text",
+			"text": "stable",
+		},
+	}))
+	if first != second {
+		t.Fatalf("expected position keys to be ignored, got %q vs %q", first, second)
+	}
+}
+
+func TestCanonicalCacheValuePreservesSemanticPositionKeys(t *testing.T) {
+	first := canonicalizeCacheValue(map[string]interface{}{
+		"kind": "system",
+		"block": map[string]interface{}{
+			"type":        "text",
+			"text":        "stable",
+			"block_index": 1,
+		},
+	})
+	second := canonicalizeCacheValue(map[string]interface{}{
+		"kind": "system",
+		"block": map[string]interface{}{
+			"type":        "text",
+			"text":        "stable",
+			"block_index": 2,
+		},
+	})
+	if first == second {
+		t.Fatalf("expected semantic block_index fields to remain fingerprinted")
+	}
+}
+
 // TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
 // explicit cache_control breakpoint has been seen, subsequent message-end
 // boundaries act as implicit breakpoints. This allows multi-turn conversations