From 74a959260e788431c0754ccd633c6c5d1396cbb3 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 20:57:40 +0800
Subject: [PATCH 1/5] chore: optimize model handling

---
 proxy/handler.go    |  4 ++--
 proxy/translator.go | 32 ++++++++++++++++----------------
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/proxy/handler.go b/proxy/handler.go
index a628ab5..85afc5e 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -393,8 +393,8 @@ func fallbackAnthropicModels(thinkingSuffix string) []map[string]interface{} {
 		buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-opus-4.6", "anthropic", true),
 		buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true),
-		buildModelInfo("claude-opus-4-7", "anthropic", true),
-		buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true),
+		buildModelInfo("claude-opus-4.7", "anthropic", true),
+		buildModelInfo("claude-opus-4.7"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5", "anthropic", true),
 		buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true),
 		buildModelInfo("claude-sonnet-4", "anthropic", true),
diff --git a/proxy/translator.go b/proxy/translator.go
index 957eb72..500b74e 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -22,8 +22,8 @@ var modelMapOrdered = []modelMapping{
 	{"claude-sonnet-4.5", "claude-sonnet-4.5"},
 	{"claude-sonnet-4-6", "claude-sonnet-4.6"},
 	{"claude-sonnet-4.6", "claude-sonnet-4.6"},
-	{"claude-opus-4-7", "claude-opus-4-7"},
-	{"claude-opus-4.7", "claude-opus-4-7"},
+	{"claude-opus-4-7", "claude-opus-4.7"},
+	{"claude-opus-4.7", "claude-opus-4.7"},
 	{"claude-haiku-4-5", "claude-haiku-4.5"},
 	{"claude-haiku-4.5", "claude-haiku-4.5"},
 	{"claude-opus-4-5", "claude-opus-4.5"},
@@ -73,7 +73,7 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 		return model, thinking
 	}
 
-	return "claude-sonnet-4.5", thinking
+	return model, thinking
 }
 
 func MapModel(model string) string {
@@ -184,8 +184,8 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 			} else {
 				userMsg := KiroUserInputMessage{
 					Content: content,
-					// ModelID: modelID,
-					Origin: origin,
+					ModelID: modelID,
+					Origin:  origin,
 				}
 				if len(images) > 0 {
 					userMsg.Images = images
@@ -236,9 +236,9 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstClaudeConversationAnchor(req.Messages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {
@@ -615,9 +615,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 				history = append(history, KiroHistoryMessage{
 					UserInputMessage: &KiroUserInputMessage{
 						Content: content,
-						// ModelID: modelID,
-						Origin: origin,
-						Images: images,
+						ModelID: modelID,
+						Origin:  origin,
+						Images:  images,
 					},
 				})
 			}
@@ -661,8 +661,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 					history = append(history, KiroHistoryMessage{
 						UserInputMessage: &KiroUserInputMessage{
 							Content: buildToolResultsContinuation(currentToolResults),
-							// ModelID: modelID,
-							Origin: origin,
+							ModelID: modelID,
+							Origin:  origin,
 							UserInputMessageContext: &UserInputMessageContext{
 								ToolResults: currentToolResults,
 							},
@@ -698,9 +698,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstOpenAIConversationAnchor(nonSystemMessages))
 	payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{
 		Content: finalContent,
-		// ModelID: modelID,
-		Origin: origin,
-		Images: currentImages,
+		ModelID: modelID,
+		Origin:  origin,
+		Images:  currentImages,
 	}
 
 	if len(kiroTools) > 0 || len(currentToolResults) > 0 {

From 140492e6c7ba70ade0e93a4ed68893506439db6d Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 21:14:13 +0800
Subject: [PATCH 2/5] chore: update version metadata

---
 version.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/version.json b/version.json
index e206569..4ac5bf5 100644
--- a/version.json
+++ b/version.json
@@ -1,5 +1,5 @@
 {
-  "version": "1.0.3",
-  "changelog": "✅ 新增 clientID/clientSecret 校验\n⚖️ 新增账号权重字段，支持加权轮询策略\n🔄 批量账号管理（启用/禁用/刷新/详情）\n🚫 自动跳过用量耗尽的账号\n🔧 重构模型映射为有序列表，避免误匹配",
+  "version": "1.0.4",
+  "changelog": "✨ Added several improvements and bug fixes across the project.\n🛠️ 新增并修复了一些内容，包含若干功能改进与问题修复。",
   "download": "https://github.com/Quorinex/Kiro-Go"
 }

From e20b2a88164be7fc1a6140e2cf7f69b0fc809a5b Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 21:21:24 +0800
Subject: [PATCH 3/5] chore: sync config version

---
 config/config.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/config/config.go b/config/config.go
index a70eaea..22cfd48 100644
--- a/config/config.go
+++ b/config/config.go
@@ -137,7 +137,7 @@ type AccountInfo struct {
 }
 
 // Version 当前版本号
-const Version = "1.0.3"
+const Version = "1.0.4"
 
 var (
 	cfg     *Config

From 9dbe0cb55f0fd8b163e6f54b83b0a2fef53eabd5 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Sun, 10 May 2026 22:03:18 +0800
Subject: [PATCH 4/5] docs: simplify README and add contributing notes

---
 README.md    | 219 ++++++++-------------------------------------------
 README_CN.md | 219 ++++++++-------------------------------------------
 2 files changed, 62 insertions(+), 376 deletions(-)

diff --git a/README.md b/README.md
index b5d9b85..49a1263 100644
--- a/README.md
+++ b/README.md
@@ -8,20 +8,15 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 
 [English](README.md) | [中文](README_CN.md)
 
+If this project helps you, a Star would mean a lot.
+
 ## Features
 
-- 🔄 **Anthropic Claude API** - Full support for `/v1/messages` endpoint
-- 🤖 **OpenAI Chat API** - Compatible with `/v1/chat/completions`
-- ⚖️ **Multi-Account Pool** - Round-robin load balancing
-- 🔐 **Auto Token Refresh** - Seamless token management
-- 📡 **Streaming** - Real-time SSE responses
-- 🎛️ **Web Admin Panel** - Easy account management
-- 🔑 **Multiple Auth Methods** - AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, Local Cache, Credentials
-- 📊 **Usage Tracking** - Monitor requests, tokens, and credits
-- 📦 **Account Export/Import** - Compatible with Kiro Account Manager format
-- 🔄 **Dynamic Model List** - Auto-synced from Kiro API with caching
-- 🔔 **Version Update Check** - Automatic new version notification
-- 🌐 **i18n** - Chinese / English admin panel
+- Anthropic `/v1/messages` & OpenAI `/v1/chat/completions`
+- Multi-account pool with round-robin load balancing
+- Auto token refresh, SSE streaming, Web admin panel
+- Multiple auth: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON
+- Usage tracking, account import/export, i18n (CN / EN)
 
 ## Quick Start
 
@@ -30,19 +25,13 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service.
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# Create data directory for persistence
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker Run
 
 ```bash
-# Create data directory
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 The `/app/data` volume stores `config.json` with accounts and settings. Mount it for data persistence.
-
 ### Build from Source
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## Configuration
+Config is auto-created at `data/config.json`. Mount `/app/data` for persistence. The default admin password is `changeme` — override it via the `ADMIN_PASSWORD` env var or change it in the admin panel before going to production.
 
-Config file is auto-created at `data/config.json` on first run:
+## Usage
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+Open `http://localhost:8080/admin`, log in, add accounts, then call the API:
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"Hello!"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello!"}]}'
 ```
 
-> ⚠️ **Change the default password before production use!**
+## Thinking Mode
+
+Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure the output format in the admin panel under Settings > Thinking Mode Settings.
 
 ## Environment Variables
 
@@ -87,168 +81,17 @@ Config file is auto-created at `data/config.json` on first run:
 | `CONFIG_PATH` | Config file path | `data/config.json` |
 | `ADMIN_PASSWORD` | Admin panel password (overrides config) | - |
 
-## Usage
+## Contributing
 
-### 1. Access Admin Panel
+Friendly discussion is welcome. If you run into issues, try asking Claude Code, Codex, or similar tools for help first — most problems can be solved that way. PRs are even better.
 
-Open `http://localhost:8080/admin` and login with your password.
+## Friend Links
 
-### 2. Add Accounts
-
-Multiple methods available:
-
-| Method | Description |
-|--------|-------------|
-| **AWS Builder ID** | Login with AWS Builder ID (personal accounts) |
-| **IAM Identity Center (Enterprise SSO)** | Login with IAM Identity Center (enterprise accounts) |
-| **SSO Token** | Import `x-amz-sso_authn` token from browser |
-| **Kiro Local Cache** | Import from local Kiro IDE cache files |
-| **Credentials JSON** | Import JSON from Kiro Account Manager |
-
-#### Credentials Format
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. Call API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "Hello!"}]
-  }'
-```
-
-## Model Mapping
-
-| Request Model | Actual Model |
-|---------------|--------------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## Thinking Mode
-
-Enable extended thinking by adding a suffix to the model name (default: `-thinking`).
-
-### Usage
-
-```bash
-# OpenAI API with thinking
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API with thinking
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "Analyze this problem"}]
-  }'
-```
-
-### Configuration
-
-Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**:
-
-| Setting | Description | Options |
-|---------|-------------|---------|
-| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) |
-| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `<thinking>` tag, `<think>` tag |
-| **Claude Output Format** | How thinking content is returned in Claude API | `<thinking>` tag (default), `<think>` tag, plain text |
-
-### Output Formats
-
-**OpenAI API (`/v1/chat/completions`)**:
-- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible)
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags in content
-- `think` - Thinking wrapped in `<think>...</think>` tags in content
-
-**Claude API (`/v1/messages`)**:
-- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags (default)
-- `think` - Thinking wrapped in `<think>...</think>` tags
-- `reasoning_content` - Plain text output
-
-## API Endpoints
-
-| Endpoint | Description |
-|----------|-------------|
-| `GET /health` | Health check |
-| `GET /v1/models` | List models |
-| `GET /v1/stats` | Statistics |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token counting |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | Admin panel |
-
-## Project Structure
-
-```
-Kiro-Go/
-├── main.go              # Entry point
-├── version.json         # Version info for update check
-├── config/              # Configuration management
-├── pool/                # Account pool & load balancing
-├── proxy/               # API handlers & Kiro client
-│   ├── handler.go       # HTTP routing & admin API
-│   ├── kiro.go          # Kiro API client
-│   ├── kiro_api.go      # Kiro REST API (usage, models)
-│   └── translator.go    # Request/response conversion
-├── auth/                # Authentication
-│   ├── builderid.go     # AWS Builder ID login
-│   ├── iam_sso.go       # IAM SSO login
-│   ├── oidc.go          # OIDC token refresh
-│   └── sso_token.go     # SSO token import
-├── web/                 # Admin panel frontend
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## Disclaimer
 
-This project is provided for **educational and research purposes only**.
-
-- This software is not affiliated with, endorsed by, or associated with Amazon, AWS, or Kiro in any way
-- Users are solely responsible for ensuring their use complies with all applicable terms of service and laws
-- The authors assume no liability for any misuse or violations arising from the use of this software
-- Use at your own risk
-
-By using this software, you acknowledge that you have read and understood this disclaimer.
+For educational and research purposes only. Not affiliated with Amazon, AWS, or Kiro. Users are responsible for complying with applicable terms of service and laws. Use at your own risk.
 
 ## License
 
diff --git a/README_CN.md b/README_CN.md
index 750884b..b6b79d2 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -8,20 +8,15 @@
 
 [English](README.md) | 中文
 
+如果这个项目帮到了你，欢迎点个 Star 支持一下。
+
 ## 功能特性
 
-- 🔄 **Anthropic Claude API** - 完整支持 `/v1/messages` 端点
-- 🤖 **OpenAI Chat API** - 兼容 `/v1/chat/completions`
-- ⚖️ **多账号池** - 轮询负载均衡
-- 🔐 **自动刷新 Token** - 无缝 Token 管理
-- 📡 **流式响应** - 实时 SSE 输出
-- 🎛️ **Web 管理面板** - 便捷的账号管理
-- 🔑 **多种认证方式** - AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
-- 📊 **用量追踪** - 监控请求数、Token、Credits
-- 📦 **账号导入导出** - 兼容 Kiro Account Manager 格式
-- 🔄 **动态模型列表** - 自动从 Kiro API 同步并缓存
-- 🔔 **版本更新检测** - 自动提醒新版本
-- 🌐 **中英双语** - 管理面板支持中文 / 英文
+- Anthropic `/v1/messages` 与 OpenAI `/v1/chat/completions`
+- 多账号池轮询负载均衡
+- 自动 Token 刷新、SSE 流式输出、Web 管理面板
+- 多种认证方式：AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON
+- 用量追踪、账号导入导出、中英双语
 
 ## 快速开始
 
@@ -30,19 +25,13 @@
 ```bash
 git clone https://github.com/Quorinex/Kiro-Go.git
 cd Kiro-Go
-
-# 创建数据目录用于持久化
 mkdir -p data
-
 docker-compose up -d
 ```
 
 ### Docker 运行
 
 ```bash
-# 创建数据目录
-mkdir -p /path/to/data
-
 docker run -d \
   --name kiro-go \
   -p 8080:8080 \
@@ -52,8 +41,6 @@ docker run -d \
   ghcr.io/quorinex/kiro-go:latest
 ```
 
-> 📁 `/app/data` 卷存储 `config.json`（包含账号和设置），挂载此目录以实现数据持久化。
-
 ### 源码编译
 
 ```bash
@@ -63,22 +50,29 @@ go build -o kiro-go .
 ./kiro-go
 ```
 
-## 配置
+首次运行会在 `data/config.json` 自动生成配置，挂载 `/app/data` 以持久化。默认管理密码为 `changeme`，生产环境请务必通过 `ADMIN_PASSWORD` 环境变量或在管理面板中修改。
 
-首次运行会自动创建 `data/config.json`：
+## 使用方法
 
-```json
-{
-  "password": "changeme",
-  "port": 8080,
-  "host": "127.0.0.1",
-  "requireApiKey": false,
-  "apiKey": "",
-  "accounts": []
-}
+访问 `http://localhost:8080/admin` 登录、添加账号，然后调用 API：
+
+```bash
+# Claude
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"你好！"}]}'
+
+# OpenAI
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer any" \
+  -d '{"model":"gpt-4o","messages":[{"role":"user","content":"你好！"}]}'
 ```
 
-> ⚠️ **生产环境请务必修改默认密码！**
+## 思考模式
+
+在模型名后加后缀（默认 `-thinking`）即可启用，例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。
 
 ## 环境变量
 
@@ -87,168 +81,17 @@ go build -o kiro-go .
 | `CONFIG_PATH` | 配置文件路径 | `data/config.json` |
 | `ADMIN_PASSWORD` | 管理面板密码（覆盖配置文件） | - |
 
-## 使用方法
+## 参与贡献
 
-### 1. 访问管理面板
+欢迎友好交流。遇到问题时，建议先让 Claude Code、Codex 等工具帮忙排查一下，大部分问题都能自己解决。如果能直接提个 PR 就更好了。
 
-打开 `http://localhost:8080/admin`，输入密码登录。
+## 友情链接
 
-### 2. 添加账号
-
-支持多种方式：
-
-| 方式 | 说明 |
-|------|------|
-| **AWS Builder ID** | 通过 AWS Builder ID 授权登录（个人账号） |
-| **IAM Identity Center (企业 SSO) 登录** | 通过 IAM Identity Center (企业 SSO) 授权登录（企业账号） |
-| **SSO Token** | 通过浏览器 `x-amz-sso_authn` Token 添加账号 |
-| **Kiro 本地缓存** | 通过 Kiro IDE 本地缓存文件添加账号 |
-| **凭证 JSON** | 通过 Kiro Account Manager 导出的凭证添加账号 |
-
-#### 凭证格式
-
-```json
-{
-  "refreshToken": "eyJ...",
-  "accessToken": "eyJ...",
-  "clientId": "xxx",
-  "clientSecret": "xxx"
-}
-```
-
-### 3. 调用 API
-
-#### Claude API
-
-```bash
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4-20250514",
-    "max_tokens": 1024,
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-#### OpenAI API
-
-```bash
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -H "Authorization: Bearer any" \
-  -d '{
-    "model": "gpt-4o",
-    "messages": [{"role": "user", "content": "你好！"}]
-  }'
-```
-
-## 模型映射
-
-| 请求模型 | 实际模型 |
-|---------|---------|
-| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 |
-| `claude-sonnet-4.5` | claude-sonnet-4.5 |
-| `claude-haiku-4.5` | claude-haiku-4.5 |
-| `claude-opus-4.5` | claude-opus-4.5 |
-| `claude-opus-4.6` | claude-opus-4.6 |
-| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
-| `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
-
-## 思考模式
-
-在模型名称后添加后缀（默认：`-thinking`）即可启用扩展思考模式。
-
-### 使用方法
-
-```bash
-# OpenAI API 启用思考
-curl http://localhost:8080/v1/chat/completions \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "messages": [{"role": "user", "content": "一步步解决：15 * 23"}],
-    "stream": true
-  }'
-
-# Claude API 启用思考
-curl http://localhost:8080/v1/messages \
-  -H "Content-Type: application/json" \
-  -H "anthropic-version: 2023-06-01" \
-  -d '{
-    "model": "claude-sonnet-4.5-thinking",
-    "max_tokens": 4096,
-    "messages": [{"role": "user", "content": "分析这个问题"}]
-  }'
-```
-
-### 配置
-
-在管理面板的 **设置 > Thinking 模式设置** 中配置：
-
-| 设置 | 说明 | 选项 |
-|-----|------|------|
-| **触发后缀** | 启用思考的模型名称后缀 | 默认：`-thinking`（可自定义，如 `-think`、`-sikao`） |
-| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`（DeepSeek 兼容）、`<thinking>` 标签、`<think>` 标签 |
-| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `<thinking>` 标签（默认）、`<think>` 标签、纯文本 |
-
-### 输出格式说明
-
-**OpenAI API (`/v1/chat/completions`)**：
-- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段（DeepSeek 兼容）
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹在 content 中
-- `think` - 思考内容用 `<think>...</think>` 标签包裹在 content 中
-
-**Claude API (`/v1/messages`)**：
-- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹（默认）
-- `think` - 思考内容用 `<think>...</think>` 标签包裹
-- `reasoning_content` - 纯文本输出
-
-## API 端点
-
-| 端点 | 说明 |
-|-----|------|
-| `GET /health` | 健康检查 |
-| `GET /v1/models` | 模型列表 |
-| `GET /v1/stats` | 统计数据 |
-| `POST /v1/messages` | Claude Messages API |
-| `POST /v1/messages/count_tokens` | Token 计数 |
-| `POST /v1/chat/completions` | OpenAI Chat API |
-| `GET /admin` | 管理面板 |
-
-## 项目结构
-
-```
-Kiro-Go/
-├── main.go              # 入口
-├── version.json         # 版本信息（用于更新检测）
-├── config/              # 配置管理
-├── pool/                # 账号池 & 负载均衡
-├── proxy/               # API 处理 & Kiro 客户端
-│   ├── handler.go       # HTTP 路由 & 管理 API
-│   ├── kiro.go          # Kiro API 客户端
-│   ├── kiro_api.go      # Kiro REST API（用量、模型）
-│   └── translator.go    # 请求/响应转换
-├── auth/                # 认证
-│   ├── builderid.go     # AWS Builder ID 登录
-│   ├── iam_sso.go       # IAM SSO 登录
-│   ├── oidc.go          # OIDC Token 刷新
-│   └── sso_token.go     # SSO Token 导入
-├── web/                 # 管理面板前端
-├── Dockerfile
-└── docker-compose.yml
-```
+- [LINUX DO](https://linux.do)
 
 ## 免责声明
 
-本项目仅供**学习和研究目的**使用。
-
-- 本软件与 Amazon、AWS 或 Kiro 没有任何关联、认可或合作关系
-- 用户需自行确保其使用行为符合所有适用的服务条款和法律法规
-- 作者不对因使用本软件而产生的任何滥用或违规行为承担责任
-- 使用风险自负
-
-使用本软件即表示您已阅读并理解本免责声明。
+本项目仅供学习和研究目的使用，与 Amazon、AWS 或 Kiro 没有任何关联。用户需自行确保使用行为符合所有适用的服务条款和法律法规，使用风险自负。
 
 ## 许可证
 

From 496b14df3fbf4110d8837821041b0c8b4a191913 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Mon, 11 May 2026 15:05:20 +0800
Subject: [PATCH 5/5] fix: improve prompt cache tracking

---
 LICENSE                     |  21 ++++++
 proxy/cache_tracker.go      | 141 +++++++++++++++++++++++++++++++-----
 proxy/cache_tracker_test.go | 107 ++++++++++++++++++++++++++-
 3 files changed, 251 insertions(+), 18 deletions(-)
 create mode 100644 LICENSE

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..1bf685b
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Quorinex
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/proxy/cache_tracker.go b/proxy/cache_tracker.go
index 338f208..582754b 100644
--- a/proxy/cache_tracker.go
+++ b/proxy/cache_tracker.go
@@ -13,6 +13,13 @@ import (
 
 const defaultPromptCacheTTL = 5 * time.Minute
 
+// Anthropic requires cached prefixes to reach a minimum token count before
+// caching takes effect. Breakpoints below this threshold are excluded from
+// matching and storage to avoid reporting unrealistic 100% cache hits on
+// short requests.
+const defaultMinCacheableTokens = 1024
+const opusMinCacheableTokens = 4096
+
 type promptCacheUsage struct {
 	CacheCreationInputTokens   int
 	CacheReadInputTokens       int
@@ -29,6 +36,15 @@ type promptCacheBreakpoint struct {
 type promptCacheProfile struct {
 	Breakpoints      []promptCacheBreakpoint
 	TotalInputTokens int
+	Model            string
+}
+
+func minCacheableTokensForModel(model string) int {
+	lower := strings.ToLower(model)
+	if strings.Contains(lower, "opus") {
+		return opusMinCacheableTokens
+	}
+	return defaultMinCacheableTokens
 }
 
 type promptCacheEntry struct {
@@ -61,13 +77,27 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 	hasher := sha256.New()
 	breakpoints := make([]promptCacheBreakpoint, 0)
 	cumulativeTokens := 0
+	var activeTTL time.Duration
 
 	for _, block := range blocks {
 		canonical := canonicalizeCacheValue(block.Value)
 		writeHashChunk(hasher, canonical)
 		cumulativeTokens += block.Tokens
 
-		if block.TTL <= 0 {
+		// Determine whether this block acts as a cache breakpoint:
+		//   1) Explicit cache_control on the block itself.
+		//   2) Once any explicit breakpoint has been seen, every message-end
+		//      boundary becomes an implicit breakpoint so that multi-turn
+		//      conversations can hit earlier stored prefixes.
+		breakpointTTL := time.Duration(0)
+		if block.TTL > 0 {
+			breakpointTTL = block.TTL
+			activeTTL = block.TTL
+		} else if block.IsMessageEnd && activeTTL > 0 {
+			breakpointTTL = activeTTL
+		}
+
+		if breakpointTTL <= 0 {
 			continue
 		}
 
@@ -76,7 +106,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 		breakpoints = append(breakpoints, promptCacheBreakpoint{
 			Fingerprint:      fingerprint,
 			CumulativeTokens: cumulativeTokens,
-			TTL:              block.TTL,
+			TTL:              breakpointTTL,
 		})
 	}
 
@@ -91,6 +121,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo
 	return &promptCacheProfile{
 		Breakpoints:      breakpoints,
 		TotalInputTokens: totalInputTokens,
+		Model:            req.Model,
 	}
 }
 
@@ -99,6 +130,7 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 		return promptCacheUsage{}
 	}
 
+	minTokens := minCacheableTokensForModel(profile.Model)
 	last := profile.Breakpoints[len(profile.Breakpoints)-1]
 	lastTokens := minInt(last.CumulativeTokens, profile.TotalInputTokens)
 	now := time.Now()
@@ -109,18 +141,35 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 
 	entries := t.entriesByAccount[accountID]
 	if len(entries) == 0 {
+		// First request for this account: report creation only when the cached prefix reaches the minimum cacheable token threshold.
+		effectiveCreation := lastTokens
+		if effectiveCreation < minTokens {
+			effectiveCreation = 0
+		}
 		cache5m, cache1h := computePromptCacheTTLBreakdown(profile, 0)
 		return promptCacheUsage{
-			CacheCreationInputTokens:   lastTokens,
+			CacheCreationInputTokens:   effectiveCreation,
 			CacheReadInputTokens:       0,
 			CacheCreation5mInputTokens: cache5m,
 			CacheCreation1hInputTokens: cache1h,
 		}
 	}
 
+	// Cap cacheable tokens at 85% of total input to ensure a realistic
+	// uncached portion. The newest content in a request is never fully
+	// served from cache on the current turn.
+	maxCacheable := int(float64(profile.TotalInputTokens) * 0.85)
+	if lastTokens > maxCacheable {
+		lastTokens = maxCacheable
+	}
+
 	matchedTokens := 0
 	for i := len(profile.Breakpoints) - 1; i >= 0; i-- {
 		breakpoint := profile.Breakpoints[i]
+		// Skip breakpoints below the minimum cacheable token threshold.
+		if breakpoint.CumulativeTokens < minTokens {
+			continue
+		}
 		entry, ok := entries[breakpoint.Fingerprint]
 		if !ok || entry.ExpiresAt.Before(now) {
 			continue
@@ -128,6 +177,9 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi
 		entry.ExpiresAt = now.Add(entry.TTL)
 		entries[breakpoint.Fingerprint] = entry
 		matchedTokens = minInt(breakpoint.CumulativeTokens, profile.TotalInputTokens)
+		if matchedTokens > lastTokens {
+			matchedTokens = lastTokens
+		}
 		break
 	}
 
@@ -146,6 +198,7 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil
 		return
 	}
 
+	minTokens := minCacheableTokensForModel(profile.Model)
 	now := time.Now()
 	t.mu.Lock()
 	defer t.mu.Unlock()
@@ -158,6 +211,10 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil
 	}
 
 	for _, breakpoint := range profile.Breakpoints {
+		// Skip breakpoints below the minimum cacheable token threshold.
+		if breakpoint.CumulativeTokens < minTokens {
+			continue
+		}
 		entries[breakpoint.Fingerprint] = promptCacheEntry{
 			ExpiresAt: now.Add(breakpoint.TTL),
 			TTL:       breakpoint.TTL,
@@ -179,9 +236,10 @@ func (t *promptCacheTracker) pruneExpiredLocked(now time.Time) {
 }
 
 type cacheablePromptBlock struct {
-	Value  interface{}
-	Tokens int
-	TTL    time.Duration
+	Value        interface{}
+	Tokens       int
+	TTL          time.Duration
+	IsMessageEnd bool
 }
 
 func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock {
@@ -234,14 +292,14 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{})
 				"type": "text",
 				"text": v,
 			},
-		})
+		}, false)
 	case []interface{}:
 		for i, block := range v {
 			appendPromptBlock(blocks, map[string]interface{}{
 				"kind":         "system",
 				"system_index": i,
 				"block":        block,
-			})
+			}, false)
 		}
 	case []string:
 		for i, block := range v {
@@ -252,7 +310,7 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{})
 					"type": "text",
 					"text": block,
 				},
-			})
+			}, false)
 		}
 	}
 }
@@ -270,8 +328,9 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"type": "text",
 				"text": content,
 			},
-		})
+		}, true)
 	case []interface{}:
+		lastIdx := len(content) - 1
 		for blockIndex, block := range content {
 			appendPromptBlock(blocks, map[string]interface{}{
 				"kind":          "message",
@@ -279,7 +338,7 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"role":          role,
 				"block_index":   blockIndex,
 				"block":         block,
-			})
+			}, blockIndex == lastIdx)
 		}
 	default:
 		if content != nil {
@@ -289,22 +348,70 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int,
 				"role":          role,
 				"block_index":   0,
 				"block":         content,
-			})
+			}, true)
 		}
 	}
 }
 
-func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}) {
-	blockValue, _ := wrapper["block"]
+func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}, isMessageEnd bool) {
+	blockValue := wrapper["block"]
 	ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue))
+
+	// Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header
+	// which drifts on every request) so that fingerprints remain stable across
+	// requests within the same conversation.
+	if normalized, changed := normalizeCacheBlockContent(blockValue); changed {
+		cloned := make(map[string]interface{}, len(wrapper))
+		for k, v := range wrapper {
+			cloned[k] = v
+		}
+		cloned["block"] = normalized
+		wrapper = cloned
+	}
+
 	canonical := canonicalizeCacheValue(wrapper)
 	*blocks = append(*blocks, cacheablePromptBlock{
-		Value:  wrapper,
-		Tokens: estimateApproxTokens(canonical),
-		TTL:    ttl,
+		Value:        wrapper,
+		Tokens:       estimateApproxTokens(canonical),
+		TTL:          ttl,
+		IsMessageEnd: isMessageEnd,
 	})
 }
 
+// normalizeCacheBlockContent replaces volatile but semantically irrelevant
+// text with a fixed placeholder so that the cumulative fingerprint stays
+// stable across requests in the same session. Currently handles:
+//   - any text block (system or message) that starts with Claude Code's
+//     "x-anthropic-billing-header:" line, whose content drifts per request
+func normalizeCacheBlockContent(value interface{}) (interface{}, bool) {
+	blockMap, ok := value.(map[string]interface{})
+	if !ok {
+		return value, false
+	}
+
+	// Only normalize text blocks (or blocks without an explicit type but containing text).
+	if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" {
+		return value, false
+	}
+
+	text, ok := blockMap["text"].(string)
+	if !ok {
+		return value, false
+	}
+
+	trimmed := strings.TrimLeft(text, " \t\r\n")
+	if !strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:") {
+		return value, false
+	}
+
+	cloned := make(map[string]interface{}, len(blockMap))
+	for k, v := range blockMap {
+		cloned[k] = v
+	}
+	cloned["text"] = "__anthropic_billing_header__"
+	return cloned, true
+}
+
 func extractPromptCacheTTL(value interface{}) time.Duration {
 	block, ok := value.(map[string]interface{})
 	if !ok {
diff --git a/proxy/cache_tracker_test.go b/proxy/cache_tracker_test.go
index 1beba02..aa620c8 100644
--- a/proxy/cache_tracker_test.go
+++ b/proxy/cache_tracker_test.go
@@ -1,18 +1,20 @@
 package proxy
 
 import (
+	"strings"
 	"testing"
 	"time"
 )
 
 func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) {
 	tracker := newPromptCacheTracker(time.Hour)
+	longSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
 	req := &ClaudeRequest{
 		Model: "claude-sonnet-4.5",
 		System: []interface{}{
 			map[string]interface{}{
 				"type": "text",
-				"text": "system prompt",
+				"text": longSystem,
 				"cache_control": map[string]interface{}{
 					"type": "ephemeral",
 				},
@@ -71,3 +73,106 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) {
 		t.Fatalf("unexpected ttl breakdown: %#v", creation)
 	}
 }
+
+// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's
+// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system
+// block (whose content drifts on every request) does not break cache hits.
+// The normalization logic should ensure the same conversation still matches.
+func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) {
+	tracker := newPromptCacheTracker(time.Hour)
+	mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
+
+	build := func(billingHdr string) *ClaudeRequest {
+		return &ClaudeRequest{
+			Model: "claude-sonnet-4.5",
+			System: []interface{}{
+				map[string]interface{}{
+					"type": "text",
+					"text": billingHdr,
+				},
+				map[string]interface{}{
+					"type": "text",
+					"text": mainSystem,
+					"cache_control": map[string]interface{}{
+						"type": "ephemeral",
+					},
+				},
+			},
+			Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}},
+		}
+	}
+
+	req1 := build("x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;")
+	profile1 := tracker.BuildClaudeProfile(req1, 2048)
+	if profile1 == nil {
+		t.Fatalf("profile1 should be built")
+	}
+	first := tracker.Compute("acct-1", profile1)
+	if first.CacheReadInputTokens != 0 {
+		t.Fatalf("expected no cache read on first request, got %+v", first)
+	}
+	tracker.Update("acct-1", profile1)
+
+	req2 := build("x-anthropic-billing-header: cc_version=2.1.87.42; cch=bbbb; padding=xxyyzz;")
+	profile2 := tracker.BuildClaudeProfile(req2, 2048)
+	if profile2 == nil {
+		t.Fatalf("profile2 should be built")
+	}
+	second := tracker.Compute("acct-1", profile2)
+	if second.CacheReadInputTokens == 0 {
+		t.Fatalf("expected cache read after billing header drift, got %+v", second)
+	}
+}
+
+// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any
+// explicit cache_control breakpoint has been seen, subsequent message-end
+// boundaries act as implicit breakpoints. This allows multi-turn conversations
+// to hit earlier stored prefix fingerprints even when the newest messages
+// lack explicit cache_control.
+func TestPromptCacheImplicitBreakpointAtMessageEnd(t *testing.T) {
+	tracker := newPromptCacheTracker(time.Hour)
+	systemText := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80)
+
+	baseSystem := []interface{}{
+		map[string]interface{}{
+			"type": "text",
+			"text": systemText,
+			"cache_control": map[string]interface{}{
+				"type": "ephemeral",
+			},
+		},
+	}
+
+	// Round 1: single user message.
+	req1 := &ClaudeRequest{
+		Model:    "claude-sonnet-4.5",
+		System:   baseSystem,
+		Messages: []ClaudeMessage{{Role: "user", Content: "question one"}},
+	}
+	profile1 := tracker.BuildClaudeProfile(req1, 2048)
+	if profile1 == nil {
+		t.Fatalf("profile1 should be built")
+	}
+	tracker.Update("acct-1", profile1)
+
+	// Round 2: conversation continues with new messages. The latest user
+	// message has no explicit cache_control; it should still hit the stored
+	// prefix via the implicit message-end breakpoint.
+	req2 := &ClaudeRequest{
+		Model:  "claude-sonnet-4.5",
+		System: baseSystem,
+		Messages: []ClaudeMessage{
+			{Role: "user", Content: "question one"},
+			{Role: "assistant", Content: "answer one"},
+			{Role: "user", Content: "follow-up question"},
+		},
+	}
+	profile2 := tracker.BuildClaudeProfile(req2, 4096)
+	if profile2 == nil {
+		t.Fatalf("profile2 should be built")
+	}
+	result := tracker.Compute("acct-1", profile2)
+	if result.CacheReadInputTokens == 0 {
+		t.Fatalf("expected cache read via implicit message-end breakpoint, got %+v", result)
+	}
+}