From 74a959260e788431c0754ccd633c6c5d1396cbb3 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Sun, 10 May 2026 20:57:40 +0800 Subject: [PATCH 1/5] chore: optimize model handling --- proxy/handler.go | 4 ++-- proxy/translator.go | 32 ++++++++++++++++---------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/proxy/handler.go b/proxy/handler.go index a628ab5..85afc5e 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -393,8 +393,8 @@ func fallbackAnthropicModels(thinkingSuffix string) []map[string]interface{} { buildModelInfo("claude-sonnet-4.6"+thinkingSuffix, "anthropic", true), buildModelInfo("claude-opus-4.6", "anthropic", true), buildModelInfo("claude-opus-4.6"+thinkingSuffix, "anthropic", true), - buildModelInfo("claude-opus-4-7", "anthropic", true), - buildModelInfo("claude-opus-4-7"+thinkingSuffix, "anthropic", true), + buildModelInfo("claude-opus-4.7", "anthropic", true), + buildModelInfo("claude-opus-4.7"+thinkingSuffix, "anthropic", true), buildModelInfo("claude-sonnet-4.5", "anthropic", true), buildModelInfo("claude-sonnet-4.5"+thinkingSuffix, "anthropic", true), buildModelInfo("claude-sonnet-4", "anthropic", true), diff --git a/proxy/translator.go b/proxy/translator.go index 957eb72..500b74e 100644 --- a/proxy/translator.go +++ b/proxy/translator.go @@ -22,8 +22,8 @@ var modelMapOrdered = []modelMapping{ {"claude-sonnet-4.5", "claude-sonnet-4.5"}, {"claude-sonnet-4-6", "claude-sonnet-4.6"}, {"claude-sonnet-4.6", "claude-sonnet-4.6"}, - {"claude-opus-4-7", "claude-opus-4-7"}, - {"claude-opus-4.7", "claude-opus-4-7"}, + {"claude-opus-4-7", "claude-opus-4.7"}, + {"claude-opus-4.7", "claude-opus-4.7"}, {"claude-haiku-4-5", "claude-haiku-4.5"}, {"claude-haiku-4.5", "claude-haiku-4.5"}, {"claude-opus-4-5", "claude-opus-4.5"}, @@ -73,7 +73,7 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) { return model, thinking } - return "claude-sonnet-4.5", thinking + return model, thinking } func MapModel(model string) string { @@ -184,8 +184,8 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload { } else { userMsg := KiroUserInputMessage{ Content: content, - // ModelID: modelID, - Origin: origin, + ModelID: modelID, + Origin: origin, } if len(images) > 0 { userMsg.Images = images @@ -236,9 +236,9 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload { payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstClaudeConversationAnchor(req.Messages)) payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{ Content: finalContent, - // ModelID: modelID, - Origin: origin, - Images: currentImages, + ModelID: modelID, + Origin: origin, + Images: currentImages, } if len(kiroTools) > 0 || len(currentToolResults) > 0 { @@ -615,9 +615,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload { history = append(history, KiroHistoryMessage{ UserInputMessage: &KiroUserInputMessage{ Content: content, - // ModelID: modelID, - Origin: origin, - Images: images, + ModelID: modelID, + Origin: origin, + Images: images, }, }) } @@ -661,8 +661,8 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload { history = append(history, KiroHistoryMessage{ UserInputMessage: &KiroUserInputMessage{ Content: buildToolResultsContinuation(currentToolResults), - // ModelID: modelID, - Origin: origin, + ModelID: modelID, + Origin: origin, UserInputMessageContext: &UserInputMessageContext{ ToolResults: currentToolResults, }, @@ -698,9 +698,9 @@ func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload { payload.ConversationState.ConversationID = buildConversationID(modelID, systemPrompt, firstOpenAIConversationAnchor(nonSystemMessages)) payload.ConversationState.CurrentMessage.UserInputMessage = KiroUserInputMessage{ Content: finalContent, - // ModelID: modelID, - Origin: origin, - Images: currentImages, + ModelID: modelID, + Origin: origin, + Images: currentImages, } if len(kiroTools) > 0 || len(currentToolResults) > 0 { From 140492e6c7ba70ade0e93a4ed68893506439db6d Mon Sep 17 00:00:00 2001 From: Quorinex Date: Sun, 10 May 2026 21:14:13 +0800 Subject: [PATCH 2/5] chore: update version metadata --- version.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/version.json b/version.json index e206569..4ac5bf5 100644 --- a/version.json +++ b/version.json @@ -1,5 +1,5 @@ { - "version": "1.0.3", - "changelog": "✅ 新增 clientID/clientSecret 校验\n⚖️ 新增账号权重字段,支持加权轮询策略\n🔄 批量账号管理(启用/禁用/刷新/详情)\n🚫 自动跳过用量耗尽的账号\n🔧 重构模型映射为有序列表,避免误匹配", + "version": "1.0.4", + "changelog": "✨ Added and fixed several improvements across the project.\n🛠️ 新增并修复了一些内容,包含若干功能改进与问题修复。", "download": "https://github.com/Quorinex/Kiro-Go" } From e20b2a88164be7fc1a6140e2cf7f69b0fc809a5b Mon Sep 17 00:00:00 2001 From: Quorinex Date: Sun, 10 May 2026 21:21:24 +0800 Subject: [PATCH 3/5] chore: sync config version --- config/config.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/config.go b/config/config.go index a70eaea..22cfd48 100644 --- a/config/config.go +++ b/config/config.go @@ -137,7 +137,7 @@ type AccountInfo struct { } // Version 当前版本号 -const Version = "1.0.3" +const Version = "1.0.4" var ( cfg *Config From 9dbe0cb55f0fd8b163e6f54b83b0a2fef53eabd5 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Sun, 10 May 2026 22:03:18 +0800 Subject: [PATCH 4/5] docs: simplify README and add contributing notes --- README.md | 219 ++++++++------------------------------------------- README_CN.md | 219 ++++++++------------------------------------------- 2 files changed, 62 insertions(+), 376 deletions(-) diff --git a/README.md b/README.md index b5d9b85..49a1263 100644 --- a/README.md +++ b/README.md @@ -8,20 +8,15 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service. [English](README.md) | [中文](README_CN.md) +If this project helps you, a Star would mean a lot. + ## Features -- 🔄 **Anthropic Claude API** - Full support for `/v1/messages` endpoint -- 🤖 **OpenAI Chat API** - Compatible with `/v1/chat/completions` -- ⚖️ **Multi-Account Pool** - Round-robin load balancing -- 🔐 **Auto Token Refresh** - Seamless token management -- 📡 **Streaming** - Real-time SSE responses -- 🎛️ **Web Admin Panel** - Easy account management -- 🔑 **Multiple Auth Methods** - AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, Local Cache, Credentials -- 📊 **Usage Tracking** - Monitor requests, tokens, and credits -- 📦 **Account Export/Import** - Compatible with Kiro Account Manager format -- 🔄 **Dynamic Model List** - Auto-synced from Kiro API with caching -- 🔔 **Version Update Check** - Automatic new version notification -- 🌐 **i18n** - Chinese / English admin panel +- Anthropic `/v1/messages` & OpenAI `/v1/chat/completions` +- Multi-account pool with round-robin load balancing +- Auto token refresh, SSE streaming, Web admin panel +- Multiple auth: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON +- Usage tracking, account import/export, i18n (CN / EN) ## Quick Start @@ -30,19 +25,13 @@ Convert Kiro accounts to OpenAI / Anthropic compatible API service. ```bash git clone https://github.com/Quorinex/Kiro-Go.git cd Kiro-Go - -# Create data directory for persistence mkdir -p data - docker-compose up -d ``` ### Docker Run ```bash -# Create data directory -mkdir -p /path/to/data - docker run -d \ --name kiro-go \ -p 8080:8080 \ @@ -52,8 +41,6 @@ docker run -d \ ghcr.io/quorinex/kiro-go:latest ``` -> 📁 The `/app/data` volume stores `config.json` with accounts and settings. Mount it for data persistence. - ### Build from Source ```bash @@ -63,22 +50,29 @@ go build -o kiro-go . ./kiro-go ``` -## Configuration +Config is auto-created at `data/config.json`. Mount `/app/data` for persistence. The default admin password is `changeme` — override it via the `ADMIN_PASSWORD` env var or change it in the admin panel before going to production. -Config file is auto-created at `data/config.json` on first run: +## Usage -```json -{ - "password": "changeme", - "port": 8080, - "host": "127.0.0.1", - "requireApiKey": false, - "apiKey": "", - "accounts": [] -} +Open `http://localhost:8080/admin`, log in, add accounts, then call the API: + +```bash +# Claude +curl http://localhost:8080/v1/messages \ + -H "Content-Type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"Hello!"}]}' + +# OpenAI +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any" \ + -d '{"model":"gpt-4o","messages":[{"role":"user","content":"Hello!"}]}' ``` -> ⚠️ **Change the default password before production use!** +## Thinking Mode + +Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode. ## Environment Variables @@ -87,168 +81,17 @@ Config file is auto-created at `data/config.json` on first run: | `CONFIG_PATH` | Config file path | `data/config.json` | | `ADMIN_PASSWORD` | Admin panel password (overrides config) | - | -## Usage +## Contributing -### 1. Access Admin Panel +Friendly discussion is welcome. If you run into issues, try asking Claude Code, Codex, or similar tools for help first — most problems can be solved that way. PRs are even better. -Open `http://localhost:8080/admin` and login with your password. +## Friend Links -### 2. Add Accounts - -Multiple methods available: - -| Method | Description | -|--------|-------------| -| **AWS Builder ID** | Login with AWS Builder ID (personal accounts) | -| **IAM Identity Center (Enterprise SSO)** | Login with IAM Identity Center (enterprise accounts) | -| **SSO Token** | Import `x-amz-sso_authn` token from browser | -| **Kiro Local Cache** | Import from local Kiro IDE cache files | -| **Credentials JSON** | Import JSON from Kiro Account Manager | - -#### Credentials Format - -```json -{ - "refreshToken": "eyJ...", - "accessToken": "eyJ...", - "clientId": "xxx", - "clientSecret": "xxx" -} -``` - -### 3. Call API - -#### Claude API - -```bash -curl http://localhost:8080/v1/messages \ - -H "Content-Type: application/json" \ - -H "anthropic-version: 2023-06-01" \ - -d '{ - "model": "claude-sonnet-4-20250514", - "max_tokens": 1024, - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` - -#### OpenAI API - -```bash -curl http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer any" \ - -d '{ - "model": "gpt-4o", - "messages": [{"role": "user", "content": "Hello!"}] - }' -``` - -## Model Mapping - -| Request Model | Actual Model | -|---------------|--------------| -| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 | -| `claude-sonnet-4.5` | claude-sonnet-4.5 | -| `claude-haiku-4.5` | claude-haiku-4.5 | -| `claude-opus-4.5` | claude-opus-4.5 | -| `claude-opus-4.6` | claude-opus-4.6 | -| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 | -| `gpt-3.5-turbo` | claude-sonnet-4-20250514 | - -## Thinking Mode - -Enable extended thinking by adding a suffix to the model name (default: `-thinking`). - -### Usage - -```bash -# OpenAI API with thinking -curl http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "claude-sonnet-4.5-thinking", - "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}], - "stream": true - }' - -# Claude API with thinking -curl http://localhost:8080/v1/messages \ - -H "Content-Type: application/json" \ - -H "anthropic-version: 2023-06-01" \ - -d '{ - "model": "claude-sonnet-4.5-thinking", - "max_tokens": 4096, - "messages": [{"role": "user", "content": "Analyze this problem"}] - }' -``` - -### Configuration - -Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**: - -| Setting | Description | Options | -|---------|-------------|---------| -| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) | -| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `` tag, `` tag | -| **Claude Output Format** | How thinking content is returned in Claude API | `` tag (default), `` tag, plain text | - -### Output Formats - -**OpenAI API (`/v1/chat/completions`)**: -- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible) -- `thinking` - Thinking wrapped in `...` tags in content -- `think` - Thinking wrapped in `...` tags in content - -**Claude API (`/v1/messages`)**: -- `thinking` - Thinking wrapped in `...` tags (default) -- `think` - Thinking wrapped in `...` tags -- `reasoning_content` - Plain text output - -## API Endpoints - -| Endpoint | Description | -|----------|-------------| -| `GET /health` | Health check | -| `GET /v1/models` | List models | -| `GET /v1/stats` | Statistics | -| `POST /v1/messages` | Claude Messages API | -| `POST /v1/messages/count_tokens` | Token counting | -| `POST /v1/chat/completions` | OpenAI Chat API | -| `GET /admin` | Admin panel | - -## Project Structure - -``` -Kiro-Go/ -├── main.go # Entry point -├── version.json # Version info for update check -├── config/ # Configuration management -├── pool/ # Account pool & load balancing -├── proxy/ # API handlers & Kiro client -│ ├── handler.go # HTTP routing & admin API -│ ├── kiro.go # Kiro API client -│ ├── kiro_api.go # Kiro REST API (usage, models) -│ └── translator.go # Request/response conversion -├── auth/ # Authentication -│ ├── builderid.go # AWS Builder ID login -│ ├── iam_sso.go # IAM SSO login -│ ├── oidc.go # OIDC token refresh -│ └── sso_token.go # SSO token import -├── web/ # Admin panel frontend -├── Dockerfile -└── docker-compose.yml -``` +- [LINUX DO](https://linux.do) ## Disclaimer -This project is provided for **educational and research purposes only**. - -- This software is not affiliated with, endorsed by, or associated with Amazon, AWS, or Kiro in any way -- Users are solely responsible for ensuring their use complies with all applicable terms of service and laws -- The authors assume no liability for any misuse or violations arising from the use of this software -- Use at your own risk - -By using this software, you acknowledge that you have read and understood this disclaimer. +For educational and research purposes only. Not affiliated with Amazon, AWS, or Kiro. Users are responsible for complying with applicable terms of service and laws. Use at your own risk. ## License diff --git a/README_CN.md b/README_CN.md index 750884b..b6b79d2 100644 --- a/README_CN.md +++ b/README_CN.md @@ -8,20 +8,15 @@ [English](README.md) | 中文 +如果这个项目帮到了你,欢迎点个 Star 支持一下。 + ## 功能特性 -- 🔄 **Anthropic Claude API** - 完整支持 `/v1/messages` 端点 -- 🤖 **OpenAI Chat API** - 兼容 `/v1/chat/completions` -- ⚖️ **多账号池** - 轮询负载均衡 -- 🔐 **自动刷新 Token** - 无缝 Token 管理 -- 📡 **流式响应** - 实时 SSE 输出 -- 🎛️ **Web 管理面板** - 便捷的账号管理 -- 🔑 **多种认证方式** - AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON -- 📊 **用量追踪** - 监控请求数、Token、Credits -- 📦 **账号导入导出** - 兼容 Kiro Account Manager 格式 -- 🔄 **动态模型列表** - 自动从 Kiro API 同步并缓存 -- 🔔 **版本更新检测** - 自动提醒新版本 -- 🌐 **中英双语** - 管理面板支持中文 / 英文 +- Anthropic `/v1/messages` 与 OpenAI `/v1/chat/completions` +- 多账号池轮询负载均衡 +- 自动 Token 刷新、SSE 流式输出、Web 管理面板 +- 多种认证方式:AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON +- 用量追踪、账号导入导出、中英双语 ## 快速开始 @@ -30,19 +25,13 @@ ```bash git clone https://github.com/Quorinex/Kiro-Go.git cd Kiro-Go - -# 创建数据目录用于持久化 mkdir -p data - docker-compose up -d ``` ### Docker 运行 ```bash -# 创建数据目录 -mkdir -p /path/to/data - docker run -d \ --name kiro-go \ -p 8080:8080 \ @@ -52,8 +41,6 @@ docker run -d \ ghcr.io/quorinex/kiro-go:latest ``` -> 📁 `/app/data` 卷存储 `config.json`(包含账号和设置),挂载此目录以实现数据持久化。 - ### 源码编译 ```bash @@ -63,22 +50,29 @@ go build -o kiro-go . ./kiro-go ``` -## 配置 +首次运行会在 `data/config.json` 自动生成配置,挂载 `/app/data` 以持久化。默认管理密码为 `changeme`,生产环境请务必通过 `ADMIN_PASSWORD` 环境变量或在管理面板中修改。 -首次运行会自动创建 `data/config.json`: +## 使用方法 -```json -{ - "password": "changeme", - "port": 8080, - "host": "127.0.0.1", - "requireApiKey": false, - "apiKey": "", - "accounts": [] -} +访问 `http://localhost:8080/admin` 登录、添加账号,然后调用 API: + +```bash +# Claude +curl http://localhost:8080/v1/messages \ + -H "Content-Type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -d '{"model":"claude-sonnet-4.5","max_tokens":1024,"messages":[{"role":"user","content":"你好!"}]}' + +# OpenAI +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer any" \ + -d '{"model":"gpt-4o","messages":[{"role":"user","content":"你好!"}]}' ``` -> ⚠️ **生产环境请务必修改默认密码!** +## 思考模式 + +在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。 ## 环境变量 @@ -87,168 +81,17 @@ go build -o kiro-go . | `CONFIG_PATH` | 配置文件路径 | `data/config.json` | | `ADMIN_PASSWORD` | 管理面板密码(覆盖配置文件) | - | -## 使用方法 +## 参与贡献 -### 1. 访问管理面板 +欢迎友好交流。遇到问题时,建议先让 Claude Code、Codex 等工具帮忙排查一下,大部分问题都能自己解决。如果能直接提个 PR 就更好了。 -打开 `http://localhost:8080/admin`,输入密码登录。 +## 友情链接 -### 2. 添加账号 - -支持多种方式: - -| 方式 | 说明 | -|------|------| -| **AWS Builder ID** | 通过 AWS Builder ID 授权登录(个人账号) | -| **IAM Identity Center (企业 SSO) 登录** | 通过 IAM Identity Center (企业 SSO) 授权登录(企业账号) | -| **SSO Token** | 通过浏览器 `x-amz-sso_authn` Token 添加账号 | -| **Kiro 本地缓存** | 通过 Kiro IDE 本地缓存文件添加账号 | -| **凭证 JSON** | 通过 Kiro Account Manager 导出的凭证添加账号 | - -#### 凭证格式 - -```json -{ - "refreshToken": "eyJ...", - "accessToken": "eyJ...", - "clientId": "xxx", - "clientSecret": "xxx" -} -``` - -### 3. 调用 API - -#### Claude API - -```bash -curl http://localhost:8080/v1/messages \ - -H "Content-Type: application/json" \ - -H "anthropic-version: 2023-06-01" \ - -d '{ - "model": "claude-sonnet-4-20250514", - "max_tokens": 1024, - "messages": [{"role": "user", "content": "你好!"}] - }' -``` - -#### OpenAI API - -```bash -curl http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer any" \ - -d '{ - "model": "gpt-4o", - "messages": [{"role": "user", "content": "你好!"}] - }' -``` - -## 模型映射 - -| 请求模型 | 实际模型 | -|---------|---------| -| `claude-sonnet-4-20250514` | claude-sonnet-4-20250514 | -| `claude-sonnet-4.5` | claude-sonnet-4.5 | -| `claude-haiku-4.5` | claude-haiku-4.5 | -| `claude-opus-4.5` | claude-opus-4.5 | -| `claude-opus-4.6` | claude-opus-4.6 | -| `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 | -| `gpt-3.5-turbo` | claude-sonnet-4-20250514 | - -## 思考模式 - -在模型名称后添加后缀(默认:`-thinking`)即可启用扩展思考模式。 - -### 使用方法 - -```bash -# OpenAI API 启用思考 -curl http://localhost:8080/v1/chat/completions \ - -H "Content-Type: application/json" \ - -d '{ - "model": "claude-sonnet-4.5-thinking", - "messages": [{"role": "user", "content": "一步步解决:15 * 23"}], - "stream": true - }' - -# Claude API 启用思考 -curl http://localhost:8080/v1/messages \ - -H "Content-Type: application/json" \ - -H "anthropic-version: 2023-06-01" \ - -d '{ - "model": "claude-sonnet-4.5-thinking", - "max_tokens": 4096, - "messages": [{"role": "user", "content": "分析这个问题"}] - }' -``` - -### 配置 - -在管理面板的 **设置 > Thinking 模式设置** 中配置: - -| 设置 | 说明 | 选项 | -|-----|------|------| -| **触发后缀** | 启用思考的模型名称后缀 | 默认:`-thinking`(可自定义,如 `-think`、`-sikao`) | -| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`(DeepSeek 兼容)、`` 标签、`` 标签 | -| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `` 标签(默认)、`` 标签、纯文本 | - -### 输出格式说明 - -**OpenAI API (`/v1/chat/completions`)**: -- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段(DeepSeek 兼容) -- `thinking` - 思考内容用 `...` 标签包裹在 content 中 -- `think` - 思考内容用 `...` 标签包裹在 content 中 - -**Claude API (`/v1/messages`)**: -- `thinking` - 思考内容用 `...` 标签包裹(默认) -- `think` - 思考内容用 `...` 标签包裹 -- `reasoning_content` - 纯文本输出 - -## API 端点 - -| 端点 | 说明 | -|-----|------| -| `GET /health` | 健康检查 | -| `GET /v1/models` | 模型列表 | -| `GET /v1/stats` | 统计数据 | -| `POST /v1/messages` | Claude Messages API | -| `POST /v1/messages/count_tokens` | Token 计数 | -| `POST /v1/chat/completions` | OpenAI Chat API | -| `GET /admin` | 管理面板 | - -## 项目结构 - -``` -Kiro-Go/ -├── main.go # 入口 -├── version.json # 版本信息(用于更新检测) -├── config/ # 配置管理 -├── pool/ # 账号池 & 负载均衡 -├── proxy/ # API 处理 & Kiro 客户端 -│ ├── handler.go # HTTP 路由 & 管理 API -│ ├── kiro.go # Kiro API 客户端 -│ ├── kiro_api.go # Kiro REST API(用量、模型) -│ └── translator.go # 请求/响应转换 -├── auth/ # 认证 -│ ├── builderid.go # AWS Builder ID 登录 -│ ├── iam_sso.go # IAM SSO 登录 -│ ├── oidc.go # OIDC Token 刷新 -│ └── sso_token.go # SSO Token 导入 -├── web/ # 管理面板前端 -├── Dockerfile -└── docker-compose.yml -``` +- [LINUX DO](https://linux.do) ## 免责声明 -本项目仅供**学习和研究目的**使用。 - -- 本软件与 Amazon、AWS 或 Kiro 没有任何关联、认可或合作关系 -- 用户需自行确保其使用行为符合所有适用的服务条款和法律法规 -- 作者不对因使用本软件而产生的任何滥用或违规行为承担责任 -- 使用风险自负 - -使用本软件即表示您已阅读并理解本免责声明。 +本项目仅供学习和研究目的使用,与 Amazon、AWS 或 Kiro 没有任何关联。用户需自行确保使用行为符合所有适用的服务条款和法律法规,使用风险自负。 ## 许可证 From 496b14df3fbf4110d8837821041b0c8b4a191913 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 15:05:20 +0800 Subject: [PATCH 5/5] fix: improve prompt cache tracking --- LICENSE | 21 ++++++ proxy/cache_tracker.go | 141 +++++++++++++++++++++++++++++++----- proxy/cache_tracker_test.go | 107 ++++++++++++++++++++++++++- 3 files changed, 251 insertions(+), 18 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..1bf685b --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Quorinex + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/proxy/cache_tracker.go b/proxy/cache_tracker.go index 338f208..582754b 100644 --- a/proxy/cache_tracker.go +++ b/proxy/cache_tracker.go @@ -13,6 +13,13 @@ import ( const defaultPromptCacheTTL = 5 * time.Minute +// Anthropic requires cached prefixes to reach a minimum token count before +// caching takes effect. Breakpoints below this threshold are excluded from +// matching and storage to avoid reporting unrealistic 100% cache hits on +// short requests. +const defaultMinCacheableTokens = 1024 +const opusMinCacheableTokens = 4096 + type promptCacheUsage struct { CacheCreationInputTokens int CacheReadInputTokens int @@ -29,6 +36,15 @@ type promptCacheBreakpoint struct { type promptCacheProfile struct { Breakpoints []promptCacheBreakpoint TotalInputTokens int + Model string +} + +func minCacheableTokensForModel(model string) int { + lower := strings.ToLower(model) + if strings.Contains(lower, "opus") { + return opusMinCacheableTokens + } + return defaultMinCacheableTokens } type promptCacheEntry struct { @@ -61,13 +77,27 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo hasher := sha256.New() breakpoints := make([]promptCacheBreakpoint, 0) cumulativeTokens := 0 + var activeTTL time.Duration for _, block := range blocks { canonical := canonicalizeCacheValue(block.Value) writeHashChunk(hasher, canonical) cumulativeTokens += block.Tokens - if block.TTL <= 0 { + // Determine whether this block acts as a cache breakpoint: + // 1) Explicit cache_control on the block itself. + // 2) Once any explicit breakpoint has been seen, every message-end + // boundary becomes an implicit breakpoint so that multi-turn + // conversations can hit earlier stored prefixes. + breakpointTTL := time.Duration(0) + if block.TTL > 0 { + breakpointTTL = block.TTL + activeTTL = block.TTL + } else if block.IsMessageEnd && activeTTL > 0 { + breakpointTTL = activeTTL + } + + if breakpointTTL <= 0 { continue } @@ -76,7 +106,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo breakpoints = append(breakpoints, promptCacheBreakpoint{ Fingerprint: fingerprint, CumulativeTokens: cumulativeTokens, - TTL: block.TTL, + TTL: breakpointTTL, }) } @@ -91,6 +121,7 @@ func (t *promptCacheTracker) BuildClaudeProfile(req *ClaudeRequest, totalInputTo return &promptCacheProfile{ Breakpoints: breakpoints, TotalInputTokens: totalInputTokens, + Model: req.Model, } } @@ -99,6 +130,7 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi return promptCacheUsage{} } + minTokens := minCacheableTokensForModel(profile.Model) last := profile.Breakpoints[len(profile.Breakpoints)-1] lastTokens := minInt(last.CumulativeTokens, profile.TotalInputTokens) now := time.Now() @@ -109,18 +141,35 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi entries := t.entriesByAccount[accountID] if len(entries) == 0 { + // First request for this account: report creation only if above threshold. + effectiveCreation := lastTokens + if effectiveCreation < minTokens { + effectiveCreation = 0 + } cache5m, cache1h := computePromptCacheTTLBreakdown(profile, 0) return promptCacheUsage{ - CacheCreationInputTokens: lastTokens, + CacheCreationInputTokens: effectiveCreation, CacheReadInputTokens: 0, CacheCreation5mInputTokens: cache5m, CacheCreation1hInputTokens: cache1h, } } + // Cap cacheable tokens at 85% of total input to ensure a realistic + // uncached portion. The newest content in a request is never fully + // served from cache on the current turn. + maxCacheable := int(float64(profile.TotalInputTokens) * 0.85) + if lastTokens > maxCacheable { + lastTokens = maxCacheable + } + matchedTokens := 0 for i := len(profile.Breakpoints) - 1; i >= 0; i-- { breakpoint := profile.Breakpoints[i] + // Skip breakpoints below the minimum cacheable token threshold. + if breakpoint.CumulativeTokens < minTokens { + continue + } entry, ok := entries[breakpoint.Fingerprint] if !ok || entry.ExpiresAt.Before(now) { continue @@ -128,6 +177,9 @@ func (t *promptCacheTracker) Compute(accountID string, profile *promptCacheProfi entry.ExpiresAt = now.Add(entry.TTL) entries[breakpoint.Fingerprint] = entry matchedTokens = minInt(breakpoint.CumulativeTokens, profile.TotalInputTokens) + if matchedTokens > lastTokens { + matchedTokens = lastTokens + } break } @@ -146,6 +198,7 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil return } + minTokens := minCacheableTokensForModel(profile.Model) now := time.Now() t.mu.Lock() defer t.mu.Unlock() @@ -158,6 +211,10 @@ func (t *promptCacheTracker) Update(accountID string, profile *promptCacheProfil } for _, breakpoint := range profile.Breakpoints { + // Skip breakpoints below the minimum cacheable token threshold. + if breakpoint.CumulativeTokens < minTokens { + continue + } entries[breakpoint.Fingerprint] = promptCacheEntry{ ExpiresAt: now.Add(breakpoint.TTL), TTL: breakpoint.TTL, @@ -179,9 +236,10 @@ func (t *promptCacheTracker) pruneExpiredLocked(now time.Time) { } type cacheablePromptBlock struct { - Value interface{} - Tokens int - TTL time.Duration + Value interface{} + Tokens int + TTL time.Duration + IsMessageEnd bool } func flattenClaudeCacheBlocks(req *ClaudeRequest) []cacheablePromptBlock { @@ -234,14 +292,14 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{}) "type": "text", "text": v, }, - }) + }, false) case []interface{}: for i, block := range v { appendPromptBlock(blocks, map[string]interface{}{ "kind": "system", "system_index": i, "block": block, - }) + }, false) } case []string: for i, block := range v { @@ -252,7 +310,7 @@ func appendSystemCacheBlocks(blocks *[]cacheablePromptBlock, system interface{}) "type": "text", "text": block, }, - }) + }, false) } } } @@ -270,8 +328,9 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int, "type": "text", "text": content, }, - }) + }, true) case []interface{}: + lastIdx := len(content) - 1 for blockIndex, block := range content { appendPromptBlock(blocks, map[string]interface{}{ "kind": "message", @@ -279,7 +338,7 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int, "role": role, "block_index": blockIndex, "block": block, - }) + }, blockIndex == lastIdx) } default: if content != nil { @@ -289,22 +348,70 @@ func appendMessageCacheBlocks(blocks *[]cacheablePromptBlock, messageIndex int, "role": role, "block_index": 0, "block": content, - }) + }, true) } } } -func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}) { - blockValue, _ := wrapper["block"] +func appendPromptBlock(blocks *[]cacheablePromptBlock, wrapper map[string]interface{}, isMessageEnd bool) { + blockValue := wrapper["block"] ttl := normalizePromptCacheTTL(extractPromptCacheTTL(blockValue)) + + // Normalize volatile text (e.g. Claude Code's x-anthropic-billing-header + // which drifts on every request) so that fingerprints remain stable across + // requests within the same conversation. + if normalized, changed := normalizeCacheBlockContent(blockValue); changed { + cloned := make(map[string]interface{}, len(wrapper)) + for k, v := range wrapper { + cloned[k] = v + } + cloned["block"] = normalized + wrapper = cloned + } + canonical := canonicalizeCacheValue(wrapper) *blocks = append(*blocks, cacheablePromptBlock{ - Value: wrapper, - Tokens: estimateApproxTokens(canonical), - TTL: ttl, + Value: wrapper, + Tokens: estimateApproxTokens(canonical), + TTL: ttl, + IsMessageEnd: isMessageEnd, }) } +// normalizeCacheBlockContent replaces volatile but semantically irrelevant +// fields with a placeholder so that the cumulative fingerprint stays stable +// across requests in the same session. Currently handles: +// - Claude Code's "x-anthropic-billing-header: ..." system text block +// whose content drifts on every request (version, telemetry hash, etc.) +func normalizeCacheBlockContent(value interface{}) (interface{}, bool) { + blockMap, ok := value.(map[string]interface{}) + if !ok { + return value, false + } + + // Only normalize text blocks (or blocks without an explicit type but containing text). + if t, ok := blockMap["type"].(string); ok && t != "" && t != "text" { + return value, false + } + + text, ok := blockMap["text"].(string) + if !ok { + return value, false + } + + trimmed := strings.TrimLeft(text, " \t\r\n") + if !strings.HasPrefix(strings.ToLower(trimmed), "x-anthropic-billing-header:") { + return value, false + } + + cloned := make(map[string]interface{}, len(blockMap)) + for k, v := range blockMap { + cloned[k] = v + } + cloned["text"] = "__anthropic_billing_header__" + return cloned, true +} + func extractPromptCacheTTL(value interface{}) time.Duration { block, ok := value.(map[string]interface{}) if !ok { diff --git a/proxy/cache_tracker_test.go b/proxy/cache_tracker_test.go index 1beba02..aa620c8 100644 --- a/proxy/cache_tracker_test.go +++ b/proxy/cache_tracker_test.go @@ -1,18 +1,20 @@ package proxy import ( + "strings" "testing" "time" ) func TestPromptCacheTrackerComputeAndUpdate(t *testing.T) { tracker := newPromptCacheTracker(time.Hour) + longSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80) req := &ClaudeRequest{ Model: "claude-sonnet-4.5", System: []interface{}{ map[string]interface{}{ "type": "text", - "text": "system prompt", + "text": longSystem, "cache_control": map[string]interface{}{ "type": "ephemeral", }, @@ -71,3 +73,106 @@ func TestBuildClaudeUsageMapIncludesCacheFields(t *testing.T) { t.Fatalf("unexpected ttl breakdown: %#v", creation) } } + +// TestPromptCacheStableAcrossBillingHeaderDrift verifies that Claude Code's +// per-request "x-anthropic-billing-header: cc_version=...; cch=...;" system +// block (whose content drifts on every request) does not break cache hits. +// The normalization logic should ensure the same conversation still matches. +func TestPromptCacheStableAcrossBillingHeaderDrift(t *testing.T) { + tracker := newPromptCacheTracker(time.Hour) + mainSystem := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80) + + build := func(billingHdr string) *ClaudeRequest { + return &ClaudeRequest{ + Model: "claude-sonnet-4.5", + System: []interface{}{ + map[string]interface{}{ + "type": "text", + "text": billingHdr, + }, + map[string]interface{}{ + "type": "text", + "text": mainSystem, + "cache_control": map[string]interface{}{ + "type": "ephemeral", + }, + }, + }, + Messages: []ClaudeMessage{{Role: "user", Content: "hello world"}}, + } + } + + req1 := build("x-anthropic-billing-header: cc_version=2.1.87.1; cch=aaaa;") + profile1 := tracker.BuildClaudeProfile(req1, 2048) + if profile1 == nil { + t.Fatalf("profile1 should be built") + } + first := tracker.Compute("acct-1", profile1) + if first.CacheReadInputTokens != 0 { + t.Fatalf("expected no cache read on first request, got %+v", first) + } + tracker.Update("acct-1", profile1) + + req2 := build("x-anthropic-billing-header: cc_version=2.1.87.42; cch=bbbb; padding=xxyyzz;") + profile2 := tracker.BuildClaudeProfile(req2, 2048) + if profile2 == nil { + t.Fatalf("profile2 should be built") + } + second := tracker.Compute("acct-1", profile2) + if second.CacheReadInputTokens == 0 { + t.Fatalf("expected cache read after billing header drift, got %+v", second) + } +} + +// TestPromptCacheImplicitBreakpointAtMessageEnd verifies that once any +// explicit cache_control breakpoint has been seen, subsequent message-end +// boundaries act as implicit breakpoints. This allows multi-turn conversations +// to hit earlier stored prefix fingerprints even when the newest messages +// lack explicit cache_control. +func TestPromptCacheImplicitBreakpointAtMessageEnd(t *testing.T) { + tracker := newPromptCacheTracker(time.Hour) + systemText := strings.Repeat("You are a helpful coding assistant with deep knowledge of Go, Rust, Python, and TypeScript. ", 80) + + baseSystem := []interface{}{ + map[string]interface{}{ + "type": "text", + "text": systemText, + "cache_control": map[string]interface{}{ + "type": "ephemeral", + }, + }, + } + + // Round 1: single user message. + req1 := &ClaudeRequest{ + Model: "claude-sonnet-4.5", + System: baseSystem, + Messages: []ClaudeMessage{{Role: "user", Content: "question one"}}, + } + profile1 := tracker.BuildClaudeProfile(req1, 2048) + if profile1 == nil { + t.Fatalf("profile1 should be built") + } + tracker.Update("acct-1", profile1) + + // Round 2: conversation continues with new messages. The latest user + // message has no explicit cache_control; it should still hit the stored + // prefix via the implicit message-end breakpoint. + req2 := &ClaudeRequest{ + Model: "claude-sonnet-4.5", + System: baseSystem, + Messages: []ClaudeMessage{ + {Role: "user", Content: "question one"}, + {Role: "assistant", Content: "answer one"}, + {Role: "user", Content: "follow-up question"}, + }, + } + profile2 := tracker.BuildClaudeProfile(req2, 4096) + if profile2 == nil { + t.Fatalf("profile2 should be built") + } + result := tracker.Compute("acct-1", profile2) + if result.CacheReadInputTokens == 0 { + t.Fatalf("expected cache read via implicit message-end breakpoint, got %+v", result) + } +}