From 01e9d0577c2e022bfb4deb7006884c2f22fa7f84 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Wed, 4 Feb 2026 17:42:30 +0800 Subject: [PATCH] feat: add thinking mode support with configurable output formats --- README.md | 49 +++++ README_CN.md | 49 +++++ auth/iam_sso.go | 2 +- config/config.go | 191 +++++++++++------ main.go | 15 +- proxy/handler.go | 494 ++++++++++++++++++++++++++++++++++++++++---- proxy/kiro.go | 11 +- proxy/translator.go | 131 +++++++++++- web/index.html | 48 +++++ 9 files changed, 877 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index fd53b48..5e7b0e9 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,55 @@ curl http://localhost:8080/v1/chat/completions \ | `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 | | `gpt-3.5-turbo` | claude-sonnet-4-20250514 | +## Thinking Mode + +Enable extended thinking by adding a suffix to the model name (default: `-thinking`). + +### Usage + +```bash +# OpenAI API with thinking +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-sonnet-4.5-thinking", + "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}], + "stream": true + }' + +# Claude API with thinking +curl http://localhost:8080/v1/messages \ + -H "Content-Type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -d '{ + "model": "claude-sonnet-4.5-thinking", + "max_tokens": 4096, + "messages": [{"role": "user", "content": "Analyze this problem"}] + }' +``` + +### Configuration + +Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**: + +| Setting | Description | Options | +|---------|-------------|---------| +| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) | +| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `` tag, `` tag | +| **Claude Output Format** | How 
thinking content is returned in Claude API | `` tag (default), `` tag, plain text | + +### Output Formats + +**OpenAI API (`/v1/chat/completions`)**: +- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible) +- `thinking` - Thinking wrapped in `...` tags in content +- `think` - Thinking wrapped in `...` tags in content + +**Claude API (`/v1/messages`)**: +- `thinking` - Thinking wrapped in `...` tags (default) +- `think` - Thinking wrapped in `...` tags +- `reasoning_content` - Plain text output + ## API Endpoints | Endpoint | Description | diff --git a/README_CN.md b/README_CN.md index 2826304..a4ea2e8 100644 --- a/README_CN.md +++ b/README_CN.md @@ -150,6 +150,55 @@ curl http://localhost:8080/v1/chat/completions \ | `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 | | `gpt-3.5-turbo` | claude-sonnet-4-20250514 | +## 思考模式 + +在模型名称后添加后缀(默认:`-thinking`)即可启用扩展思考模式。 + +### 使用方法 + +```bash +# OpenAI API 启用思考 +curl http://localhost:8080/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "claude-sonnet-4.5-thinking", + "messages": [{"role": "user", "content": "一步步解决:15 * 23"}], + "stream": true + }' + +# Claude API 启用思考 +curl http://localhost:8080/v1/messages \ + -H "Content-Type: application/json" \ + -H "anthropic-version: 2023-06-01" \ + -d '{ + "model": "claude-sonnet-4.5-thinking", + "max_tokens": 4096, + "messages": [{"role": "user", "content": "分析这个问题"}] + }' +``` + +### 配置 + +在管理面板的 **设置 > Thinking 模式设置** 中配置: + +| 设置 | 说明 | 选项 | +|-----|------|------| +| **触发后缀** | 启用思考的模型名称后缀 | 默认:`-thinking`(可自定义,如 `-think`、`-sikao`) | +| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`(DeepSeek 兼容)、`` 标签、`` 标签 | +| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `` 标签(默认)、`` 标签、纯文本 | + +### 输出格式说明 + +**OpenAI API (`/v1/chat/completions`)**: +- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段(DeepSeek 兼容) +- `thinking` - 思考内容用 `...` 标签包裹在 content 中 +- `think` - 思考内容用 `...` 标签包裹在 content 中 + +**Claude 
API (`/v1/messages`)**: +- `thinking` - 思考内容用 `...` 标签包裹(默认) +- `think` - 思考内容用 `...` 标签包裹 +- `reasoning_content` - 纯文本输出 + ## API 端点 | 端点 | 说明 | diff --git a/auth/iam_sso.go b/auth/iam_sso.go index cc8ea06..3ed6bb2 100644 --- a/auth/iam_sso.go +++ b/auth/iam_sso.go @@ -158,7 +158,7 @@ func CompleteIamSsoLogin(sessionID, callbackUrl string) (accessToken, refreshTok func registerOIDCClient(oidcBase, startUrl, redirectUri string) (clientID, clientSecret string, err error) { payload := map[string]interface{}{ - "clientName": "Kiro API Proxy", + "clientName": "Kiro", "clientType": "public", "scopes": scopes, "grantTypes": []string{"authorization_code", "refresh_token"}, diff --git a/config/config.go b/config/config.go index 581ed34..c55cfb9 100644 --- a/config/config.go +++ b/config/config.go @@ -1,5 +1,13 @@ -// Package config 配置管理模块 -// 负责账号、设置、统计数据的持久化存储 +// Package config provides configuration management for Kiro API Proxy. +// +// This package handles persistent storage and retrieval of: +// - Account credentials and authentication tokens +// - Server settings (port, host, API keys) +// - Usage statistics and metrics +// - Thinking mode configuration for AI responses +// +// All configuration is stored in a JSON file with thread-safe access +// via read-write mutex protection. package config import ( @@ -10,7 +18,9 @@ import ( "sync" ) -// GenerateMachineId 生成 UUID v4 格式的机器码 +// GenerateMachineId generates a UUID v4 format machine identifier. +// This ID is used to uniquely identify the proxy instance in Kiro API requests, +// helping with request tracking and rate limiting on the server side. func GenerateMachineId() string { bytes := make([]byte, 16) rand.Read(bytes) @@ -20,67 +30,74 @@ func GenerateMachineId() string { bytes[0:4], bytes[4:6], bytes[6:8], bytes[8:10], bytes[10:16]) } -// Account 账号信息 +// Account represents a Kiro API account with authentication credentials and usage statistics. 
type Account struct { - // 基本信息 - ID string `json:"id"` - Email string `json:"email,omitempty"` - UserId string `json:"userId,omitempty"` - Nickname string `json:"nickname,omitempty"` - - // 认证信息 - AccessToken string `json:"accessToken"` - RefreshToken string `json:"refreshToken"` - ClientID string `json:"clientId,omitempty"` - ClientSecret string `json:"clientSecret,omitempty"` - AuthMethod string `json:"authMethod"` // idc | social - Provider string `json:"provider,omitempty"` - Region string `json:"region"` - StartUrl string `json:"startUrl,omitempty"` - ExpiresAt int64 `json:"expiresAt,omitempty"` - MachineId string `json:"machineId,omitempty"` // UUID 格式机器码 - - // 状态 - Enabled bool `json:"enabled"` - - // 订阅信息 - SubscriptionType string `json:"subscriptionType,omitempty"` // FREE | PRO | PRO_PLUS | POWER - SubscriptionTitle string `json:"subscriptionTitle,omitempty"` - DaysRemaining int `json:"daysRemaining,omitempty"` - - // 使用量 - UsageCurrent float64 `json:"usageCurrent,omitempty"` - UsageLimit float64 `json:"usageLimit,omitempty"` - UsagePercent float64 `json:"usagePercent,omitempty"` - NextResetDate string `json:"nextResetDate,omitempty"` - LastRefresh int64 `json:"lastRefresh,omitempty"` - - // 运行时统计 - RequestCount int `json:"requestCount,omitempty"` - ErrorCount int `json:"errorCount,omitempty"` - LastUsed int64 `json:"lastUsed,omitempty"` - TotalTokens int `json:"totalTokens,omitempty"` - TotalCredits float64 `json:"totalCredits,omitempty"` + // Basic identification + ID string `json:"id"` // Unique account identifier (UUID) + Email string `json:"email,omitempty"` // User email address + UserId string `json:"userId,omitempty"` // Kiro user ID + Nickname string `json:"nickname,omitempty"` // Display name for admin panel + + // Authentication credentials + AccessToken string `json:"accessToken"` // OAuth access token for API calls + RefreshToken string `json:"refreshToken"` // OAuth refresh token for token renewal + ClientID string 
`json:"clientId,omitempty"` // OIDC client ID (for IdC auth) + ClientSecret string `json:"clientSecret,omitempty"` // OIDC client secret (for IdC auth) + AuthMethod string `json:"authMethod"` // Authentication method: "idc" (AWS IdC) or "social" (GitHub/Google) + Provider string `json:"provider,omitempty"` // Identity provider name (e.g., "BuilderId", "GitHub") + Region string `json:"region"` // AWS region for OIDC endpoints + StartUrl string `json:"startUrl,omitempty"` // AWS SSO start URL + ExpiresAt int64 `json:"expiresAt,omitempty"` // Token expiration timestamp (Unix seconds) + MachineId string `json:"machineId,omitempty"` // UUID machine identifier for request tracking + + // Account status + Enabled bool `json:"enabled"` // Whether account is active in the pool + + // Subscription information + SubscriptionType string `json:"subscriptionType,omitempty"` // Tier: FREE, PRO, PRO_PLUS, or POWER + SubscriptionTitle string `json:"subscriptionTitle,omitempty"` // Human-readable subscription name + DaysRemaining int `json:"daysRemaining,omitempty"` // Days until subscription expires + + // Usage tracking + UsageCurrent float64 `json:"usageCurrent,omitempty"` // Current period usage (credits) + UsageLimit float64 `json:"usageLimit,omitempty"` // Maximum allowed usage per period + UsagePercent float64 `json:"usagePercent,omitempty"` // Usage percentage (0.0-1.0) + NextResetDate string `json:"nextResetDate,omitempty"` // Date when usage resets (YYYY-MM-DD) + LastRefresh int64 `json:"lastRefresh,omitempty"` // Last info refresh timestamp + + // Runtime statistics (updated during operation) + RequestCount int `json:"requestCount,omitempty"` // Total requests processed + ErrorCount int `json:"errorCount,omitempty"` // Total errors encountered + LastUsed int64 `json:"lastUsed,omitempty"` // Last request timestamp + TotalTokens int `json:"totalTokens,omitempty"` // Cumulative tokens processed + TotalCredits float64 `json:"totalCredits,omitempty"` // Cumulative credits 
consumed } -// Config 全局配置 +// Config represents the global application configuration. type Config struct { - Password string `json:"password"` - Port int `json:"port"` - Host string `json:"host"` - ApiKey string `json:"apiKey,omitempty"` - RequireApiKey bool `json:"requireApiKey"` - Accounts []Account `json:"accounts"` - - // 全局统计 - TotalRequests int `json:"totalRequests,omitempty"` - SuccessRequests int `json:"successRequests,omitempty"` - FailedRequests int `json:"failedRequests,omitempty"` - TotalTokens int `json:"totalTokens,omitempty"` - TotalCredits float64 `json:"totalCredits,omitempty"` + // Server settings + Password string `json:"password"` // Admin panel password + Port int `json:"port"` // HTTP server port (default: 8080) + Host string `json:"host"` // HTTP server bind address (default: 0.0.0.0) + ApiKey string `json:"apiKey,omitempty"` // API key for client authentication + RequireApiKey bool `json:"requireApiKey"` // Whether to enforce API key validation + Accounts []Account `json:"accounts"` // Registered Kiro accounts + + // Thinking mode configuration for extended reasoning output + ThinkingSuffix string `json:"thinkingSuffix,omitempty"` // Model suffix to trigger thinking mode (default: "-thinking") + OpenAIThinkingFormat string `json:"openaiThinkingFormat,omitempty"` // OpenAI output format: "reasoning_content", "thinking", or "think" + ClaudeThinkingFormat string `json:"claudeThinkingFormat,omitempty"` // Claude output format: "reasoning_content", "thinking", or "think" + + // Global statistics (persisted across restarts) + TotalRequests int `json:"totalRequests,omitempty"` // Total API requests received + SuccessRequests int `json:"successRequests,omitempty"` // Successful requests count + FailedRequests int `json:"failedRequests,omitempty"` // Failed requests count + TotalTokens int `json:"totalTokens,omitempty"` // Total tokens processed + TotalCredits float64 `json:"totalCredits,omitempty"` // Total credits consumed } -// AccountInfo 
账户信息更新结构 +// AccountInfo contains account metadata retrieved from Kiro API. +// Used for updating subscription and usage information. type AccountInfo struct { Email string UserId string @@ -100,7 +117,8 @@ var ( cfgPath string ) -// Init 初始化配置 +// Init initializes the configuration system with the specified file path. +// If the file doesn't exist, a default configuration is created. func Init(path string) error { cfgPath = path return Load() @@ -113,7 +131,8 @@ func Load() error { data, err := os.ReadFile(cfgPath) if err != nil { if os.IsNotExist(err) { - // 创建默认配置,Docker 环境默认监听 0.0.0.0 + // Create default configuration. + // Binds to 0.0.0.0 by default for Docker/container compatibility. cfg = &Config{ Password: "changeme", Port: 8080, @@ -134,7 +153,8 @@ func Load() error { return nil } -// Save 保存配置到文件 +// Save persists the current configuration to the JSON file. +// Uses indented formatting for human readability. func Save() error { data, err := json.MarshalIndent(cfg, "", " ") if err != nil { @@ -143,7 +163,8 @@ func Save() error { return os.WriteFile(cfgPath, data, 0600) } -// SetPassword 设置密码(用于环境变量覆盖) +// SetPassword updates the admin password. +// Primarily used for environment variable override in containerized deployments. func SetPassword(password string) { cfgLock.Lock() defer cfgLock.Unlock() @@ -303,7 +324,8 @@ func UpdateAccountStats(id string, requestCount, errorCount, totalTokens int, to return nil } -// UpdateAccountInfo 更新账户的订阅和使用量信息 +// UpdateAccountInfo updates an account's subscription and usage information. +// Called after refreshing account data from Kiro API. func UpdateAccountInfo(id string, info AccountInfo) error { cfgLock.Lock() defer cfgLock.Unlock() @@ -328,3 +350,46 @@ func UpdateAccountInfo(id string, info AccountInfo) error { } return nil } + +// ThinkingConfig holds settings for AI thinking/reasoning mode. +// When enabled, models output their reasoning process alongside the response. 
+type ThinkingConfig struct { + Suffix string `json:"suffix"` // Model name suffix that triggers thinking mode + OpenAIFormat string `json:"openaiFormat"` // Output format for OpenAI-compatible responses + ClaudeFormat string `json:"claudeFormat"` // Output format for Claude-compatible responses +} + +// GetThinkingConfig returns the thinking settings, read under the read lock; an empty field falls back to "-thinking", "reasoning_content" or "thinking" respectively. +func GetThinkingConfig() ThinkingConfig { + cfgLock.RLock() + defer cfgLock.RUnlock() + + suffix := cfg.ThinkingSuffix + if suffix == "" { + suffix = "-thinking" + } + openaiFormat := cfg.OpenAIThinkingFormat + if openaiFormat == "" { + openaiFormat = "reasoning_content" + } + claudeFormat := cfg.ClaudeThinkingFormat + if claudeFormat == "" { + claudeFormat = "thinking" + } + + return ThinkingConfig{ + Suffix: suffix, + OpenAIFormat: openaiFormat, + ClaudeFormat: claudeFormat, + } +} + +// UpdateThinkingConfig overwrites the three thinking settings under the write lock (no validation is done here) and returns the result of Save. +func UpdateThinkingConfig(suffix, openaiFormat, claudeFormat string) error { + cfgLock.Lock() + defer cfgLock.Unlock() + cfg.ThinkingSuffix = suffix + cfg.OpenAIThinkingFormat = openaiFormat + cfg.ClaudeThinkingFormat = claudeFormat + return Save() +} diff --git a/main.go b/main.go index 12b8357..68d33f5 100644 --- a/main.go +++ b/main.go @@ -1,5 +1,16 @@ -// Kiro API Proxy - 将 Kiro API 转换为 OpenAI/Anthropic 兼容格式 -// 支持多账号池、自动 Token 刷新、流式响应 +// Package main provides the entry point for Kiro API Proxy. +// +// Kiro API Proxy is a reverse proxy service that translates Kiro API requests +// into OpenAI and Anthropic (Claude) compatible formats. 
Key features include: +// - Multi-account pool with round-robin load balancing +// - Automatic OAuth token refresh +// - Streaming response support for real-time AI interactions +// - Admin panel for account and configuration management +// +// The service exposes the following endpoints: +// - /v1/messages - Claude API compatible endpoint +// - /v1/chat/completions - OpenAI API compatible endpoint +// - /admin - Web-based administration panel package main import ( diff --git a/proxy/handler.go b/proxy/handler.go index f2f4432..494a580 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -206,9 +206,13 @@ func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request) { func (h *Handler) handleModels(w http.ResponseWriter, r *http.Request) { models := []map[string]interface{}{ {"id": "claude-sonnet-4.5", "object": "model", "owned_by": "anthropic"}, + {"id": "claude-sonnet-4.5-thinking", "object": "model", "owned_by": "anthropic"}, {"id": "claude-sonnet-4", "object": "model", "owned_by": "anthropic"}, + {"id": "claude-sonnet-4-thinking", "object": "model", "owned_by": "anthropic"}, {"id": "claude-haiku-4.5", "object": "model", "owned_by": "anthropic"}, + {"id": "claude-haiku-4.5-thinking", "object": "model", "owned_by": "anthropic"}, {"id": "claude-opus-4.5", "object": "model", "owned_by": "anthropic"}, + {"id": "claude-opus-4.5-thinking", "object": "model", "owned_by": "anthropic"}, {"id": "auto", "object": "model", "owned_by": "kiro-api"}, {"id": "gpt-4o", "object": "model", "owned_by": "kiro-proxy"}, {"id": "gpt-4", "object": "model", "owned_by": "kiro-proxy"}, @@ -318,8 +322,13 @@ func (h *Handler) handleClaudeMessages(w http.ResponseWriter, r *http.Request) { return } + // 解析模型和 thinking 模式 + thinkingCfg := config.GetThinkingConfig() + actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix) + req.Model = actualModel + // 转换请求 - kiroPayload := ClaudeToKiro(&req) + kiroPayload := ClaudeToKiro(&req, thinking) // 流式或非流式 if req.Stream 
{ @@ -341,6 +350,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco return } + // 获取 thinking 输出格式配置 + thinkingFormat := config.GetThinkingConfig().ClaudeFormat + msgID := "msg_" + uuid.New().String() var contentStarted bool var toolUseIndex int @@ -348,6 +360,157 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco var credits float64 var toolUses []KiroToolUse + // Thinking 标签解析状态 + var textBuffer string + var inThinkingBlock bool + + // 发送文本的辅助函数 + // thinkingState: 0=普通内容, 1=thinking开始, 2=thinking中间, 3=thinking结束 + sendText := func(text string, thinkingState int) { + // 确保 content_block 已开始 + if !contentStarted { + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", + "index": 0, + "content_block": map[string]string{"type": "text", "text": ""}, + }) + contentStarted = true + } + + if thinkingState == 0 { + // 普通内容 + if text == "" { + return + } + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", + "index": 0, + "delta": map[string]string{"type": "text_delta", "text": text}, + }) + } else { + // thinking 内容 + var outputText string + switch thinkingFormat { + case "think": + switch thinkingState { + case 1: + outputText = "" + text + case 2: + outputText = text + case 3: + outputText = text + "" + } + case "reasoning_content": + // Claude 格式不支持 reasoning_content,直接输出内容 + outputText = text + default: // "thinking" + switch thinkingState { + case 1: + outputText = "" + text + case 2: + outputText = text + case 3: + outputText = text + "" + } + } + if outputText == "" { + return + } + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", + "index": 0, + "delta": map[string]string{"type": "text_delta", "text": outputText}, + }) + } + } + + // 处理文本,解析 标签 + var thinkingStarted bool + + processClaudeText := func(text string, isThinking bool, forceFlush bool) 
{ + // 如果是 reasoningContentEvent,直接输出 + if isThinking { + if !thinkingStarted { + sendText(text, 1) + thinkingStarted = true + } else { + sendText(text, 2) + } + return + } + + textBuffer += text + + for { + if !inThinkingBlock { + thinkingStart := strings.Index(textBuffer, "") + if thinkingStart != -1 { + if thinkingStart > 0 { + sendText(textBuffer[:thinkingStart], 0) + } + textBuffer = textBuffer[thinkingStart+10:] + inThinkingBlock = true + thinkingStarted = false + } else if forceFlush || len([]rune(textBuffer)) > 50 { + // 使用 rune 切片来正确处理 Unicode 字符 + runes := []rune(textBuffer) + safeLen := len(runes) + if !forceFlush { + safeLen = max(0, len(runes)-15) + } + if safeLen > 0 { + sendText(string(runes[:safeLen]), 0) + textBuffer = string(runes[safeLen:]) + } + break + } else { + break + } + } else { + thinkingEnd := strings.Index(textBuffer, "") + if thinkingEnd != -1 { + content := textBuffer[:thinkingEnd] + if !thinkingStarted { + sendText(content, 1) + sendText("", 3) + } else { + sendText(content, 3) + } + textBuffer = textBuffer[thinkingEnd+11:] + inThinkingBlock = false + thinkingStarted = false + } else if forceFlush { + if textBuffer != "" { + if !thinkingStarted { + sendText(textBuffer, 1) + sendText("", 3) + } else { + sendText(textBuffer, 3) + } + textBuffer = "" + } + break + } else { + // 流式输出 thinking 块内的内容 + runes := []rune(textBuffer) + if len(runes) > 20 { + safeLen := len(runes) - 15 + if safeLen > 0 { + if !thinkingStarted { + sendText(string(runes[:safeLen]), 1) + thinkingStarted = true + } else { + sendText(string(runes[:safeLen]), 2) + } + textBuffer = string(runes[safeLen:]) + } + } + break + } + } + } + } + // 发送 message_start h.sendSSE(w, flusher, "message_start", map[string]interface{}{ "type": "message_start", @@ -365,27 +528,12 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco if text == "" { return } - // 确保 content_block 已开始 - if !contentStarted { - h.sendSSE(w, flusher, "content_block_start", 
map[string]interface{}{ - "type": "content_block_start", - "index": 0, - "content_block": map[string]string{"type": "text", "text": ""}, - }) - contentStarted = true - } - // 直接转发文本,不缓冲 - outputText := text - if isThinking { - outputText = "" + text + "" - } - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", - "index": 0, - "delta": map[string]string{"type": "text_delta", "text": outputText}, - }) + processClaudeText(text, isThinking, false) }, OnToolUse: func(tu KiroToolUse) { + // 先刷新缓冲区 + processClaudeText("", false, true) + toolUses = append(toolUses, tu) // 关闭文本块 @@ -451,6 +599,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco return } + // 刷新剩余缓冲区 + processClaudeText("", false, true) + h.recordSuccess(inputTokens, outputTokens, credits) h.pool.RecordSuccess(account.ID) h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits) @@ -510,6 +661,7 @@ func (h *Handler) recordFailure() { // handleClaudeNonStream Claude 非流式响应 func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string) { var content string + var thinkingContent string var toolUses []KiroToolUse var inputTokens, outputTokens int var credits float64 @@ -517,7 +669,7 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A callback := &KiroStreamCallback{ OnText: func(text string, isThinking bool) { if isThinking { - content += "" + text + "" + thinkingContent += text } else { content += text } @@ -549,7 +701,21 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A h.pool.RecordSuccess(account.ID) h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits) - resp := KiroToClaudeResponse(content, toolUses, inputTokens, outputTokens, model) + // 合并 thinking 内容(如果有 reasoningContentEvent 的内容) + thinkingFormat := config.GetThinkingConfig().ClaudeFormat + finalContent := content + if 
thinkingContent != "" { + switch thinkingFormat { + case "think": + finalContent = "" + thinkingContent + "" + content + case "reasoning_content": + finalContent = thinkingContent + content // Claude 格式不支持 reasoning_content,直接拼接 + default: // "thinking" + finalContent = "" + thinkingContent + "" + content + } + } + + resp := KiroToClaudeResponse(finalContent, toolUses, inputTokens, outputTokens, model) w.Header().Set("Content-Type", "application/json; charset=utf-8") json.NewEncoder(w).Encode(resp) } @@ -596,7 +762,12 @@ func (h *Handler) handleOpenAIChat(w http.ResponseWriter, r *http.Request) { return } - kiroPayload := OpenAIToKiro(&req) + // 解析模型和 thinking 模式 + thinkingCfg := config.GetThinkingConfig() + actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix) + req.Model = actualModel + + kiroPayload := OpenAIToKiro(&req, thinking) if req.Stream { h.handleOpenAIStream(w, account, kiroPayload, req.Model) @@ -617,38 +788,224 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco return } + // 获取 thinking 输出格式配置 + thinkingFormat := config.GetThinkingConfig().OpenAIFormat + chatID := "chatcmpl-" + uuid.New().String() var toolCalls []ToolCall var toolCallIndex int var inputTokens, outputTokens int var credits float64 - callback := &KiroStreamCallback{ - OnText: func(text string, isThinking bool) { - if text == "" { + // Thinking 标签解析状态 + var textBuffer string + var inThinkingBlock bool + + // 发送 chunk 的辅助函数 + // thinkingState: 0=普通内容, 1=thinking开始, 2=thinking中间, 3=thinking结束 + sendChunk := func(content string, thinkingState int) { + if content == "" && thinkingState == 2 { + return + } + + var chunk map[string]interface{} + + if thinkingState > 0 { + // thinking 内容 + switch thinkingFormat { + case "thinking": + // 流式输出标签 + var text string + switch thinkingState { + case 1: // 开始 + text = "" + content + case 2: // 中间 + text = content + case 3: // 结束 + text = content + "" + } + if text == "" { + return + } + chunk = 
map[string]interface{}{ + "id": chatID, + "object": "chat.completion.chunk", + "created": time.Now().Unix(), + "model": model, + "choices": []map[string]interface{}{{ + "index": 0, + "delta": map[string]string{"content": text}, + "finish_reason": nil, + }}, + } + case "think": + var text string + switch thinkingState { + case 1: + text = "" + content + case 2: + text = content + case 3: + text = content + "" + } + if text == "" { + return + } + chunk = map[string]interface{}{ + "id": chatID, + "object": "chat.completion.chunk", + "created": time.Now().Unix(), + "model": model, + "choices": []map[string]interface{}{{ + "index": 0, + "delta": map[string]string{"content": text}, + "finish_reason": nil, + }}, + } + default: // "reasoning_content" + if content == "" { + return + } + chunk = map[string]interface{}{ + "id": chatID, + "object": "chat.completion.chunk", + "created": time.Now().Unix(), + "model": model, + "choices": []map[string]interface{}{{ + "index": 0, + "delta": map[string]string{"reasoning_content": content}, + "finish_reason": nil, + }}, + } + } + } else { + // 普通内容 + if content == "" { return } - // 直接转发,不缓冲 - deltaKey := "content" - if isThinking { - deltaKey = "reasoning_content" - } - chunk := map[string]interface{}{ + chunk = map[string]interface{}{ "id": chatID, "object": "chat.completion.chunk", "created": time.Now().Unix(), "model": model, "choices": []map[string]interface{}{{ "index": 0, - "delta": map[string]string{deltaKey: text}, + "delta": map[string]string{"content": content}, "finish_reason": nil, }}, } - data, _ := json.Marshal(chunk) - fmt.Fprintf(w, "data: %s\n\n", string(data)) - flusher.Flush() + } + data, _ := json.Marshal(chunk) + fmt.Fprintf(w, "data: %s\n\n", string(data)) + flusher.Flush() + } + + // 处理文本,解析 标签 + // thinkingStarted 用于跟踪是否已发送开始标签 + var thinkingStarted bool + + processText := func(text string, isThinking bool, forceFlush bool) { + // 如果是 reasoningContentEvent,直接输出 + if isThinking { + if !thinkingStarted { + 
sendChunk(text, 1) // 开始 + thinkingStarted = true + } else { + sendChunk(text, 2) // 中间 + } + return + } + + textBuffer += text + + for { + if !inThinkingBlock { + // 查找 开始标签 + thinkingStart := strings.Index(textBuffer, "") + if thinkingStart != -1 { + // 输出 thinking 标签之前的内容 + if thinkingStart > 0 { + sendChunk(textBuffer[:thinkingStart], 0) + } + textBuffer = textBuffer[thinkingStart+10:] // 移除 + inThinkingBlock = true + thinkingStarted = false // 重置,准备发送新的开始标签 + } else if forceFlush || len([]rune(textBuffer)) > 50 { + // 没有找到标签,安全输出(保留可能的部分标签) + runes := []rune(textBuffer) + safeLen := len(runes) + if !forceFlush { + safeLen = max(0, len(runes)-15) + } + if safeLen > 0 { + sendChunk(string(runes[:safeLen]), 0) + textBuffer = string(runes[safeLen:]) + } + break + } else { + break + } + } else { + // 在 thinking 块内,查找 结束标签 + thinkingEnd := strings.Index(textBuffer, "") + if thinkingEnd != -1 { + // 输出 thinking 内容 + content := textBuffer[:thinkingEnd] + if !thinkingStarted { + // 一次性输出完整内容(开始+内容+结束) + sendChunk(content, 1) // 开始 + sendChunk("", 3) // 结束(空内容,只发结束标签) + } else { + // 已经开始了,发送剩余内容和结束 + sendChunk(content, 3) // 结束 + } + textBuffer = textBuffer[thinkingEnd+11:] // 移除 + inThinkingBlock = false + thinkingStarted = false + } else if forceFlush { + // 强制刷新:输出剩余内容 + if textBuffer != "" { + if !thinkingStarted { + sendChunk(textBuffer, 1) // 开始 + sendChunk("", 3) // 结束 + } else { + sendChunk(textBuffer, 3) // 结束 + } + textBuffer = "" + } + break + } else { + // 流式输出 thinking 块内的内容 + runes := []rune(textBuffer) + if len(runes) > 20 { + safeLen := len(runes) - 15 // 保留可能的 部分 + if safeLen > 0 { + if !thinkingStarted { + sendChunk(string(runes[:safeLen]), 1) // 开始 + thinkingStarted = true + } else { + sendChunk(string(runes[:safeLen]), 2) // 中间 + } + textBuffer = string(runes[safeLen:]) + } + } + break + } + } + } + } + + callback := &KiroStreamCallback{ + OnText: func(text string, isThinking bool) { + if text == "" { + return + } + processText(text, isThinking, 
false) }, OnToolUse: func(tu KiroToolUse) { + // 先刷新缓冲区 + processText("", false, true) + args, _ := json.Marshal(tu.Input) tc := ToolCall{ID: tu.ToolUseID, Type: "function"} tc.Function.Name = tu.Name @@ -700,6 +1057,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco return } + // 刷新剩余缓冲区 + processText("", false, true) + h.recordSuccess(inputTokens, outputTokens, credits) h.pool.RecordSuccess(account.ID) h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits) @@ -730,6 +1090,7 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco // handleOpenAINonStream OpenAI 非流式响应 func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string) { var content string + var reasoningContent string var toolUses []KiroToolUse var inputTokens, outputTokens int var credits float64 @@ -737,8 +1098,7 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A callback := &KiroStreamCallback{ OnText: func(text string, isThinking bool) { if isThinking { - // 非流式模式下,thinking 内容可以作为单独字段或忽略 - // 这里暂时忽略 + reasoningContent += text } else { content += text } @@ -761,7 +1121,14 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A h.pool.RecordSuccess(account.ID) h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits) - resp := KiroToOpenAIResponse(content, toolUses, inputTokens, outputTokens, model) + // 解析 content 中的 标签 + finalContent, extractedReasoning := extractThinkingFromContent(content) + if extractedReasoning != "" { + reasoningContent = extractedReasoning + reasoningContent + } + + thinkingFormat := config.GetThinkingConfig().OpenAIFormat + resp := KiroToOpenAIResponseWithReasoning(finalContent, reasoningContent, toolUses, inputTokens, outputTokens, model, thinkingFormat) w.Header().Set("Content-Type", "application/json; charset=utf-8") json.NewEncoder(w).Encode(resp) } @@ -862,6 +1229,10 @@ 
func (h *Handler) handleAdminAPI(w http.ResponseWriter, r *http.Request) {
 		h.apiResetStats(w, r)
 	case path == "/generate-machine-id" && r.Method == "GET":
 		h.apiGenerateMachineId(w, r)
+	case path == "/thinking" && r.Method == "GET":
+		h.apiGetThinkingConfig(w, r)
+	case path == "/thinking" && r.Method == "POST":
+		h.apiUpdateThinkingConfig(w, r)
 	default:
 		w.WriteHeader(404)
 		json.NewEncoder(w).Encode(map[string]string{"error": "Not Found"})
@@ -1509,3 +1880,48 @@ func (h *Handler) serveStaticFile(w http.ResponseWriter, r *http.Request) {
 	path := strings.TrimPrefix(r.URL.Path, "/admin/")
 	http.ServeFile(w, r, "web/"+path)
 }
+
+// apiGetThinkingConfig writes the current thinking configuration (suffix, openaiFormat, claudeFormat) as JSON
+func (h *Handler) apiGetThinkingConfig(w http.ResponseWriter, r *http.Request) {
+	cfg := config.GetThinkingConfig()
+	json.NewEncoder(w).Encode(map[string]interface{}{
+		"suffix":       cfg.Suffix,
+		"openaiFormat": cfg.OpenAIFormat,
+		"claudeFormat": cfg.ClaudeFormat,
+	})
+}
+
+// apiUpdateThinkingConfig validates the posted output formats and stores the new thinking configuration
+func (h *Handler) apiUpdateThinkingConfig(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		Suffix       string `json:"suffix"`
+		OpenAIFormat string `json:"openaiFormat"`
+		ClaudeFormat string `json:"claudeFormat"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid JSON"})
+		return
+	}
+
+	// Validate the formats; an empty value skips validation for that field
+	validFormats := map[string]bool{"reasoning_content": true, "thinking": true, "think": true}
+	if req.OpenAIFormat != "" && !validFormats[req.OpenAIFormat] {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid openaiFormat, must be: reasoning_content, thinking, or think"})
+		return
+	}
+	if req.ClaudeFormat != "" && !validFormats[req.ClaudeFormat] {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid claudeFormat, must be: reasoning_content, thinking, or think"})
+		return
+	}
+
+	if err := config.UpdateThinkingConfig(req.Suffix, req.OpenAIFormat, req.ClaudeFormat); err != nil {
+		w.WriteHeader(500)
+		json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
+		return
+	}
+
+	json.NewEncoder(w).Encode(map[string]bool{"success": true})
+}
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 71c0f92..9807f87 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -120,6 +120,9 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		return err
 	}
 
+	// Estimate input tokens from the request body size (roughly 3 bytes per token, at least 1)
+	estimatedInputTokens := max(1, len(body)/3)
+
 	req, err := http.NewRequest("POST", KiroEndpoint, bytes.NewReader(body))
 	if err != nil {
 		return err
@@ -160,13 +163,13 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
 	}
 
-	return parseEventStream(resp.Body, callback)
+	return parseEventStream(resp.Body, callback, estimatedInputTokens)
 }
 
 // ==================== Event Stream 解析 ====================
 
 // parseEventStream 解析 AWS Event Stream 二进制格式
-func parseEventStream(body io.Reader, callback *KiroStreamCallback) error {
+func parseEventStream(body io.Reader, callback *KiroStreamCallback, estimatedInputTokens int) error {
 	// 不使用 bufio,直接读取避免缓冲延迟
 	var inputTokens, outputTokens int
 	var totalOutputChars int
@@ -249,6 +252,10 @@ func parseEventStream(body io.Reader, callback *KiroStreamCallback) error {
 	if outputTokens == 0 && totalOutputChars > 0 {
 		outputTokens = max(1, totalOutputChars/3)
 	}
+	// Fall back to the estimate when Kiro did not report inputTokens
+	if inputTokens == 0 {
+		inputTokens = estimatedInputTokens
+	}
 
 	if callback.OnCredits != nil && totalCredits > 0 {
 		callback.OnCredits(totalCredits)
diff --git a/proxy/translator.go b/proxy/translator.go
index 7849601..4ace590 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -30,18 +30,41 @@ var modelMap = map[string]string{
 	"gpt-3.5-turbo": "claude-sonnet-4.5",
 }
 
-func MapModel(model string) string {
+// ThinkingModePrompt is prepended to the system prompt to enable thinking. NOTE(review): tag names were stripped by extraction and are restored here — confirm against the original commit
+const ThinkingModePrompt = `<thinking_mode>enabled</thinking_mode>
+<max_thinking_length>200000</max_thinking_length>`
+
+// ParseModelAndThinking strips thinkingSuffix (case-insensitive) from model and returns the mapped model plus whether thinking was requested
+func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 	lower := strings.ToLower(model)
+	thinking := false
+
+	// Check for the configured suffix; an empty suffix must never match (HasSuffix(x, "") is always true)
+	suffixLower := strings.ToLower(thinkingSuffix)
+	if suffixLower != "" && strings.HasSuffix(lower, suffixLower) {
+		thinking = true
+		model = model[:len(model)-len(thinkingSuffix)]
+		lower = strings.ToLower(model)
+	}
+
+	// Map known aliases through modelMap
 	for k, v := range modelMap {
 		if strings.Contains(lower, k) {
-			return v
+			return v, thinking
 		}
 	}
+	// A name that already starts with "claude-" is passed through unchanged
 	if strings.HasPrefix(lower, "claude-") {
-		return model
+		return model, thinking
 	}
-	return "claude-sonnet-4.5"
+
+	return "claude-sonnet-4.5", thinking
+}
+
+func MapModel(model string) string {
+	mapped, _ := ParseModelAndThinking(model, "-thinking")
+	return mapped
 }
 
 // ==================== Claude API 类型 ====================
 
@@ -106,13 +129,18 @@ type ClaudeUsage struct {
 
 const maxToolDescLen = 10237
 
-func ClaudeToKiro(req *ClaudeRequest) *KiroPayload {
+func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	modelID := MapModel(req.Model)
 	origin := "AI_EDITOR"
 
 	// 提取系统提示
 	systemPrompt := extractSystemPrompt(req.System)
 
+	// When thinking mode is enabled, inject the thinking prompt ahead of the system prompt
+	if thinking {
+		systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
+	}
+
 	// 注入时间戳
 	timestamp := time.Now().Format(time.RFC3339)
 	systemPrompt = "[Context: Current time is " + timestamp + "]\n\n" + systemPrompt
@@ -507,7 +535,7 @@ type OpenAIUsage struct {
 
 // ==================== OpenAI -> Kiro 转换 ====================
 
-func OpenAIToKiro(req *OpenAIRequest) *KiroPayload {
+func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	modelID := MapModel(req.Model)
 	origin := "AI_EDITOR"
 
@@ -525,6 +553,11 @@ func OpenAIToKiro(req *OpenAIRequest) *KiroPayload {
 		}
 	}
 
+	// When thinking mode is enabled, inject the thinking prompt ahead of the system prompt
+	if thinking {
+		systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
+	}
+
 	// 注入时间戳
 	timestamp := time.Now().Format(time.RFC3339)
 	systemPrompt = "[Context: Current time is " + timestamp + "]\n\n" + systemPrompt
@@ -809,3 +842,89 @@ func KiroToOpenAIResponse(content string, toolUses []KiroToolUse, inputTokens, o
 		},
 	}
 }
+
+// extractThinkingFromContent removes every closed <thinking>...</thinking> pair from content and returns (trimmed remainder, concatenated thinking text)
+func extractThinkingFromContent(content string) (string, string) {
+	var reasoning string
+	result := content
+
+	for {
+		start := strings.Index(result, "<thinking>")
+		if start == -1 {
+			break
+		}
+		end := strings.Index(result[start:], "</thinking>")
+		if end == -1 {
+			break
+		}
+		end += start
+
+		// Collect the text between the opening and closing tags
+		thinkingContent := result[start+len("<thinking>") : end]
+		reasoning += thinkingContent
+
+		// Cut the whole tag pair out of the result
+		result = result[:start] + result[end+len("</thinking>"):]
+	}
+
+	return strings.TrimSpace(result), reasoning
+}
+
+// KiroToOpenAIResponseWithReasoning builds an OpenAI chat.completion response, placing reasoningContent according to thinkingFormat
+func KiroToOpenAIResponseWithReasoning(content, reasoningContent string, toolUses []KiroToolUse, inputTokens, outputTokens int, model, thinkingFormat string) map[string]interface{} {
+	finishReason := "stop"
+
+	message := map[string]interface{}{
+		"role": "assistant",
+	}
+
+	if len(toolUses) > 0 {
+		message["content"] = nil
+		toolCalls := make([]map[string]interface{}, len(toolUses))
+		for i, tu := range toolUses {
+			args, _ := json.Marshal(tu.Input)
+			toolCalls[i] = map[string]interface{}{
+				"id":   tu.ToolUseID,
+				"type": "function",
+				"function": map[string]string{
+					"name":      tu.Name,
+					"arguments": string(args),
+				},
+			}
+		}
+		message["tool_calls"] = toolCalls
+		finishReason = "tool_calls"
+	} else {
+		// Format the thinking output according to the configured format
+		if reasoningContent != "" {
+			switch thinkingFormat {
+			case "thinking":
+				message["content"] = "<thinking>" + reasoningContent + "</thinking>" + content
+			case "think":
+				message["content"] = "<think>" + reasoningContent + "</think>" + content
+			default: // "reasoning_content"
+				message["content"] = content
+				message["reasoning_content"] = reasoningContent
+			}
+		} else {
+			message["content"] = content
+		}
+	}
+
+	return map[string]interface{}{
+		"id":      "chatcmpl-" + uuid.New().String(),
+		"object":  "chat.completion",
+		"created": time.Now().Unix(),
+		"model":   model,
+		"choices": []map[string]interface{}{{
+			"index":         0,
+			"message":       message,
+			"finish_reason": finishReason,
+		}},
+		"usage": map[string]int{
+			"prompt_tokens":     inputTokens,
+			"completion_tokens": outputTokens,
+			"total_tokens":      inputTokens + outputTokens,
+		},
+	}
+}
diff --git a/web/index.html b/web/index.html
index c058575..89fe5c2 100644
--- a/web/index.html
+++ b/web/index.html
@@ -191,6 +191,31 @@
+
+
Thinking 模式设置
+
+ + + 模型名称加此后缀即启用思考模式,如 claude-sonnet-4.5-thinking +
+
+ + +
+
+ + +
+ +
管理密码
@@ -477,6 +502,29 @@
         const d = await res.json();
         document.getElementById('requireApiKey').checked = d.requireApiKey;
         document.getElementById('apiKeyInput').value = d.apiKey || '';
+        // Load the thinking configuration into the settings form
+        loadThinkingConfig();
+    }
+
+    async function loadThinkingConfig() {
+        const res = await fetch('/admin/api/thinking', { headers: { 'X-Admin-Password': password } });
+        const d = await res.json();
+        document.getElementById('thinkingSuffix').value = d.suffix || '-thinking';
+        document.getElementById('openaiThinkingFormat').value = d.openaiFormat || 'reasoning_content';
+        document.getElementById('claudeThinkingFormat').value = d.claudeFormat || 'thinking';
+    }
+
+    async function saveThinkingConfig() {
+        const res = await fetch('/admin/api/thinking', {
+            method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password },
+            body: JSON.stringify({
+                suffix: document.getElementById('thinkingSuffix').value || '-thinking',
+                openaiFormat: document.getElementById('openaiThinkingFormat').value,
+                claudeFormat: document.getElementById('claudeThinkingFormat').value
+            })
+        });
+        const d = await res.json();
+        if (d.success) { alert('Thinking 设置已保存'); } else { alert('保存失败: ' + d.error); }
     }
 
     async function saveSettings() {