From 01e9d0577c2e022bfb4deb7006884c2f22fa7f84 Mon Sep 17 00:00:00 2001
From: Quorinex <quorinex@users.noreply.github.com>
Date: Wed, 4 Feb 2026 17:42:30 +0800
Subject: [PATCH] feat: add thinking mode support with configurable output
 formats

---
 README.md           |  49 +++++
 README_CN.md        |  49 +++++
 auth/iam_sso.go     |   2 +-
 config/config.go    | 191 +++++++++++------
 main.go             |  15 +-
 proxy/handler.go    | 494 ++++++++++++++++++++++++++++++++++++++++----
 proxy/kiro.go       |  11 +-
 proxy/translator.go | 131 +++++++++++-
 web/index.html      |  48 +++++
 9 files changed, 877 insertions(+), 113 deletions(-)
diff --git a/README.md b/README.md
index fd53b48..5e7b0e9 100644
--- a/README.md
+++ b/README.md
@@ -150,6 +150,55 @@ curl http://localhost:8080/v1/chat/completions \
 | `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
 | `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
 
+## Thinking Mode
+
+Enable extended thinking by adding a suffix to the model name (default: `-thinking`).
+
+### Usage
+
+```bash
+# OpenAI API with thinking
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.5-thinking",
+    "messages": [{"role": "user", "content": "Solve this step by step: 15 * 23"}],
+    "stream": true
+  }'
+
+# Claude API with thinking
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{
+    "model": "claude-sonnet-4.5-thinking",
+    "max_tokens": 4096,
+    "messages": [{"role": "user", "content": "Analyze this problem"}]
+  }'
+```
+
+### Configuration
+
+Configure thinking mode in the Admin Panel under **Settings > Thinking Mode Settings**:
+
+| Setting | Description | Options |
+|---------|-------------|---------|
+| **Trigger Suffix** | Model name suffix to enable thinking | Default: `-thinking` (customizable, e.g., `-think`, `-reason`) |
+| **OpenAI Output Format** | How thinking content is returned in OpenAI API | `reasoning_content` (DeepSeek compatible), `<thinking>` tag, `<think>` tag |
+| **Claude Output Format** | How thinking content is returned in Claude API | `<thinking>` tag (default), `<think>` tag, plain text |
+
+### Output Formats
+
+**OpenAI API (`/v1/chat/completions`)**:
+- `reasoning_content` - Thinking in separate `reasoning_content` field (DeepSeek compatible)
+- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags in content
+- `think` - Thinking wrapped in `<think>...</think>` tags in content
+
+**Claude API (`/v1/messages`)**:
+- `thinking` - Thinking wrapped in `<thinking>...</thinking>` tags (default)
+- `think` - Thinking wrapped in `<think>...</think>` tags
+- `reasoning_content` - Thinking emitted as plain text with no wrapping tags (the Claude format has no separate `reasoning_content` field)
+
 ## API Endpoints
 
 | Endpoint | Description |
diff --git a/README_CN.md b/README_CN.md
index 2826304..a4ea2e8 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -150,6 +150,55 @@ curl http://localhost:8080/v1/chat/completions \
 | `gpt-4o`, `gpt-4` | claude-sonnet-4-20250514 |
 | `gpt-3.5-turbo` | claude-sonnet-4-20250514 |
 
+## 思考模式
+
+在模型名称后添加后缀（默认：`-thinking`）即可启用扩展思考模式。
+
+### 使用方法
+
+```bash
+# OpenAI API 启用思考
+curl http://localhost:8080/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "claude-sonnet-4.5-thinking",
+    "messages": [{"role": "user", "content": "一步步解决：15 * 23"}],
+    "stream": true
+  }'
+
+# Claude API 启用思考
+curl http://localhost:8080/v1/messages \
+  -H "Content-Type: application/json" \
+  -H "anthropic-version: 2023-06-01" \
+  -d '{
+    "model": "claude-sonnet-4.5-thinking",
+    "max_tokens": 4096,
+    "messages": [{"role": "user", "content": "分析这个问题"}]
+  }'
+```
+
+### 配置
+
+在管理面板的 **设置 > Thinking 模式设置** 中配置：
+
+| 设置 | 说明 | 选项 |
+|-----|------|------|
+| **触发后缀** | 启用思考的模型名称后缀 | 默认：`-thinking`（可自定义，如 `-think`、`-sikao`） |
+| **OpenAI 输出格式** | OpenAI API 中思考内容的返回方式 | `reasoning_content`（DeepSeek 兼容）、`<thinking>` 标签、`<think>` 标签 |
+| **Claude 输出格式** | Claude API 中思考内容的返回方式 | `<thinking>` 标签（默认）、`<think>` 标签、纯文本 |
+
+### 输出格式说明
+
+**OpenAI API (`/v1/chat/completions`)**：
+- `reasoning_content` - 思考内容放在单独的 `reasoning_content` 字段（DeepSeek 兼容）
+- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹在 content 中
+- `think` - 思考内容用 `<think>...</think>` 标签包裹在 content 中
+
+**Claude API (`/v1/messages`)**：
+- `thinking` - 思考内容用 `<thinking>...</thinking>` 标签包裹（默认）
+- `think` - 思考内容用 `<think>...</think>` 标签包裹
+- `reasoning_content` - 思考内容以纯文本输出，不加任何标签（Claude 格式没有单独的 `reasoning_content` 字段）
+
 ## API 端点
 
 | 端点 | 说明 |
diff --git a/auth/iam_sso.go b/auth/iam_sso.go
index cc8ea06..3ed6bb2 100644
--- a/auth/iam_sso.go
+++ b/auth/iam_sso.go
@@ -158,7 +158,7 @@ func CompleteIamSsoLogin(sessionID, callbackUrl string) (accessToken, refreshTok
 
 func registerOIDCClient(oidcBase, startUrl, redirectUri string) (clientID, clientSecret string, err error) {
 	payload := map[string]interface{}{
-		"clientName":   "Kiro API Proxy",
+		"clientName":   "Kiro",
 		"clientType":   "public",
 		"scopes":       scopes,
 		"grantTypes":   []string{"authorization_code", "refresh_token"},
diff --git a/config/config.go b/config/config.go
index 581ed34..c55cfb9 100644
--- a/config/config.go
+++ b/config/config.go
@@ -1,5 +1,13 @@
-// Package config 配置管理模块
-// 负责账号、设置、统计数据的持久化存储
+// Package config provides configuration management for Kiro API Proxy.
+//
+// This package handles persistent storage and retrieval of:
+//   - Account credentials and authentication tokens
+//   - Server settings (port, host, API keys)
+//   - Usage statistics and metrics
+//   - Thinking mode configuration for AI responses
+//
+// All configuration is stored in a JSON file with thread-safe access
+// via read-write mutex protection.
 package config
 
 import (
@@ -10,7 +18,9 @@ import (
 	"sync"
 )
 
-// GenerateMachineId 生成 UUID v4 格式的机器码
+// GenerateMachineId generates a UUID v4 format machine identifier.
+// This ID is used to uniquely identify the proxy instance in Kiro API requests,
+// helping with request tracking and rate limiting on the server side.
 func GenerateMachineId() string {
 	bytes := make([]byte, 16)
 	rand.Read(bytes)
@@ -20,67 +30,74 @@ func GenerateMachineId() string {
 		bytes[0:4], bytes[4:6], bytes[6:8], bytes[8:10], bytes[10:16])
 }
 
-// Account 账号信息
+// Account represents a Kiro API account with authentication credentials and usage statistics.
 type Account struct {
-	// 基本信息
-	ID           string `json:"id"`
-	Email        string `json:"email,omitempty"`
-	UserId       string `json:"userId,omitempty"`
-	Nickname     string `json:"nickname,omitempty"`
-	
-	// 认证信息
-	AccessToken  string `json:"accessToken"`
-	RefreshToken string `json:"refreshToken"`
-	ClientID     string `json:"clientId,omitempty"`
-	ClientSecret string `json:"clientSecret,omitempty"`
-	AuthMethod   string `json:"authMethod"`          // idc | social
-	Provider     string `json:"provider,omitempty"`
-	Region       string `json:"region"`
-	StartUrl     string `json:"startUrl,omitempty"`
-	ExpiresAt    int64  `json:"expiresAt,omitempty"`
-	MachineId    string `json:"machineId,omitempty"` // UUID 格式机器码
-	
-	// 状态
-	Enabled bool `json:"enabled"`
-	
-	// 订阅信息
-	SubscriptionType  string `json:"subscriptionType,omitempty"`  // FREE | PRO | PRO_PLUS | POWER
-	SubscriptionTitle string `json:"subscriptionTitle,omitempty"`
-	DaysRemaining     int    `json:"daysRemaining,omitempty"`
-	
-	// 使用量
-	UsageCurrent  float64 `json:"usageCurrent,omitempty"`
-	UsageLimit    float64 `json:"usageLimit,omitempty"`
-	UsagePercent  float64 `json:"usagePercent,omitempty"`
-	NextResetDate string  `json:"nextResetDate,omitempty"`
-	LastRefresh   int64   `json:"lastRefresh,omitempty"`
-	
-	// 运行时统计
-	RequestCount int     `json:"requestCount,omitempty"`
-	ErrorCount   int     `json:"errorCount,omitempty"`
-	LastUsed     int64   `json:"lastUsed,omitempty"`
-	TotalTokens  int     `json:"totalTokens,omitempty"`
-	TotalCredits float64 `json:"totalCredits,omitempty"`
+	// Basic identification
+	ID       string `json:"id"`                 // Unique account identifier (UUID)
+	Email    string `json:"email,omitempty"`    // User email address
+	UserId   string `json:"userId,omitempty"`   // Kiro user ID
+	Nickname string `json:"nickname,omitempty"` // Display name for admin panel
+
+	// Authentication credentials
+	AccessToken  string `json:"accessToken"`            // OAuth access token for API calls
+	RefreshToken string `json:"refreshToken"`           // OAuth refresh token for token renewal
+	ClientID     string `json:"clientId,omitempty"`     // OIDC client ID (for IdC auth)
+	ClientSecret string `json:"clientSecret,omitempty"` // OIDC client secret (for IdC auth)
+	AuthMethod   string `json:"authMethod"`             // Authentication method: "idc" (AWS IdC) or "social" (GitHub/Google)
+	Provider     string `json:"provider,omitempty"`     // Identity provider name (e.g., "BuilderId", "GitHub")
+	Region       string `json:"region"`                 // AWS region for OIDC endpoints
+	StartUrl     string `json:"startUrl,omitempty"`     // AWS SSO start URL
+	ExpiresAt    int64  `json:"expiresAt,omitempty"`    // Token expiration timestamp (Unix seconds)
+	MachineId    string `json:"machineId,omitempty"`    // UUID machine identifier for request tracking
+
+	// Account status
+	Enabled bool `json:"enabled"` // Whether account is active in the pool
+
+	// Subscription information
+	SubscriptionType  string `json:"subscriptionType,omitempty"`  // Tier: FREE, PRO, PRO_PLUS, or POWER
+	SubscriptionTitle string `json:"subscriptionTitle,omitempty"` // Human-readable subscription name
+	DaysRemaining     int    `json:"daysRemaining,omitempty"`     // Days until subscription expires
+
+	// Usage tracking
+	UsageCurrent  float64 `json:"usageCurrent,omitempty"`  // Current period usage (credits)
+	UsageLimit    float64 `json:"usageLimit,omitempty"`    // Maximum allowed usage per period
+	UsagePercent  float64 `json:"usagePercent,omitempty"`  // Usage percentage (0.0-1.0)
+	NextResetDate string  `json:"nextResetDate,omitempty"` // Date when usage resets (YYYY-MM-DD)
+	LastRefresh   int64   `json:"lastRefresh,omitempty"`   // Last info refresh timestamp
+
+	// Runtime statistics (updated during operation)
+	RequestCount int     `json:"requestCount,omitempty"` // Total requests processed
+	ErrorCount   int     `json:"errorCount,omitempty"`   // Total errors encountered
+	LastUsed     int64   `json:"lastUsed,omitempty"`     // Last request timestamp
+	TotalTokens  int     `json:"totalTokens,omitempty"`  // Cumulative tokens processed
+	TotalCredits float64 `json:"totalCredits,omitempty"` // Cumulative credits consumed
 }
 
-// Config 全局配置
+// Config represents the global application configuration.
 type Config struct {
-	Password      string    `json:"password"`
-	Port          int       `json:"port"`
-	Host          string    `json:"host"`
-	ApiKey        string    `json:"apiKey,omitempty"`
-	RequireApiKey bool      `json:"requireApiKey"`
-	Accounts      []Account `json:"accounts"`
-	
-	// 全局统计
-	TotalRequests   int     `json:"totalRequests,omitempty"`
-	SuccessRequests int     `json:"successRequests,omitempty"`
-	FailedRequests  int     `json:"failedRequests,omitempty"`
-	TotalTokens     int     `json:"totalTokens,omitempty"`
-	TotalCredits    float64 `json:"totalCredits,omitempty"`
+	// Server settings
+	Password      string    `json:"password"`      // Admin panel password
+	Port          int       `json:"port"`          // HTTP server port (default: 8080)
+	Host          string    `json:"host"`          // HTTP server bind address (default: 0.0.0.0)
+	ApiKey        string    `json:"apiKey,omitempty"`        // API key for client authentication
+	RequireApiKey bool      `json:"requireApiKey"` // Whether to enforce API key validation
+	Accounts      []Account `json:"accounts"`      // Registered Kiro accounts
+
+	// Thinking mode configuration for extended reasoning output
+	ThinkingSuffix       string `json:"thinkingSuffix,omitempty"`       // Model suffix to trigger thinking mode (default: "-thinking")
+	OpenAIThinkingFormat string `json:"openaiThinkingFormat,omitempty"` // OpenAI output format: "reasoning_content", "thinking", or "think"
+	ClaudeThinkingFormat string `json:"claudeThinkingFormat,omitempty"` // Claude output format: "reasoning_content", "thinking", or "think"
+
+	// Global statistics (persisted across restarts)
+	TotalRequests   int     `json:"totalRequests,omitempty"`   // Total API requests received
+	SuccessRequests int     `json:"successRequests,omitempty"` // Successful requests count
+	FailedRequests  int     `json:"failedRequests,omitempty"`  // Failed requests count
+	TotalTokens     int     `json:"totalTokens,omitempty"`     // Total tokens processed
+	TotalCredits    float64 `json:"totalCredits,omitempty"`    // Total credits consumed
 }
 
-// AccountInfo 账户信息更新结构
+// AccountInfo contains account metadata retrieved from Kiro API.
+// Used for updating subscription and usage information.
 type AccountInfo struct {
 	Email             string
 	UserId            string
@@ -100,7 +117,8 @@ var (
 	cfgPath string
 )
 
-// Init 初始化配置
+// Init initializes the configuration system with the specified file path.
+// If the file doesn't exist, a default configuration is created.
 func Init(path string) error {
 	cfgPath = path
 	return Load()
@@ -113,7 +131,8 @@ func Load() error {
 	data, err := os.ReadFile(cfgPath)
 	if err != nil {
 		if os.IsNotExist(err) {
-			// 创建默认配置，Docker 环境默认监听 0.0.0.0
+			// Create default configuration.
+			// Binds to 0.0.0.0 by default for Docker/container compatibility.
 			cfg = &Config{
 				Password:      "changeme",
 				Port:          8080,
@@ -134,7 +153,8 @@ func Load() error {
 	return nil
 }
 
-// Save 保存配置到文件
+// Save persists the current configuration to the JSON file.
+// Uses indented formatting for human readability.
 func Save() error {
 	data, err := json.MarshalIndent(cfg, "", "  ")
 	if err != nil {
@@ -143,7 +163,8 @@ func Save() error {
 	return os.WriteFile(cfgPath, data, 0600)
 }
 
-// SetPassword 设置密码（用于环境变量覆盖）
+// SetPassword updates the admin password.
+// Primarily used for environment variable override in containerized deployments.
 func SetPassword(password string) {
 	cfgLock.Lock()
 	defer cfgLock.Unlock()
@@ -303,7 +324,8 @@ func UpdateAccountStats(id string, requestCount, errorCount, totalTokens int, to
 	return nil
 }
 
-// UpdateAccountInfo 更新账户的订阅和使用量信息
+// UpdateAccountInfo updates an account's subscription and usage information.
+// Called after refreshing account data from Kiro API.
 func UpdateAccountInfo(id string, info AccountInfo) error {
 	cfgLock.Lock()
 	defer cfgLock.Unlock()
@@ -328,3 +350,46 @@ func UpdateAccountInfo(id string, info AccountInfo) error {
 	}
 	return nil
 }
+
+// ThinkingConfig is the resolved view of the thinking-mode settings stored on Config.
+// GetThinkingConfig fills each field with its default when the stored value is empty.
+type ThinkingConfig struct {
+	Suffix       string `json:"suffix"`       // Model name suffix that triggers thinking mode (default "-thinking")
+	OpenAIFormat string `json:"openaiFormat"` // OpenAI output format: "reasoning_content" (default), "thinking", or "think"
+	ClaudeFormat string `json:"claudeFormat"` // Claude output format: "thinking" (default), "think", or "reasoning_content"
+}
+
+// GetThinkingConfig returns the thinking settings, read under the read lock, with empty values replaced by defaults.
+func GetThinkingConfig() ThinkingConfig {
+	cfgLock.RLock()
+	defer cfgLock.RUnlock()
+	
+	suffix := cfg.ThinkingSuffix
+	if suffix == "" {
+		suffix = "-thinking" // default trigger suffix
+	}
+	openaiFormat := cfg.OpenAIThinkingFormat
+	if openaiFormat == "" {
+		openaiFormat = "reasoning_content" // default OpenAI output format
+	}
+	claudeFormat := cfg.ClaudeThinkingFormat
+	if claudeFormat == "" {
+		claudeFormat = "thinking" // default Claude output format
+	}
+	
+	return ThinkingConfig{
+		Suffix:       suffix,
+		OpenAIFormat: openaiFormat,
+		ClaudeFormat: claudeFormat,
+	}
+}
+
+// UpdateThinkingConfig stores the three thinking settings exactly as given (no validation) and returns the result of Save.
+func UpdateThinkingConfig(suffix, openaiFormat, claudeFormat string) error {
+	cfgLock.Lock()
+	defer cfgLock.Unlock()
+	cfg.ThinkingSuffix = suffix // an empty value makes GetThinkingConfig fall back to "-thinking"
+	cfg.OpenAIThinkingFormat = openaiFormat // an empty value falls back to "reasoning_content"
+	cfg.ClaudeThinkingFormat = claudeFormat // an empty value falls back to "thinking"
+	return Save() // called while the write lock is still held
+}
diff --git a/main.go b/main.go
index 12b8357..68d33f5 100644
--- a/main.go
+++ b/main.go
@@ -1,5 +1,16 @@
-// Kiro API Proxy - 将 Kiro API 转换为 OpenAI/Anthropic 兼容格式
-// 支持多账号池、自动 Token 刷新、流式响应
+// Package main provides the entry point for Kiro API Proxy.
+//
+// Kiro API Proxy is a reverse proxy service that translates Kiro API requests
+// into OpenAI and Anthropic (Claude) compatible formats. Key features include:
+//   - Multi-account pool with round-robin load balancing
+//   - Automatic OAuth token refresh
+//   - Streaming response support for real-time AI interactions
+//   - Admin panel for account and configuration management
+//
+// The service exposes the following endpoints:
+//   - /v1/messages - Claude API compatible endpoint
+//   - /v1/chat/completions - OpenAI API compatible endpoint
+//   - /admin - Web-based administration panel
 package main
 
 import (
diff --git a/proxy/handler.go b/proxy/handler.go
index f2f4432..494a580 100644
--- a/proxy/handler.go
+++ b/proxy/handler.go
@@ -206,9 +206,13 @@ func (h *Handler) handleHealth(w http.ResponseWriter, r *http.Request) {
 func (h *Handler) handleModels(w http.ResponseWriter, r *http.Request) {
 	models := []map[string]interface{}{
 		{"id": "claude-sonnet-4.5", "object": "model", "owned_by": "anthropic"},
+		{"id": "claude-sonnet-4.5-thinking", "object": "model", "owned_by": "anthropic"},
 		{"id": "claude-sonnet-4", "object": "model", "owned_by": "anthropic"},
+		{"id": "claude-sonnet-4-thinking", "object": "model", "owned_by": "anthropic"},
 		{"id": "claude-haiku-4.5", "object": "model", "owned_by": "anthropic"},
+		{"id": "claude-haiku-4.5-thinking", "object": "model", "owned_by": "anthropic"},
 		{"id": "claude-opus-4.5", "object": "model", "owned_by": "anthropic"},
+		{"id": "claude-opus-4.5-thinking", "object": "model", "owned_by": "anthropic"},
 		{"id": "auto", "object": "model", "owned_by": "kiro-api"},
 		{"id": "gpt-4o", "object": "model", "owned_by": "kiro-proxy"},
 		{"id": "gpt-4", "object": "model", "owned_by": "kiro-proxy"},
@@ -318,8 +322,13 @@ func (h *Handler) handleClaudeMessages(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
+	// 解析模型和 thinking 模式
+	thinkingCfg := config.GetThinkingConfig()
+	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
+	req.Model = actualModel
+
 	// 转换请求
-	kiroPayload := ClaudeToKiro(&req)
+	kiroPayload := ClaudeToKiro(&req, thinking)
 
 	// 流式或非流式
 	if req.Stream {
@@ -341,6 +350,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 		return
 	}
 
+	// 获取 thinking 输出格式配置
+	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
+
 	msgID := "msg_" + uuid.New().String()
 	var contentStarted bool
 	var toolUseIndex int
@@ -348,6 +360,157 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 	var credits float64
 	var toolUses []KiroToolUse
 
+	// Thinking 标签解析状态
+	var textBuffer string
+	var inThinkingBlock bool
+
+	// sendText emits one text_delta on content block 0 via h.sendSSE, opening the block first if needed.
+	// thinkingState: 0=regular content, 1=thinking start, 2=thinking middle, 3=thinking end
+	sendText := func(text string, thinkingState int) {
+		// Make sure the content_block has been started
+		if !contentStarted {
+			h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
+				"type":          "content_block_start",
+				"index":         0,
+				"content_block": map[string]string{"type": "text", "text": ""},
+			})
+			contentStarted = true
+		}
+		
+		if thinkingState == 0 {
+			// Regular content
+			if text == "" {
+				return
+			}
+			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+				"type":  "content_block_delta",
+				"index": 0,
+				"delta": map[string]string{"type": "text_delta", "text": text},
+			})
+		} else {
+			// Thinking content
+			var outputText string
+			switch thinkingFormat {
+			case "think":
+				switch thinkingState {
+				case 1:
+					outputText = "<think>" + text
+				case 2:
+					outputText = text
+				case 3:
+					outputText = text + "</think>"
+				}
+			case "reasoning_content":
+				// The Claude format has no reasoning_content field, so output the text as-is without tags
+				outputText = text
+			default: // "thinking"
+				switch thinkingState {
+				case 1:
+					outputText = "<thinking>" + text
+				case 2:
+					outputText = text
+				case 3:
+					outputText = text + "</thinking>"
+				}
+			}
+			if outputText == "" {
+				return
+			}
+			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
+				"type":  "content_block_delta",
+				"index": 0,
+				"delta": map[string]string{"type": "text_delta", "text": outputText},
+			})
+		}
+	}
+
+	// processClaudeText buffers incoming text, splits it on <thinking>/</thinking> tags and forwards the pieces to sendText.
+	var thinkingStarted bool
+	
+	processClaudeText := func(text string, isThinking bool, forceFlush bool) {
+		// Text flagged as thinking is forwarded unbuffered. NOTE(review): only states 1 and 2 are sent on this path, so no closing tag is emitted here - confirm intended.
+		if isThinking {
+			if !thinkingStarted {
+				sendText(text, 1)
+				thinkingStarted = true
+			} else {
+				sendText(text, 2)
+			}
+			return
+		}
+
+		textBuffer += text
+
+		for {
+			if !inThinkingBlock {
+				thinkingStart := strings.Index(textBuffer, "<thinking>")
+				if thinkingStart != -1 {
+					if thinkingStart > 0 {
+						sendText(textBuffer[:thinkingStart], 0)
+					}
+					textBuffer = textBuffer[thinkingStart+10:] // 10 == len("<thinking>")
+					inThinkingBlock = true
+					thinkingStarted = false
+				} else if forceFlush || len([]rune(textBuffer)) > 50 {
+					// Slice by runes so a multi-byte Unicode character is never split
+					runes := []rune(textBuffer)
+					safeLen := len(runes)
+					if !forceFlush {
+						safeLen = max(0, len(runes)-15) // hold back the last 15 runes in case they begin a tag
+					}
+					if safeLen > 0 {
+						sendText(string(runes[:safeLen]), 0)
+						textBuffer = string(runes[safeLen:])
+					}
+					break
+				} else {
+					break
+				}
+			} else {
+				thinkingEnd := strings.Index(textBuffer, "</thinking>")
+				if thinkingEnd != -1 {
+					content := textBuffer[:thinkingEnd]
+					if !thinkingStarted {
+						sendText(content, 1)
+						sendText("", 3)
+					} else {
+						sendText(content, 3)
+					}
+					textBuffer = textBuffer[thinkingEnd+11:] // 11 == len("</thinking>")
+					inThinkingBlock = false
+					thinkingStarted = false
+				} else if forceFlush {
+					if textBuffer != "" {
+						if !thinkingStarted {
+							sendText(textBuffer, 1)
+							sendText("", 3)
+						} else {
+							sendText(textBuffer, 3)
+						}
+						textBuffer = ""
+					}
+					break
+				} else {
+					// Stream the content inside the thinking block as it arrives
+					runes := []rune(textBuffer)
+					if len(runes) > 20 {
+						safeLen := len(runes) - 15
+						if safeLen > 0 {
+							if !thinkingStarted {
+								sendText(string(runes[:safeLen]), 1)
+								thinkingStarted = true
+							} else {
+								sendText(string(runes[:safeLen]), 2)
+							}
+							textBuffer = string(runes[safeLen:])
+						}
+					}
+					break
+				}
+			}
+		}
+	}
+
 	// 发送 message_start
 	h.sendSSE(w, flusher, "message_start", map[string]interface{}{
 		"type": "message_start",
@@ -365,27 +528,12 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 			if text == "" {
 				return
 			}
-			// 确保 content_block 已开始
-			if !contentStarted {
-				h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{
-					"type":          "content_block_start",
-					"index":         0,
-					"content_block": map[string]string{"type": "text", "text": ""},
-				})
-				contentStarted = true
-			}
-			// 直接转发文本，不缓冲
-			outputText := text
-			if isThinking {
-				outputText = "<thinking>" + text + "</thinking>"
-			}
-			h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{
-				"type":  "content_block_delta",
-				"index": 0,
-				"delta": map[string]string{"type": "text_delta", "text": outputText},
-			})
+			processClaudeText(text, isThinking, false)
 		},
 		OnToolUse: func(tu KiroToolUse) {
+			// 先刷新缓冲区
+			processClaudeText("", false, true)
+
 			toolUses = append(toolUses, tu)
 
 			// 关闭文本块
@@ -451,6 +599,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
 		return
 	}
 
+	// 刷新剩余缓冲区
+	processClaudeText("", false, true)
+
 	h.recordSuccess(inputTokens, outputTokens, credits)
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
@@ -510,6 +661,7 @@ func (h *Handler) recordFailure() {
 // handleClaudeNonStream Claude 非流式响应
 func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string) {
 	var content string
+	var thinkingContent string
 	var toolUses []KiroToolUse
 	var inputTokens, outputTokens int
 	var credits float64
@@ -517,7 +669,7 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	callback := &KiroStreamCallback{
 		OnText: func(text string, isThinking bool) {
 			if isThinking {
-				content += "<thinking>" + text + "</thinking>"
+				thinkingContent += text
 			} else {
 				content += text
 			}
@@ -549,7 +701,21 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
 
-	resp := KiroToClaudeResponse(content, toolUses, inputTokens, outputTokens, model)
+	// 合并 thinking 内容（如果有 reasoningContentEvent 的内容）
+	thinkingFormat := config.GetThinkingConfig().ClaudeFormat
+	finalContent := content
+	if thinkingContent != "" {
+		switch thinkingFormat {
+		case "think":
+			finalContent = "<think>" + thinkingContent + "</think>" + content
+		case "reasoning_content":
+			finalContent = thinkingContent + content // Claude 格式不支持 reasoning_content，直接拼接
+		default: // "thinking"
+			finalContent = "<thinking>" + thinkingContent + "</thinking>" + content
+		}
+	}
+
+	resp := KiroToClaudeResponse(finalContent, toolUses, inputTokens, outputTokens, model)
 	w.Header().Set("Content-Type", "application/json; charset=utf-8")
 	json.NewEncoder(w).Encode(resp)
 }
@@ -596,7 +762,12 @@ func (h *Handler) handleOpenAIChat(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	kiroPayload := OpenAIToKiro(&req)
+	// 解析模型和 thinking 模式
+	thinkingCfg := config.GetThinkingConfig()
+	actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
+	req.Model = actualModel
+
+	kiroPayload := OpenAIToKiro(&req, thinking)
 
 	if req.Stream {
 		h.handleOpenAIStream(w, account, kiroPayload, req.Model)
@@ -617,38 +788,224 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 		return
 	}
 
+	// 获取 thinking 输出格式配置
+	thinkingFormat := config.GetThinkingConfig().OpenAIFormat
+
 	chatID := "chatcmpl-" + uuid.New().String()
 	var toolCalls []ToolCall
 	var toolCallIndex int
 	var inputTokens, outputTokens int
 	var credits float64
 
-	callback := &KiroStreamCallback{
-		OnText: func(text string, isThinking bool) {
-			if text == "" {
+	// Thinking 标签解析状态
+	var textBuffer string
+	var inThinkingBlock bool
+
+	// sendChunk writes one chat.completion.chunk as an SSE data line and flushes it.
+	// thinkingState: 0=regular content, 1=thinking start, 2=thinking middle, 3=thinking end
+	sendChunk := func(content string, thinkingState int) {
+		if content == "" && thinkingState == 2 {
+			return
+		}
+		
+		var chunk map[string]interface{}
+		
+		if thinkingState > 0 {
+			// Thinking content
+			switch thinkingFormat {
+			case "thinking":
+				// Emit the tags inline as part of the streamed content
+				var text string
+				switch thinkingState {
+				case 1: // start
+					text = "<thinking>" + content
+				case 2: // middle
+					text = content
+				case 3: // end
+					text = content + "</thinking>"
+				}
+				if text == "" {
+					return
+				}
+				chunk = map[string]interface{}{
+					"id":      chatID,
+					"object":  "chat.completion.chunk",
+					"created": time.Now().Unix(),
+					"model":   model,
+					"choices": []map[string]interface{}{{
+						"index":         0,
+						"delta":         map[string]string{"content": text},
+						"finish_reason": nil,
+					}},
+				}
+			case "think":
+				var text string
+				switch thinkingState {
+				case 1:
+					text = "<think>" + content
+				case 2:
+					text = content
+				case 3:
+					text = content + "</think>"
+				}
+				if text == "" {
+					return
+				}
+				chunk = map[string]interface{}{
+					"id":      chatID,
+					"object":  "chat.completion.chunk",
+					"created": time.Now().Unix(),
+					"model":   model,
+					"choices": []map[string]interface{}{{
+						"index":         0,
+						"delta":         map[string]string{"content": text},
+						"finish_reason": nil,
+					}},
+				}
+			default: // "reasoning_content"
+				if content == "" {
+					return
+				}
+				chunk = map[string]interface{}{
+					"id":      chatID,
+					"object":  "chat.completion.chunk",
+					"created": time.Now().Unix(),
+					"model":   model,
+					"choices": []map[string]interface{}{{
+						"index":         0,
+						"delta":         map[string]string{"reasoning_content": content},
+						"finish_reason": nil,
+					}},
+				}
+			}
+		} else {
+			// Regular content
+			if content == "" {
 				return
 			}
-			// 直接转发，不缓冲
-			deltaKey := "content"
-			if isThinking {
-				deltaKey = "reasoning_content"
-			}
-			chunk := map[string]interface{}{
+			chunk = map[string]interface{}{
 				"id":      chatID,
 				"object":  "chat.completion.chunk",
 				"created": time.Now().Unix(),
 				"model":   model,
 				"choices": []map[string]interface{}{{
 					"index":         0,
-					"delta":         map[string]string{deltaKey: text},
+					"delta":         map[string]string{"content": content},
 					"finish_reason": nil,
 				}},
 			}
-			data, _ := json.Marshal(chunk)
-			fmt.Fprintf(w, "data: %s\n\n", string(data))
-			flusher.Flush()
+		}
+		data, _ := json.Marshal(chunk)
+		fmt.Fprintf(w, "data: %s\n\n", string(data))
+		flusher.Flush()
+	}
+
+	// processText buffers incoming text, splits it on <thinking>/</thinking> tags and forwards the pieces to sendChunk.
+	// thinkingStarted tracks whether the start of the current thinking section has already been sent.
+	var thinkingStarted bool
+	
+	processText := func(text string, isThinking bool, forceFlush bool) {
+		// Text flagged as thinking is forwarded unbuffered. NOTE(review): only states 1 and 2 are sent on this path, so no closing tag is emitted here - confirm intended.
+		if isThinking {
+			if !thinkingStarted {
+				sendChunk(text, 1) // start
+				thinkingStarted = true
+			} else {
+				sendChunk(text, 2) // middle
+			}
+			return
+		}
+
+		textBuffer += text
+
+		for {
+			if !inThinkingBlock {
+				// Look for the opening <thinking> tag
+				thinkingStart := strings.Index(textBuffer, "<thinking>")
+				if thinkingStart != -1 {
+					// Emit whatever precedes the thinking tag as regular content
+					if thinkingStart > 0 {
+						sendChunk(textBuffer[:thinkingStart], 0)
+					}
+					textBuffer = textBuffer[thinkingStart+10:] // drop <thinking> (10 bytes)
+					inThinkingBlock = true
+					thinkingStarted = false // reset so the next thinking output is sent as a start
+				} else if forceFlush || len([]rune(textBuffer)) > 50 {
+					// No tag found: emit the safe prefix and hold back a tail that may be a partial tag
+					runes := []rune(textBuffer)
+					safeLen := len(runes)
+					if !forceFlush {
+						safeLen = max(0, len(runes)-15)
+					}
+					if safeLen > 0 {
+						sendChunk(string(runes[:safeLen]), 0)
+						textBuffer = string(runes[safeLen:])
+					}
+					break
+				} else {
+					break
+				}
+			} else {
+				// Inside a thinking block: look for the closing </thinking> tag
+				thinkingEnd := strings.Index(textBuffer, "</thinking>")
+				if thinkingEnd != -1 {
+					// Emit the thinking content
+					content := textBuffer[:thinkingEnd]
+					if !thinkingStarted {
+						// Whole block is available at once: send start with the content, then the end
+						sendChunk(content, 1) // start
+						sendChunk("", 3)      // end (empty content, closing tag only)
+					} else {
+						// Already started: send the remaining content together with the end
+						sendChunk(content, 3) // end
+					}
+					textBuffer = textBuffer[thinkingEnd+11:] // drop </thinking> (11 bytes)
+					inThinkingBlock = false
+					thinkingStarted = false
+				} else if forceFlush {
+					// Forced flush: emit whatever is left in the buffer
+					if textBuffer != "" {
+						if !thinkingStarted {
+							sendChunk(textBuffer, 1) // start
+							sendChunk("", 3)         // end
+						} else {
+							sendChunk(textBuffer, 3) // end
+						}
+						textBuffer = ""
+					}
+					break
+				} else {
+					// Stream the content inside the thinking block as it arrives
+					runes := []rune(textBuffer)
+					if len(runes) > 20 {
+						safeLen := len(runes) - 15 // hold back a tail that may be a partial </thinking>
+						if safeLen > 0 {
+							if !thinkingStarted {
+								sendChunk(string(runes[:safeLen]), 1) // start
+								thinkingStarted = true
+							} else {
+								sendChunk(string(runes[:safeLen]), 2) // middle
+							}
+							textBuffer = string(runes[safeLen:])
+						}
+					}
+					break
+				}
+			}
+		}
+	}
+
+	callback := &KiroStreamCallback{
+		OnText: func(text string, isThinking bool) {
+			if text == "" {
+				return
+			}
+			processText(text, isThinking, false)
 		},
 		OnToolUse: func(tu KiroToolUse) {
+			// 先刷新缓冲区
+			processText("", false, true)
+
 			args, _ := json.Marshal(tu.Input)
 			tc := ToolCall{ID: tu.ToolUseID, Type: "function"}
 			tc.Function.Name = tu.Name
@@ -700,6 +1057,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 		return
 	}
 
+	// 刷新剩余缓冲区
+	processText("", false, true)
+
 	h.recordSuccess(inputTokens, outputTokens, credits)
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
@@ -730,6 +1090,7 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco
 // handleOpenAINonStream OpenAI 非流式响应
 func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string) {
 	var content string
+	var reasoningContent string
 	var toolUses []KiroToolUse
 	var inputTokens, outputTokens int
 	var credits float64
@@ -737,8 +1098,7 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 	callback := &KiroStreamCallback{
 		OnText: func(text string, isThinking bool) {
 			if isThinking {
-				// 非流式模式下，thinking 内容可以作为单独字段或忽略
-				// 这里暂时忽略
+				reasoningContent += text
 			} else {
 				content += text
 			}
@@ -761,7 +1121,14 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A
 	h.pool.RecordSuccess(account.ID)
 	h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
 
-	resp := KiroToOpenAIResponse(content, toolUses, inputTokens, outputTokens, model)
+	// 解析 content 中的 <thinking> 标签
+	finalContent, extractedReasoning := extractThinkingFromContent(content)
+	if extractedReasoning != "" {
+		reasoningContent = extractedReasoning + reasoningContent
+	}
+
+	thinkingFormat := config.GetThinkingConfig().OpenAIFormat
+	resp := KiroToOpenAIResponseWithReasoning(finalContent, reasoningContent, toolUses, inputTokens, outputTokens, model, thinkingFormat)
 	w.Header().Set("Content-Type", "application/json; charset=utf-8")
 	json.NewEncoder(w).Encode(resp)
 }
@@ -862,6 +1229,10 @@ func (h *Handler) handleAdminAPI(w http.ResponseWriter, r *http.Request) {
 		h.apiResetStats(w, r)
 	case path == "/generate-machine-id" && r.Method == "GET":
 		h.apiGenerateMachineId(w, r)
+	case path == "/thinking" && r.Method == "GET":
+		h.apiGetThinkingConfig(w, r)
+	case path == "/thinking" && r.Method == "POST":
+		h.apiUpdateThinkingConfig(w, r)
 	default:
 		w.WriteHeader(404)
 		json.NewEncoder(w).Encode(map[string]string{"error": "Not Found"})
@@ -1509,3 +1880,48 @@ func (h *Handler) serveStaticFile(w http.ResponseWriter, r *http.Request) {
 	path := strings.TrimPrefix(r.URL.Path, "/admin/")
 	http.ServeFile(w, r, "web/"+path)
 }
+
+// apiGetThinkingConfig writes the current thinking configuration (suffix, openaiFormat, claudeFormat) to the response as a JSON object.
+func (h *Handler) apiGetThinkingConfig(w http.ResponseWriter, r *http.Request) {
+	cfg := config.GetThinkingConfig()
+	json.NewEncoder(w).Encode(map[string]interface{}{
+		"suffix":       cfg.Suffix,
+		"openaiFormat": cfg.OpenAIFormat,
+		"claudeFormat": cfg.ClaudeFormat,
+	})
+}
+
+// apiUpdateThinkingConfig decodes a JSON body (suffix, openaiFormat, claudeFormat), answers 400 for malformed JSON or an unknown format value, then passes the values to config.UpdateThinkingConfig and answers 500 if that returns an error.
+func (h *Handler) apiUpdateThinkingConfig(w http.ResponseWriter, r *http.Request) {
+	var req struct {
+		Suffix       string `json:"suffix"`
+		OpenAIFormat string `json:"openaiFormat"`
+		ClaudeFormat string `json:"claudeFormat"`
+	}
+	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid JSON"})
+		return
+	}
+
+	// Validate the format values; empty strings skip this check and are forwarded unchanged.
+	validFormats := map[string]bool{"reasoning_content": true, "thinking": true, "think": true}
+	if req.OpenAIFormat != "" && !validFormats[req.OpenAIFormat] {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid openaiFormat, must be: reasoning_content, thinking, or think"})
+		return
+	}
+	if req.ClaudeFormat != "" && !validFormats[req.ClaudeFormat] {
+		w.WriteHeader(400)
+		json.NewEncoder(w).Encode(map[string]string{"error": "Invalid claudeFormat, must be: reasoning_content, thinking, or think"})
+		return
+	}
+
+	if err := config.UpdateThinkingConfig(req.Suffix, req.OpenAIFormat, req.ClaudeFormat); err != nil {
+		w.WriteHeader(500)
+		json.NewEncoder(w).Encode(map[string]string{"error": err.Error()})
+		return
+	}
+
+	json.NewEncoder(w).Encode(map[string]bool{"success": true})
+}
diff --git a/proxy/kiro.go b/proxy/kiro.go
index 71c0f92..9807f87 100644
--- a/proxy/kiro.go
+++ b/proxy/kiro.go
@@ -120,6 +120,9 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		return err
 	}
 
+	// 预估输入 token（约 3 字符 = 1 token）
+	estimatedInputTokens := max(1, len(body)/3)
+
 	req, err := http.NewRequest("POST", KiroEndpoint, bytes.NewReader(body))
 	if err != nil {
 		return err
@@ -160,13 +163,13 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt
 		return fmt.Errorf("HTTP %d: %s", resp.StatusCode, string(body))
 	}
 
-	return parseEventStream(resp.Body, callback)
+	return parseEventStream(resp.Body, callback, estimatedInputTokens)
 }
 
 // ==================== Event Stream 解析 ====================
 
 // parseEventStream 解析 AWS Event Stream 二进制格式
-func parseEventStream(body io.Reader, callback *KiroStreamCallback) error {
+func parseEventStream(body io.Reader, callback *KiroStreamCallback, estimatedInputTokens int) error {
 	// 不使用 bufio，直接读取避免缓冲延迟
 	var inputTokens, outputTokens int
 	var totalOutputChars int
@@ -249,6 +252,10 @@ func parseEventStream(body io.Reader, callback *KiroStreamCallback) error {
 	if outputTokens == 0 && totalOutputChars > 0 {
 		outputTokens = max(1, totalOutputChars/3)
 	}
+	// 如果 Kiro 没返回 inputTokens，使用预估值
+	if inputTokens == 0 {
+		inputTokens = estimatedInputTokens
+	}
 
 	if callback.OnCredits != nil && totalCredits > 0 {
 		callback.OnCredits(totalCredits)
diff --git a/proxy/translator.go b/proxy/translator.go
index 7849601..4ace590 100644
--- a/proxy/translator.go
+++ b/proxy/translator.go
@@ -30,18 +30,41 @@ var modelMap = map[string]string{
 	"gpt-3.5-turbo":            "claude-sonnet-4.5",
 }
 
-func MapModel(model string) string {
+// ThinkingModePrompt is the marker text prepended to the system prompt when thinking mode is enabled.
+const ThinkingModePrompt = `<thinking_mode>enabled</thinking_mode>
+<max_thinking_length>200000</max_thinking_length>`
+
+// ParseModelAndThinking resolves a client-supplied model name to the mapped model ID and reports whether the name ended with thinkingSuffix (compared case-insensitively); an empty thinkingSuffix never enables thinking.
+func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
 	lower := strings.ToLower(model)
+	thinking := false
+
+	// Strip the configured suffix; the emptiness guard is required because HasSuffix(s, "") is always true.
+	suffixLower := strings.ToLower(thinkingSuffix)
+	if suffixLower != "" && strings.HasSuffix(lower, suffixLower) {
+		thinking = true
+		model = model[:len(model)-len(thinkingSuffix)]
+		lower = strings.ToLower(model)
+	}
+
+	// Map known aliases (substring match against the modelMap keys) to their target model.
 	for k, v := range modelMap {
 		if strings.Contains(lower, k) {
-			return v
+			return v, thinking
 		}
 	}
+
 	// 如果已经是有效的 Kiro 模型，直接返回
 	if strings.HasPrefix(lower, "claude-") {
-		return model
+		return model, thinking
 	}
-	return "claude-sonnet-4.5"
+
+	return "claude-sonnet-4.5", thinking
+}
+
+func MapModel(model string) string {
+	mapped, _ := ParseModelAndThinking(model, "-thinking") // NOTE(review): always strips the literal "-thinking"; a differently configured suffix is not removed here — confirm this is intended.
+	return mapped
 }
 
 // ==================== Claude API 类型 ====================
@@ -106,13 +129,18 @@ type ClaudeUsage struct {
 
 const maxToolDescLen = 10237
 
-func ClaudeToKiro(req *ClaudeRequest) *KiroPayload {
+func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
 	modelID := MapModel(req.Model)
 	origin := "AI_EDITOR"
 
 	// 提取系统提示
 	systemPrompt := extractSystemPrompt(req.System)
 	
+	// 如果启用 thinking 模式，注入 thinking 提示
+	if thinking {
+		systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
+	}
+	
 	// 注入时间戳
 	timestamp := time.Now().Format(time.RFC3339)
 	systemPrompt = "[Context: Current time is " + timestamp + "]\n\n" + systemPrompt
@@ -507,7 +535,7 @@ type OpenAIUsage struct {
 
 // ==================== OpenAI -> Kiro 转换 ====================
 
-func OpenAIToKiro(req *OpenAIRequest) *KiroPayload {
+func OpenAIToKiro(req *OpenAIRequest, thinking bool) *KiroPayload {
 	modelID := MapModel(req.Model)
 	origin := "AI_EDITOR"
 
@@ -525,6 +553,11 @@ func OpenAIToKiro(req *OpenAIRequest) *KiroPayload {
 		}
 	}
 
+	// 如果启用 thinking 模式，注入 thinking 提示
+	if thinking {
+		systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
+	}
+
 	// 注入时间戳
 	timestamp := time.Now().Format(time.RFC3339)
 	systemPrompt = "[Context: Current time is " + timestamp + "]\n\n" + systemPrompt
@@ -809,3 +842,89 @@ func KiroToOpenAIResponse(content string, toolUses []KiroToolUse, inputTokens, o
 		},
 	}
 }
+
+// extractThinkingFromContent removes every complete <thinking>...</thinking> block from content and returns the whitespace-trimmed remainder plus the concatenated block contents; an opening tag with no closing tag is left in place.
+func extractThinkingFromContent(content string) (string, string) {
+	var reasoning string
+	result := content
+
+	for {
+		start := strings.Index(result, "<thinking>")
+		if start == -1 {
+			break
+		}
+		end := strings.Index(result[start:], "</thinking>")
+		if end == -1 {
+			break
+		}
+		end += start
+
+		// Collect the text between the tags (10 == len("<thinking>")).
+		thinkingContent := result[start+10 : end]
+		reasoning += thinkingContent
+
+		// Cut the whole block out of the result, closing tag included (11 == len("</thinking>")).
+		result = result[:start] + result[end+11:]
+	}
+
+	return strings.TrimSpace(result), reasoning
+}
+
+// KiroToOpenAIResponseWithReasoning builds an OpenAI chat.completion response map. Without tool calls, reasoningContent is emitted per thinkingFormat ("thinking"/"think" wrap it in tags ahead of content; any other value puts it in a reasoning_content field). With tool calls, content is nil and reasoningContent is not included.
+func KiroToOpenAIResponseWithReasoning(content, reasoningContent string, toolUses []KiroToolUse, inputTokens, outputTokens int, model, thinkingFormat string) map[string]interface{} {
+	finishReason := "stop"
+
+	message := map[string]interface{}{
+		"role": "assistant",
+	}
+
+	if len(toolUses) > 0 {
+		message["content"] = nil
+		toolCalls := make([]map[string]interface{}, len(toolUses))
+		for i, tu := range toolUses {
+			args, _ := json.Marshal(tu.Input)
+			toolCalls[i] = map[string]interface{}{
+				"id":   tu.ToolUseID,
+				"type": "function",
+				"function": map[string]string{
+					"name":      tu.Name,
+					"arguments": string(args),
+				},
+			}
+		}
+		message["tool_calls"] = toolCalls
+		finishReason = "tool_calls"
+	} else {
+		// Format the thinking output according to the configured format.
+		if reasoningContent != "" {
+			switch thinkingFormat {
+			case "thinking":
+				message["content"] = "<thinking>" + reasoningContent + "</thinking>" + content
+			case "think":
+				message["content"] = "<think>" + reasoningContent + "</think>" + content
+			default: // "reasoning_content"
+				message["content"] = content
+				message["reasoning_content"] = reasoningContent
+			}
+		} else {
+			message["content"] = content
+		}
+	}
+
+	return map[string]interface{}{
+		"id":      "chatcmpl-" + uuid.New().String(),
+		"object":  "chat.completion",
+		"created": time.Now().Unix(),
+		"model":   model,
+		"choices": []map[string]interface{}{{
+			"index":         0,
+			"message":       message,
+			"finish_reason": finishReason,
+		}},
+		"usage": map[string]int{
+			"prompt_tokens":     inputTokens,
+			"completion_tokens": outputTokens,
+			"total_tokens":      inputTokens + outputTokens,
+		},
+	}
+}
diff --git a/web/index.html b/web/index.html
index c058575..89fe5c2 100644
--- a/web/index.html
+++ b/web/index.html
@@ -191,6 +191,31 @@
                 </div>
                 <button class="btn btn-primary" onclick="saveSettings()">保存设置</button>
             </div>
+            <div class="card">
+                <div class="card-header"><span class="card-title">Thinking 模式设置</span></div>
+                <div class="form-group">
+                    <label>触发后缀</label>
+                    <input type="text" id="thinkingSuffix" placeholder="-thinking">
+                    <small style="color:#64748b;font-size:12px;margin-top:4px;display:block">模型名称加此后缀即启用思考模式，如 claude-sonnet-4.5-thinking</small>
+                </div>
+                <div class="form-group">
+                    <label>OpenAI API 输出格式</label>
+                    <select id="openaiThinkingFormat">
+                        <option value="reasoning_content">reasoning_content (DeepSeek 兼容)</option>
+                        <option value="thinking">&lt;thinking&gt; 标签 (Claude 原生)</option>
+                        <option value="think">&lt;think&gt; 标签 (OpenAI 原生)</option>
+                    </select>
+                </div>
+                <div class="form-group">
+                    <label>Claude API 输出格式</label>
+                    <select id="claudeThinkingFormat">
+                        <option value="thinking">&lt;thinking&gt; 标签 (Claude 原生)</option>
+                        <option value="think">&lt;think&gt; 标签</option>
+                        <option value="reasoning_content">直接输出 (无标签)</option>
+                    </select>
+                </div>
+                <button class="btn btn-primary" onclick="saveThinkingConfig()">保存 Thinking 设置</button>
+            </div>
             <div class="card">
                 <div class="card-header"><span class="card-title">管理密码</span></div>
                 <div class="form-group">
@@ -477,6 +502,29 @@
             const d = await res.json();
             document.getElementById('requireApiKey').checked = d.requireApiKey;
             document.getElementById('apiKeyInput').value = d.apiKey || '';
+            // 加载 thinking 配置
+            loadThinkingConfig();
+        }
+
+        async function loadThinkingConfig() { // Fetches /admin/api/thinking and fills the three form fields, using the built-in defaults for empty values.
+            const res = await fetch('/admin/api/thinking', { headers: { 'X-Admin-Password': password } });
+            const d = await res.json();
+            document.getElementById('thinkingSuffix').value = d.suffix || '-thinking';
+            document.getElementById('openaiThinkingFormat').value = d.openaiFormat || 'reasoning_content';
+            document.getElementById('claudeThinkingFormat').value = d.claudeFormat || 'thinking';
+        }
+
+        async function saveThinkingConfig() { // POSTs the form values to /admin/api/thinking and alerts the outcome; an empty suffix field is sent as '-thinking'.
+            const res = await fetch('/admin/api/thinking', {
+                method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password },
+                body: JSON.stringify({
+                    suffix: document.getElementById('thinkingSuffix').value || '-thinking',
+                    openaiFormat: document.getElementById('openaiThinkingFormat').value,
+                    claudeFormat: document.getElementById('claudeThinkingFormat').value
+                })
+            });
+            const d = await res.json();
+            if (d.success) { alert('Thinking 设置已保存'); } else { alert('保存失败: ' + d.error); }
         }
 
         async function saveSettings() {