From 31aa6aa4215035e894ac8a0476f2c8c01f6076db Mon Sep 17 00:00:00 2001 From: Naive YH Date: Mon, 11 May 2026 17:23:21 +0800 Subject: [PATCH 1/9] fix: accurate input_tokens via contextUsageEvent + smart routing for SDK clients --- proxy/handler.go | 330 ++++++++++++++++++++++++++++++++++++++++++++++- proxy/kiro.go | 27 +++- 2 files changed, 346 insertions(+), 11 deletions(-) diff --git a/proxy/handler.go b/proxy/handler.go index 85afc5e..cff78fe 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -262,6 +262,12 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } h.handleClaudeMessages(w, r) + case path == "/cc/v1/messages": + if !h.validateApiKey(r) { + h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key") + return + } + h.handleClaudeMessagesBuffered(w, r) case path == "/v1/messages/count_tokens" || path == "/messages/count_tokens": if !h.validateApiKey(r) { h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key") @@ -631,9 +637,13 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re // 转换请求 kiroPayload := ClaudeToKiro(&req, thinking) - // 流式或非流式 + // 流式或非流式;SDK 客户端(Claude Code、opencode 等)自动使用缓冲模式以获取精确 message_start if req.Stream { - h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + if isAnthropicSDKRequest(r) { + h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + } else { + h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + } } else { h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) } @@ -657,6 +667,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco msgID := "msg_" + uuid.New().String() var inputTokens, outputTokens int var credits float64 + var realInputTokens int var toolUses []KiroToolUse var nextContentIndex int var rawContentBuilder strings.Builder @@ -978,6 +989,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco OnCredits: func(c float64) { credits = c }, + OnContextUsage: func(pct float64) { + realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) + }, } err := CallKiroAPI(account, payload, callback) @@ -999,7 +1013,9 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco } closeActiveBlock() - if inputTokens <= 0 { + if realInputTokens > 0 { + inputTokens = realInputTokens + } else if inputTokens <= 0 { inputTokens = estimatedInputTokens } outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String()) @@ -1042,6 +1058,290 @@ func (h *Handler) sendSSE(w http.ResponseWriter, flusher http.Flusher, event str flusher.Flush() } +// isAnthropicSDKRequest 检测请求是否来自基于 Anthropic 官方 SDK 的客户端 +// (Claude Code、opencode、Roo Code 等),这类客户端读取 message_start.input_tokens 来展示上下文用量 +func isAnthropicSDKRequest(r *http.Request) bool { + if r.Header.Get("x-stainless-lang") != "" { + return true + } + ua := strings.ToLower(r.Header.Get("User-Agent")) + return strings.Contains(ua, "claude") || strings.Contains(ua, "anthropic-sdk") +} + +// handleClaudeMessagesBuffered Claude API 缓冲模式处理(/cc/v1/messages 及自动识别的 SDK 客户端) +func (h *Handler) handleClaudeMessagesBuffered(w http.ResponseWriter, r *http.Request) { + h.handleClaudeMessagesInternalBuffered(w, r) +} + +func (h *Handler) handleClaudeMessagesInternalBuffered(w http.ResponseWriter, r *http.Request) { + if r.Method != "POST" { + http.Error(w, "Method Not Allowed", 405) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + h.sendClaudeError(w, 400, "invalid_request_error", "Failed to read request body") + return + } + + var req ClaudeRequest + if err := json.Unmarshal(body, &req); err != nil { + h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON: "+err.Error()) + return + } + if msg := validateClaudeRequestShape(&req); msg != "" { + h.sendClaudeError(w, 400, "invalid_request_error", msg) + return + } + + account := h.pool.GetNext() + if account == nil { + h.sendClaudeError(w, 503, "api_error", "No available accounts") + return + } + + if err := h.ensureValidToken(account); err != nil { + h.sendClaudeError(w, 503, "api_error", "Token refresh failed: "+err.Error()) + return + } + + thinkingCfg := config.GetThinkingConfig() + actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix) + req.Model = actualModel + estimatedInputTokens := estimateClaudeRequestInputTokens(&req) + cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens) + cacheUsage := h.promptCache.Compute(account.ID, cacheProfile) + + kiroPayload := ClaudeToKiro(&req, thinking) + + if req.Stream { + h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + } else { + h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + } +} + +// handleClaudeStreamBuffered Claude 缓冲流式响应 +// 等待上游流完成后得到精确 input_tokens,回填 message_start 后一次性推送所有 SSE 事件 +// 等待期间每 25 秒发送 ping 事件保活 +func (h *Handler) handleClaudeStreamBuffered(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { + w.Header().Set("Content-Type", "text/event-stream; charset=utf-8") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + + flusher, ok := w.(http.Flusher) + if !ok { + h.sendClaudeError(w, 500, "api_error", "Streaming not supported") + return + } + + // ping 保活 goroutine(25 秒间隔,防止客户端超时断开) + pingStop := make(chan struct{}) + var stopOnce sync.Once + stopPing := func() { stopOnce.Do(func() { close(pingStop) }) } + defer stopPing() + + go func() { + ticker := time.NewTicker(25 * time.Second) + defer ticker.Stop() + for { + select { + case <-ticker.C: + fmt.Fprintf(w, "event: ping\ndata: {}\n\n") + flusher.Flush() + case <-pingStop: + return + } + } + }() + + // 缓冲阶段:收集所有内容 + var contentBuilder strings.Builder + var thinkingBuilder strings.Builder + var toolUses []KiroToolUse + var inputTokens, outputTokens int + var credits float64 + var realInputTokens int + + callback := &KiroStreamCallback{ + OnText: func(text string, isThinking bool) { + if isThinking { + thinkingBuilder.WriteString(text) + } else { + contentBuilder.WriteString(text) + } + }, + OnToolUse: func(tu KiroToolUse) { + toolUses = append(toolUses, tu) + }, + OnComplete: func(inTok, outTok int) { + inputTokens = inTok + outputTokens = outTok + }, + OnError: func(err error) { + h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota")) + }, + OnCredits: func(c float64) { + credits = c + }, + OnContextUsage: func(pct float64) { + realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) + }, + } + + err := CallKiroAPI(account, payload, callback) + stopPing() + + if err != nil { + h.recordFailure() + h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota")) + h.sendSSE(w, flusher, "error", map[string]interface{}{ + "type": "error", + "error": map[string]string{"type": "api_error", "message": err.Error()}, + }) + return + } + + // 确定精确 input_tokens + finalInputTokens := estimatedInputTokens + if realInputTokens > 0 { + finalInputTokens = realInputTokens + } else if inputTokens > 0 { + finalInputTokens = inputTokens + } + + // 处理 thinking 内容 + thinkingFormat := config.GetThinkingConfig().ClaudeFormat + rawContent := contentBuilder.String() + rawThinking := thinkingBuilder.String() + outputContent, extractedReasoning := extractThinkingFromContent(rawContent) + thinkingOutput := rawThinking + if thinking && thinkingOutput == "" && extractedReasoning != "" { + thinkingOutput = extractedReasoning + } + if !thinking { + thinkingOutput = "" + } + outputTokens = estimateClaudeOutputTokens(outputContent, thinkingOutput, toolUses) + + h.recordSuccess(finalInputTokens, outputTokens, credits) + h.pool.RecordSuccess(account.ID) + h.pool.UpdateStats(account.ID, finalInputTokens+outputTokens, credits) + h.promptCache.Update(account.ID, cacheProfile) + + msgID := "msg_" + uuid.New().String() + contentIndex := 0 + + // 推送阶段:message_start 携带精确 input_tokens + h.sendSSE(w, flusher, "message_start", map[string]interface{}{ + "type": "message_start", + "message": map[string]interface{}{ + "id": msgID, + "type": "message", + "role": "assistant", + "content": []interface{}{}, + "model": model, + "stop_reason": nil, + "stop_sequence": nil, + "usage": buildClaudeUsageMap(finalInputTokens, 0, cacheUsage, cacheProfile != nil), + }, + }) + h.sendSSE(w, flusher, "ping", map[string]interface{}{"type": "ping"}) + + // 推送 thinking 块 + if thinking && thinkingOutput != "" { + switch thinkingFormat { + case "think": + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", "index": contentIndex, + "content_block": map[string]string{"type": "text", "text": ""}, + }) + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", "index": contentIndex, + "delta": map[string]string{"type": "text_delta", "text": "" + thinkingOutput + ""}, + }) + h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ + "type": "content_block_stop", "index": contentIndex, + }) + contentIndex++ + case "reasoning_content": + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", "index": contentIndex, + "content_block": map[string]string{"type": "text", "text": ""}, + }) + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", "index": contentIndex, + "delta": map[string]string{"type": "text_delta", "text": thinkingOutput}, + }) + h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ + "type": "content_block_stop", "index": contentIndex, + }) + contentIndex++ + default: // native thinking block + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", "index": contentIndex, + "content_block": map[string]string{"type": "thinking", "thinking": ""}, + }) + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", "index": contentIndex, + "delta": map[string]string{"type": "thinking_delta", "thinking": thinkingOutput}, + }) + h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ + "type": "content_block_stop", "index": contentIndex, + }) + contentIndex++ + } + } + + // 推送文本块 + if outputContent != "" { + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", "index": contentIndex, + "content_block": map[string]string{"type": "text", "text": ""}, + }) + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", "index": contentIndex, + "delta": map[string]string{"type": "text_delta", "text": outputContent}, + }) + h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ + "type": "content_block_stop", "index": contentIndex, + }) + contentIndex++ + } + + // 推送工具调用块 + for _, tu := range toolUses { + inputJSON, _ := json.Marshal(tu.Input) + h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ + "type": "content_block_start", "index": contentIndex, + "content_block": map[string]interface{}{ + "type": "tool_use", "id": tu.ToolUseID, "name": tu.Name, "input": map[string]interface{}{}, + }, + }) + h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ + "type": "content_block_delta", "index": contentIndex, + "delta": map[string]interface{}{"type": "input_json_delta", "partial_json": string(inputJSON)}, + }) + h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ + "type": "content_block_stop", "index": contentIndex, + }) + contentIndex++ + } + + stopReason := "end_turn" + if len(toolUses) > 0 { + stopReason = "tool_use" + } + + h.sendSSE(w, flusher, "message_delta", map[string]interface{}{ + "type": "message_delta", + "delta": map[string]interface{}{"stop_reason": stopReason}, + "usage": buildClaudeUsageMap(finalInputTokens, outputTokens, cacheUsage, cacheProfile != nil), + }) + h.sendSSE(w, flusher, "message_stop", map[string]interface{}{"type": "message_stop"}) +} + // backgroundStatsSaver 后台定时保存统计数据 func (h *Handler) backgroundStatsSaver() { ticker := time.NewTicker(30 * time.Second) @@ -1103,6 +1403,7 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A var toolUses []KiroToolUse var inputTokens, outputTokens int var credits float64 + var realInputTokens int callback := &KiroStreamCallback{ OnText: func(text string, isThinking bool) { @@ -1125,6 +1426,9 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A OnCredits: func(c float64) { credits = c }, + OnContextUsage: func(pct float64) { + realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) + }, } err := CallKiroAPI(account, payload, callback) @@ -1145,7 +1449,9 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A thinkingContent = "" } - if inputTokens <= 0 { + if realInputTokens > 0 { + inputTokens = realInputTokens + } else if inputTokens <= 0 { inputTokens = estimatedInputTokens } outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses) @@ -1262,6 +1568,7 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco var toolCallIndex int var inputTokens, outputTokens int var credits float64 + var realInputTokens int var rawContentBuilder strings.Builder var rawReasoningBuilder strings.Builder @@ -1554,6 +1861,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco OnCredits: func(c float64) { credits = c }, + OnContextUsage: func(pct float64) { + realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) + }, } err := CallKiroAPI(account, payload, callback) @@ -1570,7 +1880,9 @@ func (h *Handler) handleOpenAIStream(w http.ResponseWriter, account *config.Acco eventThinkingOpen = false } - if inputTokens <= 0 { + if realInputTokens > 0 { + inputTokens = realInputTokens + } else if inputTokens <= 0 { inputTokens = estimatedInputTokens } outputContent, extractedReasoning := extractThinkingFromContent(rawContentBuilder.String()) @@ -1626,6 +1938,7 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A var toolUses []KiroToolUse var inputTokens, outputTokens int var credits float64 + var realInputTokens int callback := &KiroStreamCallback{ OnText: func(text string, isThinking bool) { @@ -1639,6 +1952,9 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A OnComplete: func(inTok, outTok int) { inputTokens = inTok; outputTokens = outTok }, OnError: func(err error) { h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429")) }, OnCredits: func(c float64) { credits = c }, + OnContextUsage: func(pct float64) { + realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) + }, } err := CallKiroAPI(account, payload, callback) @@ -1657,7 +1973,9 @@ func (h *Handler) handleOpenAINonStream(w http.ResponseWriter, account *config.A reasoningContent = "" } - if inputTokens <= 0 { + if realInputTokens > 0 { + inputTokens = realInputTokens + } else if inputTokens <= 0 { inputTokens = estimatedInputTokens } outputTokens = estimateOpenAIOutputTokens(finalContent, reasoningContent, toolUses) diff --git a/proxy/kiro.go b/proxy/kiro.go index 7fcaa64..0a57bb2 100644 --- a/proxy/kiro.go +++ b/proxy/kiro.go @@ -136,11 +136,12 @@ type InferenceConfig struct { // KiroStreamCallback 流式响应回调 type KiroStreamCallback struct { - OnText func(text string, isThinking bool) - OnToolUse func(toolUse KiroToolUse) - OnComplete func(inputTokens, outputTokens int) - OnError func(err error) - OnCredits func(credits float64) + OnText func(text string, isThinking bool) + OnToolUse func(toolUse KiroToolUse) + OnComplete func(inputTokens, outputTokens int) + OnError func(err error) + OnCredits func(credits float64) + OnContextUsage func(percentage float64) } // ==================== API 调用 ==================== @@ -306,6 +307,12 @@ func parseEventStream(body io.Reader, callback *KiroStreamCallback) error { if usage, ok := event["usage"].(float64); ok { totalCredits += usage } + case "contextUsageEvent": + if pct, ok := event["contextUsagePercentage"].(float64); ok { + if callback.OnContextUsage != nil { + callback.OnContextUsage(pct) + } + } } } @@ -370,6 +377,16 @@ func updateTokensFromEvent(event map[string]interface{}, currentInputTokens, cur return inputTokens, outputTokens } +// getContextWindowSize 返回模型的上下文窗口大小(token 数) +// Kiro 托管的 Claude 模型窗口由 AWS 硬性规定,此处与官方保持一致 +func getContextWindowSize(model string) int { + m := strings.ToLower(model) + if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") { + return 1_000_000 + } + return 200_000 +} + func collectUsageMaps(v interface{}, out *[]map[string]interface{}) { switch t := v.(type) { case map[string]interface{}: From 0203357b34bbee306a98d380f9b24ba72d841ec7 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 19:47:39 +0800 Subject: [PATCH 2/9] refactor: remove buffered stream mode, keep contextUsageEvent for accurate input tokens --- proxy/handler.go | 298 +---------------------------------------------- proxy/kiro.go | 11 +- 2 files changed, 8 insertions(+), 301 deletions(-) diff --git a/proxy/handler.go b/proxy/handler.go index cff78fe..a7fb592 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -262,12 +262,6 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) { return } h.handleClaudeMessages(w, r) - case path == "/cc/v1/messages": - if !h.validateApiKey(r) { - h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key") - return - } - h.handleClaudeMessagesBuffered(w, r) case path == "/v1/messages/count_tokens" || path == "/messages/count_tokens": if !h.validateApiKey(r) { h.sendClaudeError(w, 401, "authentication_error", "Invalid or missing API key") @@ -637,13 +631,9 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re // 转换请求 kiroPayload := ClaudeToKiro(&req, thinking) - // 流式或非流式;SDK 客户端(Claude Code、opencode 等)自动使用缓冲模式以获取精确 message_start + // Stream or non-stream if req.Stream { - if isAnthropicSDKRequest(r) { - h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) - } else { - h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) - } + h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) } else { h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) } @@ -1058,290 +1048,6 @@ func (h *Handler) sendSSE(w http.ResponseWriter, flusher http.Flusher, event str flusher.Flush() } -// isAnthropicSDKRequest 检测请求是否来自基于 Anthropic 官方 SDK 的客户端 -// (Claude Code、opencode、Roo Code 等),这类客户端读取 message_start.input_tokens 来展示上下文用量 -func isAnthropicSDKRequest(r *http.Request) bool { - if r.Header.Get("x-stainless-lang") != "" { - return true - } - ua := strings.ToLower(r.Header.Get("User-Agent")) - return strings.Contains(ua, "claude") || strings.Contains(ua, "anthropic-sdk") -} - -// handleClaudeMessagesBuffered Claude API 缓冲模式处理(/cc/v1/messages 及自动识别的 SDK 客户端) -func (h *Handler) handleClaudeMessagesBuffered(w http.ResponseWriter, r *http.Request) { - h.handleClaudeMessagesInternalBuffered(w, r) -} - -func (h *Handler) handleClaudeMessagesInternalBuffered(w http.ResponseWriter, r *http.Request) { - if r.Method != "POST" { - http.Error(w, "Method Not Allowed", 405) - return - } - - body, err := io.ReadAll(r.Body) - if err != nil { - h.sendClaudeError(w, 400, "invalid_request_error", "Failed to read request body") - return - } - - var req ClaudeRequest - if err := json.Unmarshal(body, &req); err != nil { - h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON: "+err.Error()) - return - } - if msg := validateClaudeRequestShape(&req); msg != "" { - h.sendClaudeError(w, 400, "invalid_request_error", msg) - return - } - - account := h.pool.GetNext() - if account == nil { - h.sendClaudeError(w, 503, "api_error", "No available accounts") - return - } - - if err := h.ensureValidToken(account); err != nil { - h.sendClaudeError(w, 503, "api_error", "Token refresh failed: "+err.Error()) - return - } - - thinkingCfg := config.GetThinkingConfig() - actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix) - req.Model = actualModel - estimatedInputTokens := estimateClaudeRequestInputTokens(&req) - cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens) - cacheUsage := h.promptCache.Compute(account.ID, cacheProfile) - - kiroPayload := ClaudeToKiro(&req, thinking) - - if req.Stream { - h.handleClaudeStreamBuffered(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) - } else { - h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) - } -} - -// handleClaudeStreamBuffered Claude 缓冲流式响应 -// 等待上游流完成后得到精确 input_tokens,回填 message_start 后一次性推送所有 SSE 事件 -// 等待期间每 25 秒发送 ping 事件保活 -func (h *Handler) handleClaudeStreamBuffered(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { - w.Header().Set("Content-Type", "text/event-stream; charset=utf-8") - w.Header().Set("Cache-Control", "no-cache") - w.Header().Set("Connection", "keep-alive") - - flusher, ok := w.(http.Flusher) - if !ok { - h.sendClaudeError(w, 500, "api_error", "Streaming not supported") - return - } - - // ping 保活 goroutine(25 秒间隔,防止客户端超时断开) - pingStop := make(chan struct{}) - var stopOnce sync.Once - stopPing := func() { stopOnce.Do(func() { close(pingStop) }) } - defer stopPing() - - go func() { - ticker := time.NewTicker(25 * time.Second) - defer ticker.Stop() - for { - select { - case <-ticker.C: - fmt.Fprintf(w, "event: ping\ndata: {}\n\n") - flusher.Flush() - case <-pingStop: - return - } - } - }() - - // 缓冲阶段:收集所有内容 - var contentBuilder strings.Builder - var thinkingBuilder strings.Builder - var toolUses []KiroToolUse - var inputTokens, outputTokens int - var credits float64 - var realInputTokens int - - callback := &KiroStreamCallback{ - OnText: func(text string, isThinking bool) { - if isThinking { - thinkingBuilder.WriteString(text) - } else { - contentBuilder.WriteString(text) - } - }, - OnToolUse: func(tu KiroToolUse) { - toolUses = append(toolUses, tu) - }, - OnComplete: func(inTok, outTok int) { - inputTokens = inTok - outputTokens = outTok - }, - OnError: func(err error) { - h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota")) - }, - OnCredits: func(c float64) { - credits = c - }, - OnContextUsage: func(pct float64) { - realInputTokens = int(pct * float64(getContextWindowSize(model)) / 100.0) - }, - } - - err := CallKiroAPI(account, payload, callback) - stopPing() - - if err != nil { - h.recordFailure() - h.pool.RecordError(account.ID, strings.Contains(err.Error(), "429") || strings.Contains(err.Error(), "quota")) - h.sendSSE(w, flusher, "error", map[string]interface{}{ - "type": "error", - "error": map[string]string{"type": "api_error", "message": err.Error()}, - }) - return - } - - // 确定精确 input_tokens - finalInputTokens := estimatedInputTokens - if realInputTokens > 0 { - finalInputTokens = realInputTokens - } else if inputTokens > 0 { - finalInputTokens = inputTokens - } - - // 处理 thinking 内容 - thinkingFormat := config.GetThinkingConfig().ClaudeFormat - rawContent := contentBuilder.String() - rawThinking := thinkingBuilder.String() - outputContent, extractedReasoning := extractThinkingFromContent(rawContent) - thinkingOutput := rawThinking - if thinking && thinkingOutput == "" && extractedReasoning != "" { - thinkingOutput = extractedReasoning - } - if !thinking { - thinkingOutput = "" - } - outputTokens = estimateClaudeOutputTokens(outputContent, thinkingOutput, toolUses) - - h.recordSuccess(finalInputTokens, outputTokens, credits) - h.pool.RecordSuccess(account.ID) - h.pool.UpdateStats(account.ID, finalInputTokens+outputTokens, credits) - h.promptCache.Update(account.ID, cacheProfile) - - msgID := "msg_" + uuid.New().String() - contentIndex := 0 - - // 推送阶段:message_start 携带精确 input_tokens - h.sendSSE(w, flusher, "message_start", map[string]interface{}{ - "type": "message_start", - "message": map[string]interface{}{ - "id": msgID, - "type": "message", - "role": "assistant", - "content": []interface{}{}, - "model": model, - "stop_reason": nil, - "stop_sequence": nil, - "usage": buildClaudeUsageMap(finalInputTokens, 0, cacheUsage, cacheProfile != nil), - }, - }) - h.sendSSE(w, flusher, "ping", map[string]interface{}{"type": "ping"}) - - // 推送 thinking 块 - if thinking && thinkingOutput != "" { - switch thinkingFormat { - case "think": - h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ - "type": "content_block_start", "index": contentIndex, - "content_block": map[string]string{"type": "text", "text": ""}, - }) - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", "index": contentIndex, - "delta": map[string]string{"type": "text_delta", "text": "" + thinkingOutput + ""}, - }) - h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ - "type": "content_block_stop", "index": contentIndex, - }) - contentIndex++ - case "reasoning_content": - h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ - "type": "content_block_start", "index": contentIndex, - "content_block": map[string]string{"type": "text", "text": ""}, - }) - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", "index": contentIndex, - "delta": map[string]string{"type": "text_delta", "text": thinkingOutput}, - }) - h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ - "type": "content_block_stop", "index": contentIndex, - }) - contentIndex++ - default: // native thinking block - h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ - "type": "content_block_start", "index": contentIndex, - "content_block": map[string]string{"type": "thinking", "thinking": ""}, - }) - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", "index": contentIndex, - "delta": map[string]string{"type": "thinking_delta", "thinking": thinkingOutput}, - }) - h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ - "type": "content_block_stop", "index": contentIndex, - }) - contentIndex++ - } - } - - // 推送文本块 - if outputContent != "" { - h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ - "type": "content_block_start", "index": contentIndex, - "content_block": map[string]string{"type": "text", "text": ""}, - }) - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", "index": contentIndex, - "delta": map[string]string{"type": "text_delta", "text": outputContent}, - }) - h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ - "type": "content_block_stop", "index": contentIndex, - }) - contentIndex++ - } - - // 推送工具调用块 - for _, tu := range toolUses { - inputJSON, _ := json.Marshal(tu.Input) - h.sendSSE(w, flusher, "content_block_start", map[string]interface{}{ - "type": "content_block_start", "index": contentIndex, - "content_block": map[string]interface{}{ - "type": "tool_use", "id": tu.ToolUseID, "name": tu.Name, "input": map[string]interface{}{}, - }, - }) - h.sendSSE(w, flusher, "content_block_delta", map[string]interface{}{ - "type": "content_block_delta", "index": contentIndex, - "delta": map[string]interface{}{"type": "input_json_delta", "partial_json": string(inputJSON)}, - }) - h.sendSSE(w, flusher, "content_block_stop", map[string]interface{}{ - "type": "content_block_stop", "index": contentIndex, - }) - contentIndex++ - } - - stopReason := "end_turn" - if len(toolUses) > 0 { - stopReason = "tool_use" - } - - h.sendSSE(w, flusher, "message_delta", map[string]interface{}{ - "type": "message_delta", - "delta": map[string]interface{}{"stop_reason": stopReason}, - "usage": buildClaudeUsageMap(finalInputTokens, outputTokens, cacheUsage, cacheProfile != nil), - }) - h.sendSSE(w, flusher, "message_stop", map[string]interface{}{"type": "message_stop"}) -} - // backgroundStatsSaver 后台定时保存统计数据 func (h *Handler) backgroundStatsSaver() { ticker := time.NewTicker(30 * time.Second) diff --git a/proxy/kiro.go b/proxy/kiro.go index 0a57bb2..bc8233d 100644 --- a/proxy/kiro.go +++ b/proxy/kiro.go @@ -132,9 +132,9 @@ type InferenceConfig struct { TopP float64 `json:"topP,omitempty"` } -// ==================== 流式回调 ==================== +// ==================== Stream Callbacks ==================== -// KiroStreamCallback 流式响应回调 +// KiroStreamCallback stream response callbacks type KiroStreamCallback struct { OnText func(text string, isThinking bool) OnToolUse func(toolUse KiroToolUse) @@ -377,11 +377,12 @@ func updateTokensFromEvent(event map[string]interface{}, currentInputTokens, cur return inputTokens, outputTokens } -// getContextWindowSize 返回模型的上下文窗口大小(token 数) -// Kiro 托管的 Claude 模型窗口由 AWS 硬性规定,此处与官方保持一致 +// getContextWindowSize returns the context window size (in tokens) for a model. func getContextWindowSize(model string) int { m := strings.ToLower(model) - if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") { + // sonnet-4.6, opus-4.6, opus-4.7 all have 1M context windows + if strings.Contains(m, "4.6") || strings.Contains(m, "4-6") || + strings.Contains(m, "4.7") || strings.Contains(m, "4-7") { return 1_000_000 } return 200_000 From 221348b975bde927baa13344518d9da1276b40fe Mon Sep 17 00:00:00 2001 From: Henry Yang <83214045+HenryXiaoYang@users.noreply.github.com> Date: Mon, 11 May 2026 21:01:54 +0800 Subject: [PATCH 3/9] fix: support Claude thinking config routing (#40) --- README.md | 2 +- README_CN.md | 2 +- proxy/handler.go | 143 +++++++++++++++++++---- proxy/handler_test.go | 238 ++++++++++++++++++++++++++++++++++++++- proxy/translator.go | 132 +++++++++++++++++++--- proxy/translator_test.go | 17 +++ 6 files changed, 492 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 49a1263..d0dcf4b 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \ ## Thinking Mode -Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode. +Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Claude-compatible requests that include a top-level `thinking` config such as `{"type":"enabled","budget_tokens":2048}` or `{"type":"adaptive"}` also enable thinking mode automatically. Configure output format in the admin panel under Settings - Thinking Mode. ## Environment Variables diff --git a/README_CN.md b/README_CN.md index b6b79d2..542b2f3 100644 --- a/README_CN.md +++ b/README_CN.md @@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \ ## 思考模式 -在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。 +在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。Claude 兼容请求如果带有顶层 `thinking` 配置,例如 `{"type":"enabled","budget_tokens":2048}` 或 `{"type":"adaptive"}`,也会自动启用 thinking 模式。输出格式可在管理面板「设置 - Thinking 模式」中配置。 ## 环境变量 diff --git a/proxy/handler.go b/proxy/handler.go index a7fb592..2c42be9 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -66,6 +66,9 @@ func validateClaudeRequestShape(req *ClaudeRequest) string { if len(req.Messages) == 0 { return "messages must not be empty" } + if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" { + return msg + } hasUserContext := false lastRole := "" @@ -94,6 +97,75 @@ func validateClaudeRequestShape(req *ClaudeRequest) string { return "" } +func validateClaudeThinkingConfig(thinking *ClaudeThinkingConfig, maxTokens int) string { + if thinking == nil { + return "" + } + + kind := strings.ToLower(strings.TrimSpace(thinking.Type)) + switch kind { + case "enabled": + if maxTokens == 0 { + return "thinking.type enabled cannot be used with max_tokens=0" + } + if thinking.BudgetTokens <= 0 { + return "thinking.budget_tokens is required when thinking.type is enabled" + } + if thinking.BudgetTokens < 1024 { + return "thinking.budget_tokens must be at least 1024" + } + if maxTokens > 0 && thinking.BudgetTokens >= maxTokens { + return "thinking.budget_tokens must be less than max_tokens" + } + case "adaptive": + if thinking.BudgetTokens != 0 { + return "thinking.budget_tokens is not supported when thinking.type is adaptive" + } + case "disabled": + if thinking.BudgetTokens != 0 { + return "thinking.budget_tokens is not supported when thinking.type is disabled" + } + default: + return "thinking.type must be one of: enabled, adaptive, disabled" + } + + display := strings.ToLower(strings.TrimSpace(thinking.Display)) + if display != "" && display != "summarized" && display != "omitted" { + return "thinking.display must be one of: summarized, omitted" + } + if kind == "disabled" && display != "" { + return "thinking.display is not supported when thinking.type is disabled" + } + + return "" +} + +type claudeThinkingResponseOptions struct { + Format string + OmitDisplay bool +} + +func resolveClaudeThinkingResponseOptions(thinking *ClaudeThinkingConfig, defaultFormat string) claudeThinkingResponseOptions { + opts := claudeThinkingResponseOptions{Format: defaultFormat} + if opts.Format == "" { + opts.Format = "thinking" + } + if thinking == nil { + return opts + } + + display := strings.ToLower(strings.TrimSpace(thinking.Display)) + switch display { + case "summarized": + opts.Format = "thinking" + case "omitted": + opts.Format = "thinking" + opts.OmitDisplay = true + } + + return opts +} + func validateOpenAIRequestShape(req *OpenAIRequest) string { if len(req.Messages) == 0 { return "messages must not be empty" @@ -569,8 +641,17 @@ func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) { h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON") return } + if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" { + h.sendClaudeError(w, 400, "invalid_request_error", msg) + return + } - estimatedTokens := estimateClaudeRequestInputTokens(&req) + thinkingCfg := config.GetThinkingConfig() + actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix) + req.Model = actualModel + effectiveReq := cloneClaudeRequestForThinking(&req, thinking) + + estimatedTokens := estimateClaudeRequestInputTokens(effectiveReq) if estimatedTokens < 1 { estimatedTokens = 1 } @@ -622,10 +703,12 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re // 解析模型和 thinking 模式 thinkingCfg := config.GetThinkingConfig() - actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix) + actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix) req.Model = actualModel - estimatedInputTokens := estimateClaudeRequestInputTokens(&req) - cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens) + effectiveReq := cloneClaudeRequestForThinking(&req, thinking) + thinkingResponseOpts := resolveClaudeThinkingResponseOptions(req.Thinking, thinkingCfg.ClaudeFormat) + estimatedInputTokens := estimateClaudeRequestInputTokens(effectiveReq) + cacheProfile := h.promptCache.BuildClaudeProfile(effectiveReq, estimatedInputTokens) cacheUsage := h.promptCache.Compute(account.ID, cacheProfile) // 转换请求 @@ -633,14 +716,14 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re // Stream or non-stream if req.Stream { - h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile) } else { - h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile) + h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile) } } // handleClaudeStream Claude 流式响应 -func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { +func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { w.Header().Set("Content-Type", "text/event-stream; charset=utf-8") w.Header().Set("Cache-Control", "no-cache") w.Header().Set("Connection", "keep-alive") @@ -652,7 +735,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco } // 获取 thinking 输出格式配置 - thinkingFormat := config.GetThinkingConfig().ClaudeFormat + thinkingFormat := thinkingOpts.Format msgID := "msg_" + uuid.New().String() var inputTokens, outputTokens int @@ -769,6 +852,19 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco "delta": map[string]string{"type": "text_delta", "text": text}, }) default: + if thinkingOpts.OmitDisplay { + if thinkingState == 1 { + startContentBlock("thinking") + return + } + if thinkingState == 3 { + if activeBlockType != "thinking" { + startContentBlock("thinking") + } + closeActiveBlock() + } + return + } if thinkingState == 3 && text == "" { if activeBlockType == "thinking" { closeActiveBlock() @@ -1103,7 +1199,7 @@ func (h *Handler) recordFailure() { } // handleClaudeNonStream Claude 非流式响应 -func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { +func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) { var content string var thinkingContent string var toolUses []KiroToolUse @@ -1146,13 +1242,14 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A } // 合并 thinking 内容(如果有 reasoningContentEvent 的内容) - thinkingFormat := config.GetThinkingConfig().ClaudeFormat + thinkingFormat := thinkingOpts.Format finalContent, extractedReasoning := extractThinkingFromContent(content) - if thinking && thinkingContent == "" && extractedReasoning != "" { - thinkingContent = extractedReasoning + rawThinkingContent := thinkingContent + if thinking && rawThinkingContent == "" && extractedReasoning != "" { + rawThinkingContent = extractedReasoning } if !thinking { - thinkingContent = "" + rawThinkingContent = "" } if realInputTokens > 0 { @@ -1160,26 +1257,32 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A } else if inputTokens <= 0 { inputTokens = estimatedInputTokens } - outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses) + outputTokens = estimateClaudeOutputTokens(finalContent, rawThinkingContent, toolUses) h.recordSuccess(inputTokens, outputTokens, credits) h.pool.RecordSuccess(account.ID) h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits) h.promptCache.Update(account.ID, cacheProfile) - if thinking && thinkingContent != "" { + responseThinkingContent := rawThinkingContent + includeEmptyThinkingBlock := thinking && thinkingOpts.OmitDisplay && rawThinkingContent != "" + if includeEmptyThinkingBlock { + responseThinkingContent = "" + } + + if thinking && responseThinkingContent != "" { switch thinkingFormat { case "think": - finalContent = "" + thinkingContent + "" + finalContent - thinkingContent = "" + finalContent = "" + responseThinkingContent + "" + finalContent + responseThinkingContent = "" case "reasoning_content": - finalContent = thinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接 - thinkingContent = "" + finalContent = responseThinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接 + responseThinkingContent = "" default: } } - resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model) + resp := KiroToClaudeResponse(finalContent, responseThinkingContent, includeEmptyThinkingBlock, toolUses, inputTokens, outputTokens, model) resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage) resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens diff --git a/proxy/handler_test.go b/proxy/handler_test.go index 672092a..e905bf1 100644 --- a/proxy/handler_test.go +++ b/proxy/handler_test.go @@ -1,8 +1,6 @@ package proxy -import ( - "testing" -) +import "testing" func TestThinkingSourceReasoningFirst(t *testing.T) { var source thinkingStreamSource @@ -101,6 +99,240 @@ func TestValidateClaudeRequestShapeRejectsAssistantPrefill(t *testing.T) { } } +func TestResolveClaudeThinkingModeHonorsRequestThinking(t *testing.T) { + tests := []struct { + name string + model string + thinking *ClaudeThinkingConfig + wantModel string + wantThinking bool + }{ + { + name: "adaptive request enables thinking", + model: "claude-sonnet-4.6", + thinking: &ClaudeThinkingConfig{Type: "adaptive"}, + wantModel: "claude-sonnet-4.6", + wantThinking: true, + }, + { + name: "enabled request enables thinking", + model: "claude-opus-4.5", + thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048}, + wantModel: "claude-opus-4.5", + wantThinking: true, + }, + { + name: "disabled request keeps thinking off", + model: "claude-opus-4.7", + thinking: &ClaudeThinkingConfig{Type: "disabled"}, + wantModel: "claude-opus-4.7", + wantThinking: false, + }, + { + name: "suffix remains supported when thinking is disabled", + model: "claude-sonnet-4.5-thinking", + thinking: &ClaudeThinkingConfig{Type: "disabled"}, + wantModel: "claude-sonnet-4.5", + wantThinking: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + gotModel, gotThinking := resolveClaudeThinkingMode(tc.model, tc.thinking, "-thinking") + if gotModel != tc.wantModel { + t.Fatalf("expected model %q, got %q", tc.wantModel, gotModel) + } + if gotThinking != tc.wantThinking { + t.Fatalf("expected thinking=%v, got %v", tc.wantThinking, gotThinking) + } + }) + } +} + +func TestCloneClaudeRequestForThinkingInjectsPromptWithoutMutatingOriginal(t *testing.T) { + req := &ClaudeRequest{ + Model: "claude-sonnet-4.6", + System: "Follow the user instructions.", + } + + cloned := cloneClaudeRequestForThinking(req, true) + blocks, ok := cloned.System.([]interface{}) + if !ok { + t.Fatalf("expected cloned system prompt to be structured blocks, got %T", cloned.System) + } + if len(blocks) != 2 { + t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks)) + } + gotPrompt := extractSystemPrompt(cloned.System) + expected := ThinkingModePrompt + "\n\nFollow the user instructions." + if gotPrompt != expected { + t.Fatalf("expected injected system prompt %q, got %q", expected, gotPrompt) + } + if original, ok := req.System.(string); !ok || original != "Follow the user instructions." { + t.Fatalf("expected original request system prompt to stay unchanged, got %#v", req.System) + } +} + +func TestCloneClaudeRequestForThinkingPreservesStructuredSystemBlocks(t *testing.T) { + req := &ClaudeRequest{ + Model: "claude-sonnet-4.6", + System: []interface{}{ + map[string]interface{}{ + "type": "text", + "text": "cached system", + "cache_control": map[string]interface{}{ + "type": "ephemeral", + "ttl": "5m", + }, + }, + }, + } + + cloned := cloneClaudeRequestForThinking(req, true) + blocks, ok := cloned.System.([]interface{}) + if !ok { + t.Fatalf("expected structured system blocks, got %T", cloned.System) + } + if len(blocks) != 2 { + t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks)) + } + first, ok := blocks[0].(map[string]interface{}) + if !ok || first["text"] != ThinkingModePrompt+"\n" { + t.Fatalf("expected first block to be thinking prompt, got %#v", blocks[0]) + } + second, ok := blocks[1].(map[string]interface{}) + if !ok { + t.Fatalf("expected original system block to remain a map, got %T", blocks[1]) + } + cacheControl, ok := second["cache_control"].(map[string]interface{}) + if !ok || cacheControl["type"] != "ephemeral" { + t.Fatalf("expected original cache_control to be preserved, got %#v", second["cache_control"]) + } +} + +func TestThinkingPromptAffectsClaudeTokenEstimate(t *testing.T) { + req := &ClaudeRequest{ + Model: "claude-sonnet-4.6", + Messages: []ClaudeMessage{{Role: "user", Content: "hello"}}, + } + + baseTokens := estimateClaudeRequestInputTokens(req) + thinkingTokens := estimateClaudeRequestInputTokens(cloneClaudeRequestForThinking(req, true)) + + if thinkingTokens <= baseTokens { + t.Fatalf("expected thinking tokens (%d) to exceed base tokens (%d)", thinkingTokens, baseTokens) + } +} + +func TestValidateClaudeThinkingConfig(t *testing.T) { + tests := []struct { + name string + thinking *ClaudeThinkingConfig + maxTokens int + expectError bool + }{ + { + name: "adaptive is valid", + thinking: &ClaudeThinkingConfig{Type: "adaptive"}, + maxTokens: 4096, + expectError: false, + }, + { + name: "enabled requires budget", + thinking: &ClaudeThinkingConfig{Type: "enabled"}, + maxTokens: 4096, + expectError: true, + }, + { + name: "enabled requires at least 1024 budget tokens", + thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 512}, + maxTokens: 4096, + expectError: true, + }, + { + name: "enabled rejects max tokens zero", + thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048}, + maxTokens: 0, + expectError: true, + }, + { + name: "enabled budget must stay below max tokens", + thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 4096}, + maxTokens: 4096, + expectError: true, + }, + { + name: "disabled rejects display", + thinking: &ClaudeThinkingConfig{Type: "disabled", Display: "summarized"}, + maxTokens: 4096, + expectError: true, + }, + { + name: "missing type is rejected", + thinking: &ClaudeThinkingConfig{}, + maxTokens: 4096, + expectError: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + errMsg := validateClaudeThinkingConfig(tc.thinking, tc.maxTokens) + if tc.expectError && errMsg == "" { + t.Fatalf("expected validation error") + } + if !tc.expectError && errMsg != "" { + t.Fatalf("expected thinking config to be valid, got %q", errMsg) + } + }) + } +} + +func TestResolveClaudeThinkingResponseOptions(t *testing.T) { + tests := []struct { + name string + thinking *ClaudeThinkingConfig + defaultFmt string + wantFmt string + wantOmit bool + }{ + { + name: "default config is preserved when display unset", + thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048}, + defaultFmt: "think", + wantFmt: "think", + wantOmit: false, + }, + { + name: "summarized forces official thinking blocks", + thinking: &ClaudeThinkingConfig{Type: "adaptive", Display: "summarized"}, + defaultFmt: "reasoning_content", + wantFmt: "thinking", + wantOmit: false, + }, + { + name: "omitted forces official thinking blocks and hides content", + thinking: &ClaudeThinkingConfig{Type: "adaptive", Display: "omitted"}, + defaultFmt: "think", + wantFmt: "thinking", + wantOmit: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + opts := resolveClaudeThinkingResponseOptions(tc.thinking, tc.defaultFmt) + if opts.Format != tc.wantFmt { + t.Fatalf("expected format %q, got %q", tc.wantFmt, opts.Format) + } + if opts.OmitDisplay != tc.wantOmit { + t.Fatalf("expected omitDisplay=%v, got %v", tc.wantOmit, opts.OmitDisplay) + } + }) + } +} + func TestMergeUniqueModelsPreservesUnionAcrossAccounts(t *testing.T) { base := []ModelInfo{ {ModelId: "claude-sonnet-4.5", InputTypes: []string{"TEXT"}}, diff --git a/proxy/translator.go b/proxy/translator.go index 500b74e..38b562e 100644 --- a/proxy/translator.go +++ b/proxy/translator.go @@ -76,6 +76,19 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) { return model, thinking } +func resolveClaudeThinkingMode(model string, thinkingCfg *ClaudeThinkingConfig, thinkingSuffix string) (string, bool) { + actualModel, suffixThinking := ParseModelAndThinking(model, thinkingSuffix) + return actualModel, suffixThinking || isClaudeThinkingRequested(thinkingCfg) +} + +func isClaudeThinkingRequested(thinkingCfg *ClaudeThinkingConfig) bool { + if thinkingCfg == nil { + return false + } + kind := strings.ToLower(strings.TrimSpace(thinkingCfg.Type)) + return kind == "enabled" || kind == "adaptive" +} + func MapModel(model string) string { mapped, _ := ParseModelAndThinking(model, "-thinking") return mapped @@ -84,15 +97,22 @@ func MapModel(model string) string { // ==================== Claude API 类型 ==================== type ClaudeRequest struct { - Model string `json:"model"` - Messages []ClaudeMessage `json:"messages"` - MaxTokens int `json:"max_tokens"` - Temperature float64 `json:"temperature,omitempty"` - TopP float64 `json:"top_p,omitempty"` - Stream bool `json:"stream,omitempty"` - System interface{} `json:"system,omitempty"` // string or []SystemBlock - Tools []ClaudeTool `json:"tools,omitempty"` - ToolChoice interface{} `json:"tool_choice,omitempty"` + Model string `json:"model"` + Messages []ClaudeMessage `json:"messages"` + MaxTokens int `json:"max_tokens"` + Temperature float64 `json:"temperature,omitempty"` + TopP float64 `json:"top_p,omitempty"` + Stream bool `json:"stream,omitempty"` + System interface{} `json:"system,omitempty"` // string or []SystemBlock + Thinking *ClaudeThinkingConfig `json:"thinking,omitempty"` + Tools []ClaudeTool `json:"tools,omitempty"` + ToolChoice interface{} `json:"tool_choice,omitempty"` +} + +type ClaudeThinkingConfig struct { + Type string `json:"type,omitempty"` + BudgetTokens int `json:"budget_tokens,omitempty"` + Display string `json:"display,omitempty"` } type ClaudeMessage struct { @@ -104,6 +124,7 @@ type ClaudeContentBlock struct { Type string `json:"type"` Text string `json:"text,omitempty"` Thinking string `json:"thinking,omitempty"` + Signature string `json:"signature,omitempty"` ID string `json:"id,omitempty"` Name string `json:"name,omitempty"` Input interface{} `json:"input,omitempty"` @@ -157,12 +178,7 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload { origin := "AI_EDITOR" // 提取系统提示 - systemPrompt := extractSystemPrompt(req.System) - - // 如果启用 thinking 模式,注入 thinking 提示 - if thinking { - systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt - } + systemPrompt := buildClaudeSystemPrompt(req.System, thinking) // 构建历史消息 history := make([]KiroHistoryMessage, 0) @@ -263,6 +279,88 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload { return payload } +func buildClaudeSystemPrompt(system interface{}, thinking bool) string { + systemPrompt := extractSystemPrompt(system) + if !thinking { + return systemPrompt + } + if systemPrompt == "" { + return ThinkingModePrompt + } + return ThinkingModePrompt + "\n\n" + systemPrompt +} + +func cloneClaudeRequestForThinking(req *ClaudeRequest, thinking bool) *ClaudeRequest { + if req == nil { + return nil + } + + cloned := *req + if thinking { + cloned.System = prependThinkingSystem(req.System) + } + return &cloned +} + +func prependThinkingSystem(system interface{}) interface{} { + thinkingText := ThinkingModePrompt + if hasClaudeSystemContent(system) { + thinkingText += "\n" + } + thinkingBlock := map[string]interface{}{ + "type": "text", + "text": thinkingText, + } + + switch v := system.(type) { + case nil: + return []interface{}{thinkingBlock} + case string: + if v == "" { + return []interface{}{thinkingBlock} + } + return []interface{}{ + thinkingBlock, + map[string]interface{}{ + "type": "text", + "text": v, + }, + } + case []interface{}: + blocks := make([]interface{}, 0, len(v)+1) + blocks = append(blocks, thinkingBlock) + blocks = append(blocks, v...) + return blocks + case []string: + blocks := make([]interface{}, 0, len(v)+1) + blocks = append(blocks, thinkingBlock) + for _, block := range v { + blocks = append(blocks, map[string]interface{}{ + "type": "text", + "text": block, + }) + } + return blocks + default: + return []interface{}{thinkingBlock} + } +} + +func hasClaudeSystemContent(system interface{}) bool { + switch v := system.(type) { + case nil: + return false + case string: + return v != "" + case []interface{}: + return len(v) > 0 + case []string: + return len(v) > 0 + default: + return true + } +} + func extractSystemPrompt(system interface{}) string { if system == nil { return "" @@ -459,10 +557,10 @@ func shortenToolName(name string) string { // ==================== Kiro -> Claude 转换 ==================== -func KiroToClaudeResponse(content, thinkingContent string, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse { +func KiroToClaudeResponse(content, thinkingContent string, includeEmptyThinkingBlock bool, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse { blocks := make([]ClaudeContentBlock, 0) - if thinkingContent != "" { + if thinkingContent != "" || includeEmptyThinkingBlock { blocks = append(blocks, ClaudeContentBlock{ Type: "thinking", Thinking: thinkingContent, diff --git a/proxy/translator_test.go b/proxy/translator_test.go index 7c5dc43..e0f276f 100644 --- a/proxy/translator_test.go +++ b/proxy/translator_test.go @@ -233,6 +233,23 @@ func TestClaudeToKiroDropsLeadingAssistantHistory(t *testing.T) { } } +func TestKiroToClaudeResponseCanEmitEmptyThinkingBlock(t *testing.T) { + resp := KiroToClaudeResponse("final answer", "", true, nil, 10, 20, "claude-sonnet-4.6") + + if len(resp.Content) != 2 { + t.Fatalf("expected empty thinking block plus text block, got %d blocks", len(resp.Content)) + } + if resp.Content[0].Type != "thinking" { + t.Fatalf("expected first block to be thinking, got %#v", resp.Content[0]) + } + if resp.Content[0].Thinking != "" { + t.Fatalf("expected omitted thinking block to have empty content, got %#v", resp.Content[0].Thinking) + } + if resp.Content[1].Type != "text" || resp.Content[1].Text != "final answer" { + t.Fatalf("expected text block to be preserved, got %#v", resp.Content[1]) + } +} + func TestToolResultsContinuationIncludesInstructionPrefix(t *testing.T) { req := &OpenAIRequest{ Model: "claude-sonnet-4.5", From 404e2425fa6a660520187d86762a1cfb86f03dfd Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 21:40:45 +0800 Subject: [PATCH 4/9] feat: add outbound proxy support (socks5/http) for restricted networks --- auth/builderid.go | 4 ++-- auth/http_client.go | 50 +++++++++++++++++++++++++++++--------- auth/iam_sso.go | 4 ++-- auth/oidc.go | 4 ++-- auth/sso_token.go | 16 ++++++------- config/config.go | 21 ++++++++++++++++ proxy/handler.go | 55 ++++++++++++++++++++++++++++++++++++++++++ proxy/kiro.go | 58 ++++++++++++++++++++++++++++++++------------- web/index.html | 35 +++++++++++++++++++++++++++ 9 files changed, 205 insertions(+), 42 deletions(-) diff --git a/auth/builderid.go b/auth/builderid.go index 460ad6b..21a74d9 100644 --- a/auth/builderid.go +++ b/auth/builderid.go @@ -57,7 +57,7 @@ func StartBuilderIdLogin(region string) (*BuilderIdSession, error) { regReq, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(regBody)) regReq.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() regResp, err := client.Do(regReq) if err != nil { return nil, fmt.Errorf("register client failed: %v", err) @@ -175,7 +175,7 @@ func PollBuilderIdAuth(sessionID string) (accessToken, refreshToken, clientID, c tokenReq, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(tokenBody)) tokenReq.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() tokenResp, err := client.Do(tokenReq) if err != nil { return "", "", "", "", "", 0, "", fmt.Errorf("token request failed: %v", err) diff --git a/auth/http_client.go b/auth/http_client.go index 836fb7c..fa5443e 100644 --- a/auth/http_client.go +++ b/auth/http_client.go @@ -3,18 +3,46 @@ package auth import ( "net/http" + "net/url" + "sync/atomic" "time" ) -// 全局 HTTP 客户端,复用连接池 -// 用于所有 auth 模块的 HTTP 请求 -var httpClient = &http.Client{ - Timeout: 30 * time.Second, - Transport: &http.Transport{ - MaxIdleConns: 50, // 最大空闲连接数 - MaxIdleConnsPerHost: 10, // 每个 Host 最大空闲连接数 - IdleConnTimeout: 90 * time.Second, // 空闲连接超时 - DisableCompression: false, // 启用压缩 - ForceAttemptHTTP2: true, // 尝试使用 HTTP/2 - }, +// 全局 HTTP 客户端存储,支持运行时代理重配置 +var httpClientStore atomic.Pointer[http.Client] + +// httpClient 返回当前全局 auth HTTP 客户端 +func httpClient() *http.Client { + return httpClientStore.Load() +} + +func init() { + InitHttpClient("") +} + +// buildAuthTransport 构建带可选代理的 Transport +func buildAuthTransport(proxyURL string) *http.Transport { + t := &http.Transport{ + MaxIdleConns: 50, + MaxIdleConnsPerHost: 10, + IdleConnTimeout: 90 * time.Second, + DisableCompression: false, + ForceAttemptHTTP2: true, + } + if proxyURL != "" { + if u, err := url.Parse(proxyURL); err == nil { + t.Proxy = http.ProxyURL(u) + t.ForceAttemptHTTP2 = false + } + } + return t +} + +// InitHttpClient 初始化(或重新初始化)auth 模块的全局 HTTP 客户端 +func InitHttpClient(proxyURL string) { + client := &http.Client{ + Timeout: 30 * time.Second, + Transport: buildAuthTransport(proxyURL), + } + httpClientStore.Store(client) } diff --git a/auth/iam_sso.go b/auth/iam_sso.go index e17e4eb..bfd4a4a 100644 --- a/auth/iam_sso.go +++ b/auth/iam_sso.go @@ -170,7 +170,7 @@ func registerOIDCClient(oidcBase, startUrl, redirectUri string) (clientID, clien req, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - resp, err := httpClient.Do(req) + resp, err := httpClient().Do(req) if err != nil { return "", "", err } @@ -207,7 +207,7 @@ func exchangeToken(oidcBase, clientID, clientSecret, code, codeVerifier, redirec req, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - resp, err := httpClient.Do(req) + resp, err := httpClient().Do(req) if err != nil { return "", "", 0, err } diff --git a/auth/oidc.go b/auth/oidc.go index 5a405d6..7dcb494 100644 --- a/auth/oidc.go +++ b/auth/oidc.go @@ -40,7 +40,7 @@ func refreshOIDCToken(refreshToken, clientID, clientSecret, region string) (stri req, _ := http.NewRequest("POST", url, bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - resp, err := httpClient.Do(req) + resp, err := httpClient().Do(req) if err != nil { return "", "", 0, err } @@ -77,7 +77,7 @@ func refreshSocialToken(refreshToken string) (string, string, int64, error) { req, _ := http.NewRequest("POST", url, bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - resp, err := httpClient.Do(req) + resp, err := httpClient().Do(req) if err != nil { return "", "", 0, err } diff --git a/auth/sso_token.go b/auth/sso_token.go index 22da746..dee0540 100644 --- a/auth/sso_token.go +++ b/auth/sso_token.go @@ -79,7 +79,7 @@ func registerDeviceClient(oidcBase, startUrl string) (clientID, clientSecret str req, _ := http.NewRequest("POST", oidcBase+"/client/register", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return "", "", err @@ -110,7 +110,7 @@ func startDeviceAuth(oidcBase, clientID, clientSecret, startUrl string) (deviceC req, _ := http.NewRequest("POST", oidcBase+"/device_authorization", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return "", "", 0, err @@ -139,7 +139,7 @@ func verifyBearerToken(portalBase, bearerToken string) error { req.Header.Set("Authorization", "Bearer "+bearerToken) req.Header.Set("Accept", "application/json") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return err @@ -157,7 +157,7 @@ func getDeviceSessionToken(portalBase, bearerToken string) (string, error) { req.Header.Set("Authorization", "Bearer "+bearerToken) req.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return "", err @@ -193,7 +193,7 @@ func acceptUserCode(oidcBase, userCode, deviceSessionToken string) (*deviceConte req.Header.Set("Content-Type", "application/json") req.Header.Set("Referer", "https://view.awsapps.com/") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return nil, err @@ -227,7 +227,7 @@ func approveAuth(oidcBase string, deviceContext *deviceContextInfo, deviceSessio req.Header.Set("Content-Type", "application/json") req.Header.Set("Referer", "https://view.awsapps.com/") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return err @@ -262,7 +262,7 @@ func pollForToken(oidcBase, clientID, clientSecret, deviceCode string, interval req, _ := http.NewRequest("POST", oidcBase+"/token", bytes.NewReader(body)) req.Header.Set("Content-Type", "application/json") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { continue @@ -311,7 +311,7 @@ func GetUserInfo(accessToken string) (email, userID string, err error) { req.Header.Set("User-Agent", "aws-sdk-js/1.0.18 KiroAPIProxy") req.Header.Set("x-amz-user-agent", "aws-sdk-js/1.0.18 KiroAPIProxy") - client := httpClient + client := httpClient() resp, err := client.Do(req) if err != nil { return "", "", err diff --git a/config/config.go b/config/config.go index f3ab37e..7195fde 100644 --- a/config/config.go +++ b/config/config.go @@ -108,6 +108,12 @@ type Config struct { // Endpoint configuration: "auto", "codewhisperer", or "amazonq" PreferredEndpoint string `json:"preferredEndpoint,omitempty"` + // Proxy configuration: optional outbound proxy for Kiro API requests + // Format: "socks5://host:port", "socks5://user:pass@host:port", + // "http://host:port", "http://user:pass@host:port" + // Leave empty to connect directly. + ProxyURL string `json:"proxyURL,omitempty"` + // Global statistics (persisted across restarts) TotalRequests int `json:"totalRequests,omitempty"` // Total API requests received SuccessRequests int `json:"successRequests,omitempty"` // Successful requests count @@ -445,6 +451,21 @@ func UpdatePreferredEndpoint(endpoint string) error { return Save() } +// GetProxyURL 获取出站代理地址 +func GetProxyURL() string { + cfgLock.RLock() + defer cfgLock.RUnlock() + return cfg.ProxyURL +} + +// UpdateProxySettings 更新出站代理配置 +func UpdateProxySettings(proxyURL string) error { + cfgLock.Lock() + defer cfgLock.Unlock() + cfg.ProxyURL = proxyURL + return Save() +} + type KiroClientConfig struct { KiroVersion string SystemVersion string diff --git a/proxy/handler.go b/proxy/handler.go index 2c42be9..8b1c783 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -206,6 +206,9 @@ func validateOpenAIRequestShape(req *OpenAIRequest) string { } func NewHandler() *Handler { + // 启动时应用代理配置 + applyProxyConfig(config.GetProxyURL()) + totalReq, successReq, failedReq, totalTokens, totalCredits := config.GetStats() h := &Handler{ pool: pool.GetPool(), @@ -1908,6 +1911,10 @@ func (h *Handler) handleAdminAPI(w http.ResponseWriter, r *http.Request) { h.apiGetEndpointConfig(w, r) case path == "/endpoint" && r.Method == "POST": h.apiUpdateEndpointConfig(w, r) + case path == "/proxy" && r.Method == "GET": + h.apiGetProxy(w, r) + case path == "/proxy" && r.Method == "POST": + h.apiUpdateProxy(w, r) case path == "/version" && r.Method == "GET": h.apiGetVersion(w, r) case path == "/export" && r.Method == "POST": @@ -2872,6 +2879,54 @@ func (h *Handler) apiUpdateEndpointConfig(w http.ResponseWriter, r *http.Request json.NewEncoder(w).Encode(map[string]bool{"success": true}) } +// applyProxyConfig 将代理配置应用到所有出站 HTTP 客户端(Kiro API + auth 模块) +func applyProxyConfig(proxyURL string) { + InitKiroHttpClient(proxyURL) + auth.InitHttpClient(proxyURL) +} + +// apiGetProxy 获取当前代理配置 +func (h *Handler) apiGetProxy(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(map[string]string{ + "proxyURL": config.GetProxyURL(), + }) +} + +// apiUpdateProxy 更新代理配置并立即生效 +func (h *Handler) apiUpdateProxy(w http.ResponseWriter, r *http.Request) { + var req struct { + ProxyURL string `json:"proxyURL"` + } + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + w.WriteHeader(400) + json.NewEncoder(w).Encode(map[string]string{"error": "Invalid JSON"}) + return + } + + // 验证代理 URL 格式(非空时) + if req.ProxyURL != "" { + if !strings.HasPrefix(req.ProxyURL, "http://") && + !strings.HasPrefix(req.ProxyURL, "https://") && + !strings.HasPrefix(req.ProxyURL, "socks5://") && + !strings.HasPrefix(req.ProxyURL, "socks5h://") { + w.WriteHeader(400) + json.NewEncoder(w).Encode(map[string]string{"error": "proxyURL must start with http://, https://, socks5://, or socks5h://"}) + return + } + } + + if err := config.UpdateProxySettings(req.ProxyURL); err != nil { + w.WriteHeader(500) + json.NewEncoder(w).Encode(map[string]string{"error": err.Error()}) + return + } + + // 立即应用新的代理配置 + applyProxyConfig(req.ProxyURL) + + json.NewEncoder(w).Encode(map[string]bool{"success": true}) +} + // apiGetVersion 获取版本信息 func (h *Handler) apiGetVersion(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(map[string]string{ diff --git a/proxy/kiro.go b/proxy/kiro.go index bc8233d..370cb08 100644 --- a/proxy/kiro.go +++ b/proxy/kiro.go @@ -12,6 +12,7 @@ import ( "net/url" "strconv" "strings" + "sync/atomic" "time" "github.com/google/uuid" @@ -40,16 +41,39 @@ var kiroEndpoints = []kiroEndpoint{ }, } -// 全局 HTTP 客户端,复用连接池 -var kiroHttpClient = &http.Client{ - Timeout: 5 * time.Minute, - Transport: &http.Transport{ - MaxIdleConns: 100, // 最大空闲连接数 - MaxIdleConnsPerHost: 20, // 每个 Host 最大空闲连接数 - IdleConnTimeout: 90 * time.Second, // 空闲连接超时 - DisableCompression: false, // 启用压缩 - ForceAttemptHTTP2: true, // 尝试使用 HTTP/2 - }, +// 全局 HTTP 客户端,支持运行时更换(代理重配置) +var kiroHttpStore atomic.Pointer[http.Client] + +func init() { + InitKiroHttpClient("") +} + +// buildKiroTransport 构建带可选代理的 Transport +func buildKiroTransport(proxyURL string) *http.Transport { + t := &http.Transport{ + MaxIdleConns: 100, + MaxIdleConnsPerHost: 20, + IdleConnTimeout: 90 * time.Second, + DisableCompression: false, + ForceAttemptHTTP2: true, + } + if proxyURL != "" { + if u, err := url.Parse(proxyURL); err == nil { + t.Proxy = http.ProxyURL(u) + // 代理不支持 HTTP/2 协议升级 + t.ForceAttemptHTTP2 = false + } + } + return t +} + +// InitKiroHttpClient 初始化(或重新初始化)Kiro API 的 HTTP 客户端 +func InitKiroHttpClient(proxyURL string) { + client := &http.Client{ + Timeout: 5 * time.Minute, + Transport: buildKiroTransport(proxyURL), + } + kiroHttpStore.Store(client) } // ==================== 请求结构 ==================== @@ -136,12 +160,12 @@ type InferenceConfig struct { // KiroStreamCallback stream response callbacks type KiroStreamCallback struct { - OnText func(text string, isThinking bool) - OnToolUse func(toolUse KiroToolUse) - OnComplete func(inputTokens, outputTokens int) - OnError func(err error) - OnCredits func(credits float64) - OnContextUsage func(percentage float64) + OnText func(text string, isThinking bool) + OnToolUse func(toolUse KiroToolUse) + OnComplete func(inputTokens, outputTokens int) + OnError func(err error) + OnCredits func(credits float64) + OnContextUsage func(percentage float64) } // ==================== API 调用 ==================== @@ -194,7 +218,7 @@ func CallKiroAPI(account *config.Account, payload *KiroPayload, callback *KiroSt req.Header.Set("Amz-Sdk-Request", "attempt=1; max=3") req.Header.Set("Amz-Sdk-Invocation-Id", uuid.New().String()) - resp, err := kiroHttpClient.Do(req) + resp, err := kiroHttpStore.Load().Do(req) if err != nil { lastErr = err fmt.Printf("[KiroAPI] Endpoint %s failed: %v\n", ep.Name, err) diff --git a/web/index.html b/web/index.html index bda7341..8b0790b 100644 --- a/web/index.html +++ b/web/index.html @@ -1017,6 +1017,15 @@ id="newPassword" data-i18n-placeholder="settings.newPasswordPlaceholder"> +
+
+
+ + + +
+ +
@@ -1146,6 +1155,12 @@ 'settings.statistics': '统计', 'settings.resetStats': '重置统计', 'settings.confirmReset': '确定重置统计?', + 'settings.proxySettings': '出站代理设置', + 'settings.proxyURL': '代理地址', + 'settings.proxyURLPlaceholder': '留空则直连(不使用代理)', + 'settings.proxyURLHint': '支持 socks5://host:port、socks5://user:pass@host:port、http://host:port 格式,适用于网络受限地区的用户', + 'settings.saveProxy': '保存代理设置', + 'settings.proxySaved': '代理设置已保存,已即时生效', 'api.endpoints': 'API 端点', 'api.modelList': '模型列表', 'api.stats': '统计数据', @@ -1352,6 +1367,12 @@ 'settings.statistics': 'Statistics', 'settings.resetStats': 'Reset Statistics', 'settings.confirmReset': 'Confirm reset statistics?', + 'settings.proxySettings': 'Outbound Proxy Settings', + 'settings.proxyURL': 'Proxy URL', + 'settings.proxyURLPlaceholder': 'Leave empty to connect directly', + 'settings.proxyURLHint': 'Supports socks5://host:port, socks5://user:pass@host:port, http://host:port. For users in restricted network regions.', + 'settings.saveProxy': 'Save Proxy Settings', + 'settings.proxySaved': 'Proxy settings saved and applied', 'api.endpoints': 'API Endpoints', 'api.modelList': 'Model List', 'api.stats': 'Statistics', @@ -1991,6 +2012,7 @@ document.getElementById('apiKeyInput').value = d.apiKey || ''; loadThinkingConfig(); loadEndpointConfig(); + loadProxyConfig(); } async function loadThinkingConfig() { const res = await fetch('/admin/api/thinking', { headers: { 'X-Admin-Password': password } }); @@ -2020,6 +2042,19 @@ const d = await res.json(); if (d.success) { alert(t('settings.endpointSaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); } } + async function loadProxyConfig() { + const res = await fetch('/admin/api/proxy', { headers: { 'X-Admin-Password': password } }); + const d = await res.json(); + document.getElementById('proxyURLInput').value = d.proxyURL || ''; + } + async function saveProxyConfig() { + const res = await fetch('/admin/api/proxy', { + method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password }, + body: JSON.stringify({ proxyURL: document.getElementById('proxyURLInput').value.trim() }) + }); + const d = await res.json(); + if (d.success) { alert(t('settings.proxySaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); } + } function generateApiKey() { const chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'; let key = 'sk-'; From 50f1a7e5ad7bea964fd52fa9081f3953e0f83709 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 21:54:42 +0800 Subject: [PATCH 5/9] refactor: improve proxy settings UI with type selector and structured fields --- web/index.html | 82 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 11 deletions(-) diff --git a/web/index.html b/web/index.html index 8b0790b..2dd725c 100644 --- a/web/index.html +++ b/web/index.html @@ -1020,9 +1020,28 @@
- - - + + +
+
@@ -1156,9 +1175,13 @@ 'settings.resetStats': '重置统计', 'settings.confirmReset': '确定重置统计?', 'settings.proxySettings': '出站代理设置', - 'settings.proxyURL': '代理地址', - 'settings.proxyURLPlaceholder': '留空则直连(不使用代理)', - 'settings.proxyURLHint': '支持 socks5://host:port、socks5://user:pass@host:port、http://host:port 格式,适用于网络受限地区的用户', + 'settings.proxyType': '代理类型', + 'settings.proxyNone': '直连(不使用代理)', + 'settings.proxyHost': '地址 / 端口', + 'settings.proxyAuth': '认证(可选)', + 'settings.proxyUsername': '用户名', + 'settings.proxyPassword': '密码', + 'settings.proxyHostRequired': '请填写代理地址和端口', 'settings.saveProxy': '保存代理设置', 'settings.proxySaved': '代理设置已保存,已即时生效', 'api.endpoints': 'API 端点', @@ -1368,9 +1391,13 @@ 'settings.resetStats': 'Reset Statistics', 'settings.confirmReset': 'Confirm reset statistics?', 'settings.proxySettings': 'Outbound Proxy Settings', - 'settings.proxyURL': 'Proxy URL', - 'settings.proxyURLPlaceholder': 'Leave empty to connect directly', - 'settings.proxyURLHint': 'Supports socks5://host:port, socks5://user:pass@host:port, http://host:port. For users in restricted network regions.', + 'settings.proxyType': 'Proxy Type', + 'settings.proxyNone': 'Direct (no proxy)', + 'settings.proxyHost': 'Host / Port', + 'settings.proxyAuth': 'Authentication (optional)', + 'settings.proxyUsername': 'Username', + 'settings.proxyPassword': 'Password', + 'settings.proxyHostRequired': 'Please enter proxy host and port', 'settings.saveProxy': 'Save Proxy Settings', 'settings.proxySaved': 'Proxy settings saved and applied', 'api.endpoints': 'API Endpoints', @@ -2045,12 +2072,45 @@ async function loadProxyConfig() { const res = await fetch('/admin/api/proxy', { headers: { 'X-Admin-Password': password } }); const d = await res.json(); - document.getElementById('proxyURLInput').value = d.proxyURL || ''; + const proxyURL = d.proxyURL || ''; + if (!proxyURL) { + document.getElementById('proxyType').value = 'none'; + document.getElementById('proxyFields').style.display = 'none'; + return; + } + try { + const u = new URL(proxyURL); + const scheme = u.protocol.replace(':', ''); + document.getElementById('proxyType').value = scheme.startsWith('socks5') ? 'socks5' : 'http'; + document.getElementById('proxyHost').value = u.hostname; + document.getElementById('proxyPort').value = u.port; + document.getElementById('proxyUsername').value = decodeURIComponent(u.username); + document.getElementById('proxyPassword').value = decodeURIComponent(u.password); + document.getElementById('proxyFields').style.display = ''; + } catch(e) { + document.getElementById('proxyType').value = 'none'; + document.getElementById('proxyFields').style.display = 'none'; + } + } + function onProxyTypeChange() { + const type = document.getElementById('proxyType').value; + document.getElementById('proxyFields').style.display = type === 'none' ? 'none' : ''; } async function saveProxyConfig() { + const type = document.getElementById('proxyType').value; + let proxyURL = ''; + if (type !== 'none') { + const host = document.getElementById('proxyHost').value.trim(); + const port = document.getElementById('proxyPort').value.trim(); + if (!host || !port) { alert(t('settings.proxyHostRequired')); return; } + const user = document.getElementById('proxyUsername').value.trim(); + const pass = document.getElementById('proxyPassword').value.trim(); + const auth = user ? (pass ? `${encodeURIComponent(user)}:${encodeURIComponent(pass)}@` : `${encodeURIComponent(user)}@`) : ''; + proxyURL = `${type}://${auth}${host}:${port}`; + } const res = await fetch('/admin/api/proxy', { method: 'POST', headers: { 'Content-Type': 'application/json', 'X-Admin-Password': password }, - body: JSON.stringify({ proxyURL: document.getElementById('proxyURLInput').value.trim() }) + body: JSON.stringify({ proxyURL }) }); const d = await res.json(); if (d.success) { alert(t('settings.proxySaved')); } else { alert(t('common.saveFailed') + ': ' + d.error); } From 0e03808b0daea6f6489337bf6b36ed17e1612f8f Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 22:01:23 +0800 Subject: [PATCH 6/9] ci: parallel native arm64/amd64 builds, add Go BuildKit cache mounts --- .github/workflows/docker.yml | 100 ++++++++++++++++++++++++++++++----- Dockerfile | 7 ++- 2 files changed, 91 insertions(+), 16 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 4727b6d..442430d 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,18 +14,27 @@ env: jobs: build: - runs-on: ubuntu-latest + name: Build (${{ matrix.platform }}) + runs-on: ${{ matrix.runner }} permissions: contents: read packages: write + outputs: + digest-amd64: ${{ steps.digest.outputs.digest-linux-amd64 }} + digest-arm64: ${{ steps.digest.outputs.digest-linux-arm64 }} + strategy: + fail-fast: false + matrix: + include: + - platform: linux/amd64 + runner: ubuntu-latest + - platform: linux/arm64 + runner: ubuntu-24.04-arm steps: - name: Checkout uses: actions/checkout@v4 - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -37,6 +46,70 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v6 + with: + context: . + platforms: ${{ matrix.platform }} + push: ${{ github.event_name != 'pull_request' }} + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} + cache-from: type=gha,scope=${{ matrix.platform }} + cache-to: type=gha,mode=max,scope=${{ matrix.platform }} + provenance: false + + - name: Export digest + if: github.event_name != 'pull_request' + id: digest + run: | + PLATFORM_SAFE=$(echo "${{ matrix.platform }}" | tr '/' '-') + echo "digest-${PLATFORM_SAFE}=${{ steps.build.outputs.digest }}" >> "$GITHUB_OUTPUT" + mkdir -p /tmp/digests + echo "${{ steps.build.outputs.digest }}" > "/tmp/digests/${PLATFORM_SAFE}.txt" + + - name: Upload digest artifact + if: github.event_name != 'pull_request' + uses: actions/upload-artifact@v4 + with: + name: digest-${{ matrix.runner }} + path: /tmp/digests/ + if-no-files-found: error + retention-days: 1 + + merge: + name: Merge manifests + runs-on: ubuntu-latest + if: github.event_name != 'pull_request' + needs: build + permissions: + contents: read + packages: write + + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + pattern: digest-* + path: /tmp/digests + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Extract metadata id: meta uses: docker/metadata-action@v5 @@ -49,13 +122,12 @@ jobs: type=semver,pattern={{major}}.{{minor}} type=sha,prefix= - - name: Build and push - uses: docker/build-push-action@v5 - with: - context: . - platforms: linux/amd64,linux/arm64 - push: ${{ github.event_name != 'pull_request' }} - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - cache-from: type=gha - cache-to: type=gha,mode=max + - name: Create and push manifest + run: | + DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@{}") + TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}") + docker buildx imagetools create $TAGS $DIGESTS + + - name: Inspect manifest + run: | + docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }} diff --git a/Dockerfile b/Dockerfile index db8766c..dedb35c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,10 +2,13 @@ FROM golang:1.21-alpine AS builder WORKDIR /app COPY go.mod go.sum ./ -RUN go mod download +RUN --mount=type=cache,target=/go/pkg/mod \ + go mod download COPY . . -RUN CGO_ENABLED=0 GOOS=linux go build -o kiro-go . +RUN --mount=type=cache,target=/go/pkg/mod \ + --mount=type=cache,target=/root/.cache/go-build \ + CGO_ENABLED=0 GOOS=linux go build -o kiro-go . FROM alpine:latest RUN apk --no-cache add ca-certificates From fdbf511b11328915910152fcb2f337246378d6fd Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 22:05:37 +0800 Subject: [PATCH 7/9] ci: fix image name must be lowercase for ghcr.io --- .github/workflows/docker.yml | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 442430d..739b7fd 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -35,6 +35,10 @@ jobs: - name: Checkout uses: actions/checkout@v4 + - name: Set lowercase image name + id: image + run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT" + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -50,7 +54,7 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: ${{ steps.image.outputs.name }} - name: Build and push by digest id: build @@ -60,7 +64,7 @@ jobs: platforms: ${{ matrix.platform }} push: ${{ github.event_name != 'pull_request' }} labels: ${{ steps.meta.outputs.labels }} - outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} + outputs: type=image,name=${{ steps.image.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} cache-from: type=gha,scope=${{ matrix.platform }} cache-to: type=gha,mode=max,scope=${{ matrix.platform }} provenance: false @@ -100,6 +104,10 @@ jobs: path: /tmp/digests merge-multiple: true + - name: Set lowercase image name + id: image + run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT" + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -114,7 +122,7 @@ jobs: id: meta uses: docker/metadata-action@v5 with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + images: ${{ steps.image.outputs.name }} tags: | type=raw,value=latest,enable={{is_default_branch}} type=ref,event=branch @@ -124,10 +132,10 @@ jobs: - name: Create and push manifest run: | - DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}@{}") + DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ steps.image.outputs.name }}@{}") TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}") docker buildx imagetools create $TAGS $DIGESTS - name: Inspect manifest run: | - docker buildx imagetools inspect ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.meta.outputs.version }} + docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }} From 5cf2cce1d103c53ad19f35bf4b812a1df08bf40d Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 22:10:35 +0800 Subject: [PATCH 8/9] ci: use Go cross-compilation to eliminate slow arm64 runner --- .github/workflows/docker.yml | 106 ++++++----------------------------- Dockerfile | 8 ++- 2 files changed, 23 insertions(+), 91 deletions(-) diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 739b7fd..125fecc 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -14,22 +14,10 @@ env: jobs: build: - name: Build (${{ matrix.platform }}) - runs-on: ${{ matrix.runner }} + runs-on: ubuntu-latest permissions: contents: read packages: write - outputs: - digest-amd64: ${{ steps.digest.outputs.digest-linux-amd64 }} - digest-arm64: ${{ steps.digest.outputs.digest-linux-arm64 }} - strategy: - fail-fast: false - matrix: - include: - - platform: linux/amd64 - runner: ubuntu-latest - - platform: linux/arm64 - runner: ubuntu-24.04-arm steps: - name: Checkout @@ -39,6 +27,11 @@ jobs: id: image run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT" + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + with: + platforms: arm64 + - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 @@ -50,74 +43,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ steps.image.outputs.name }} - - - name: Build and push by digest - id: build - uses: docker/build-push-action@v6 - with: - context: . - platforms: ${{ matrix.platform }} - push: ${{ github.event_name != 'pull_request' }} - labels: ${{ steps.meta.outputs.labels }} - outputs: type=image,name=${{ steps.image.outputs.name }},push-by-digest=true,name-canonical=true,push=${{ github.event_name != 'pull_request' }} - cache-from: type=gha,scope=${{ matrix.platform }} - cache-to: type=gha,mode=max,scope=${{ matrix.platform }} - provenance: false - - - name: Export digest - if: github.event_name != 'pull_request' - id: digest - run: | - PLATFORM_SAFE=$(echo "${{ matrix.platform }}" | tr '/' '-') - echo "digest-${PLATFORM_SAFE}=${{ steps.build.outputs.digest }}" >> "$GITHUB_OUTPUT" - mkdir -p /tmp/digests - echo "${{ steps.build.outputs.digest }}" > "/tmp/digests/${PLATFORM_SAFE}.txt" - - - name: Upload digest artifact - if: github.event_name != 'pull_request' - uses: actions/upload-artifact@v4 - with: - name: digest-${{ matrix.runner }} - path: /tmp/digests/ - if-no-files-found: error - retention-days: 1 - - merge: - name: Merge manifests - runs-on: ubuntu-latest - if: github.event_name != 'pull_request' - needs: build - permissions: - contents: read - packages: write - - steps: - - name: Download digests - uses: actions/download-artifact@v4 - with: - pattern: digest-* - path: /tmp/digests - merge-multiple: true - - - name: Set lowercase image name - id: image - run: echo "name=$(echo '${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}' | tr '[:upper:]' '[:lower:]')" >> "$GITHUB_OUTPUT" - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Login to GitHub Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - name: Extract metadata id: meta uses: docker/metadata-action@v5 @@ -130,12 +55,15 @@ jobs: type=semver,pattern={{major}}.{{minor}} type=sha,prefix= - - name: Create and push manifest - run: | - DIGESTS=$(find /tmp/digests -name '*.txt' -exec cat {} \; | xargs -I{} echo "${{ steps.image.outputs.name }}@{}") - TAGS=$(echo "${{ steps.meta.outputs.tags }}" | xargs -I{} echo "--tag {}") - docker buildx imagetools create $TAGS $DIGESTS + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . + platforms: linux/amd64,linux/arm64 + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max + provenance: false - - name: Inspect manifest - run: | - docker buildx imagetools inspect ${{ steps.image.outputs.name }}:${{ steps.meta.outputs.version }} diff --git a/Dockerfile b/Dockerfile index dedb35c..7c6cfa4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,8 @@ -FROM golang:1.21-alpine AS builder +# builder 阶段始终运行在构建机原生平台(amd64),用 Go 交叉编译目标平台二进制 +FROM --platform=$BUILDPLATFORM golang:1.21-alpine AS builder + +ARG TARGETOS +ARG TARGETARCH WORKDIR /app COPY go.mod go.sum ./ @@ -8,7 +12,7 @@ RUN --mount=type=cache,target=/go/pkg/mod \ COPY . . RUN --mount=type=cache,target=/go/pkg/mod \ --mount=type=cache,target=/root/.cache/go-build \ - CGO_ENABLED=0 GOOS=linux go build -o kiro-go . + CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o kiro-go . FROM alpine:latest RUN apk --no-cache add ca-certificates From 940dc782cb0a9a0d095abc6f407adf21ccc24ae2 Mon Sep 17 00:00:00 2001 From: Quorinex Date: Mon, 11 May 2026 22:25:27 +0800 Subject: [PATCH 9/9] chore: bump version to 1.0.6 --- README.md | 7 +++++++ README_CN.md | 7 +++++++ config/config.go | 2 +- version.json | 2 +- web/index.html | 4 ++-- 5 files changed, 18 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index d0dcf4b..17a8649 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ If this project helps you, a Star would mean a lot. - Auto token refresh, SSE streaming, Web admin panel - Multiple auth: AWS Builder ID, IAM Identity Center (Enterprise SSO), SSO Token, local cache, credentials JSON - Usage tracking, account import/export, i18n (CN / EN) +- Support configuring outbound proxy (SOCKS5 / HTTP) ## Quick Start @@ -74,6 +75,12 @@ curl http://localhost:8080/v1/chat/completions \ Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Claude-compatible requests that include a top-level `thinking` config such as `{"type":"enabled","budget_tokens":2048}` or `{"type":"adaptive"}` also enable thinking mode automatically. Configure output format in the admin panel under Settings - Thinking Mode. +## Outbound Proxy + +For users in restricted network regions, configure an outbound proxy in the admin panel under **Settings - Outbound Proxy Settings**. Supports SOCKS5 and HTTP proxies. + +The setting takes effect immediately without restarting. + ## Environment Variables | Variable | Description | Default | diff --git a/README_CN.md b/README_CN.md index 542b2f3..8e9fdf6 100644 --- a/README_CN.md +++ b/README_CN.md @@ -17,6 +17,7 @@ - 自动 Token 刷新、SSE 流式输出、Web 管理面板 - 多种认证方式:AWS Builder ID、IAM Identity Center (企业 SSO)、SSO Token、本地缓存、凭证 JSON - 用量追踪、账号导入导出、中英双语 +- 支持设置出站代理(SOCKS5 / HTTP) ## 快速开始 @@ -74,6 +75,12 @@ curl http://localhost:8080/v1/chat/completions \ 在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。Claude 兼容请求如果带有顶层 `thinking` 配置,例如 `{"type":"enabled","budget_tokens":2048}` 或 `{"type":"adaptive"}`,也会自动启用 thinking 模式。输出格式可在管理面板「设置 - Thinking 模式」中配置。 +## 出站代理 + +可在管理面板「设置 - 出站代理设置」中配置代理。支持 SOCKS5 和 HTTP 代理。 + +设置保存后即时生效,无需重启服务。 + ## 环境变量 | 变量 | 说明 | 默认值 | diff --git a/config/config.go b/config/config.go index 7195fde..dfce5df 100644 --- a/config/config.go +++ b/config/config.go @@ -143,7 +143,7 @@ type AccountInfo struct { } // Version current version -const Version = "1.0.5" +const Version = "1.0.6" var ( cfg *Config diff --git a/version.json b/version.json index 3942b7b..14e14d7 100644 --- a/version.json +++ b/version.json @@ -1,5 +1,5 @@ { - "version": "1.0.5", + "version": "1.0.6", "changelog": "✨ Added and fixed several improvements across the project.\n✨ 新增并修复了一些内容,包含若干功能改进与问题修复。", "download": "https://github.com/Quorinex/Kiro-Go" } diff --git a/web/index.html b/web/index.html index 2dd725c..0d6985b 100644 --- a/web/index.html +++ b/web/index.html @@ -1183,7 +1183,7 @@ 'settings.proxyPassword': '密码', 'settings.proxyHostRequired': '请填写代理地址和端口', 'settings.saveProxy': '保存代理设置', - 'settings.proxySaved': '代理设置已保存,已即时生效', + 'settings.proxySaved': '代理设置已保存', 'api.endpoints': 'API 端点', 'api.modelList': '模型列表', 'api.stats': '统计数据', @@ -1399,7 +1399,7 @@ 'settings.proxyPassword': 'Password', 'settings.proxyHostRequired': 'Please enter proxy host and port', 'settings.saveProxy': 'Save Proxy Settings', - 'settings.proxySaved': 'Proxy settings saved and applied', + 'settings.proxySaved': 'Proxy settings saved', 'api.endpoints': 'API Endpoints', 'api.modelList': 'Model List', 'api.stats': 'Statistics',