fix: support Claude thinking config routing (#40)

This commit is contained in:
Henry Yang
2026-05-11 21:01:54 +08:00
committed by GitHub
parent 0203357b34
commit 221348b975
6 changed files with 492 additions and 42 deletions

View File

@@ -66,6 +66,9 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
if len(req.Messages) == 0 {
return "messages must not be empty"
}
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
return msg
}
hasUserContext := false
lastRole := ""
@@ -94,6 +97,75 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
return ""
}
func validateClaudeThinkingConfig(thinking *ClaudeThinkingConfig, maxTokens int) string {
if thinking == nil {
return ""
}
kind := strings.ToLower(strings.TrimSpace(thinking.Type))
switch kind {
case "enabled":
if maxTokens == 0 {
return "thinking.type enabled cannot be used with max_tokens=0"
}
if thinking.BudgetTokens <= 0 {
return "thinking.budget_tokens is required when thinking.type is enabled"
}
if thinking.BudgetTokens < 1024 {
return "thinking.budget_tokens must be at least 1024"
}
if maxTokens > 0 && thinking.BudgetTokens >= maxTokens {
return "thinking.budget_tokens must be less than max_tokens"
}
case "adaptive":
if thinking.BudgetTokens != 0 {
return "thinking.budget_tokens is not supported when thinking.type is adaptive"
}
case "disabled":
if thinking.BudgetTokens != 0 {
return "thinking.budget_tokens is not supported when thinking.type is disabled"
}
default:
return "thinking.type must be one of: enabled, adaptive, disabled"
}
display := strings.ToLower(strings.TrimSpace(thinking.Display))
if display != "" && display != "summarized" && display != "omitted" {
return "thinking.display must be one of: summarized, omitted"
}
if kind == "disabled" && display != "" {
return "thinking.display is not supported when thinking.type is disabled"
}
return ""
}
type claudeThinkingResponseOptions struct {
Format string
OmitDisplay bool
}
func resolveClaudeThinkingResponseOptions(thinking *ClaudeThinkingConfig, defaultFormat string) claudeThinkingResponseOptions {
opts := claudeThinkingResponseOptions{Format: defaultFormat}
if opts.Format == "" {
opts.Format = "thinking"
}
if thinking == nil {
return opts
}
display := strings.ToLower(strings.TrimSpace(thinking.Display))
switch display {
case "summarized":
opts.Format = "thinking"
case "omitted":
opts.Format = "thinking"
opts.OmitDisplay = true
}
return opts
}
func validateOpenAIRequestShape(req *OpenAIRequest) string {
if len(req.Messages) == 0 {
return "messages must not be empty"
@@ -569,8 +641,17 @@ func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON")
return
}
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
h.sendClaudeError(w, 400, "invalid_request_error", msg)
return
}
estimatedTokens := estimateClaudeRequestInputTokens(&req)
thinkingCfg := config.GetThinkingConfig()
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
req.Model = actualModel
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
estimatedTokens := estimateClaudeRequestInputTokens(effectiveReq)
if estimatedTokens < 1 {
estimatedTokens = 1
}
@@ -622,10 +703,12 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
// 解析模型和 thinking 模式
thinkingCfg := config.GetThinkingConfig()
actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
req.Model = actualModel
estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
thinkingResponseOpts := resolveClaudeThinkingResponseOptions(req.Thinking, thinkingCfg.ClaudeFormat)
estimatedInputTokens := estimateClaudeRequestInputTokens(effectiveReq)
cacheProfile := h.promptCache.BuildClaudeProfile(effectiveReq, estimatedInputTokens)
cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
// 转换请求
@@ -633,14 +716,14 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
// Stream or non-stream
if req.Stream {
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
} else {
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
}
}
// handleClaudeStream Claude 流式响应
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
w.Header().Set("Cache-Control", "no-cache")
w.Header().Set("Connection", "keep-alive")
@@ -652,7 +735,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
}
// 获取 thinking 输出格式配置
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
thinkingFormat := thinkingOpts.Format
msgID := "msg_" + uuid.New().String()
var inputTokens, outputTokens int
@@ -769,6 +852,19 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
"delta": map[string]string{"type": "text_delta", "text": text},
})
default:
if thinkingOpts.OmitDisplay {
if thinkingState == 1 {
startContentBlock("thinking")
return
}
if thinkingState == 3 {
if activeBlockType != "thinking" {
startContentBlock("thinking")
}
closeActiveBlock()
}
return
}
if thinkingState == 3 && text == "" {
if activeBlockType == "thinking" {
closeActiveBlock()
@@ -1103,7 +1199,7 @@ func (h *Handler) recordFailure() {
}
// handleClaudeNonStream Claude 非流式响应
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
var content string
var thinkingContent string
var toolUses []KiroToolUse
@@ -1146,13 +1242,14 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
}
// 合并 thinking 内容(如果有 reasoningContentEvent 的内容)
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
thinkingFormat := thinkingOpts.Format
finalContent, extractedReasoning := extractThinkingFromContent(content)
if thinking && thinkingContent == "" && extractedReasoning != "" {
thinkingContent = extractedReasoning
rawThinkingContent := thinkingContent
if thinking && rawThinkingContent == "" && extractedReasoning != "" {
rawThinkingContent = extractedReasoning
}
if !thinking {
thinkingContent = ""
rawThinkingContent = ""
}
if realInputTokens > 0 {
@@ -1160,26 +1257,32 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
} else if inputTokens <= 0 {
inputTokens = estimatedInputTokens
}
outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
outputTokens = estimateClaudeOutputTokens(finalContent, rawThinkingContent, toolUses)
h.recordSuccess(inputTokens, outputTokens, credits)
h.pool.RecordSuccess(account.ID)
h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
h.promptCache.Update(account.ID, cacheProfile)
if thinking && thinkingContent != "" {
responseThinkingContent := rawThinkingContent
includeEmptyThinkingBlock := thinking && thinkingOpts.OmitDisplay && rawThinkingContent != ""
if includeEmptyThinkingBlock {
responseThinkingContent = ""
}
if thinking && responseThinkingContent != "" {
switch thinkingFormat {
case "think":
finalContent = "<think>" + thinkingContent + "</think>" + finalContent
thinkingContent = ""
finalContent = "<think>" + responseThinkingContent + "</think>" + finalContent
responseThinkingContent = ""
case "reasoning_content":
finalContent = thinkingContent + finalContent // Claude 格式不支持 reasoning_content直接拼接
thinkingContent = ""
finalContent = responseThinkingContent + finalContent // Claude 格式不支持 reasoning_content直接拼接
responseThinkingContent = ""
default:
}
}
resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model)
resp := KiroToClaudeResponse(finalContent, responseThinkingContent, includeEmptyThinkingBlock, toolUses, inputTokens, outputTokens, model)
resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage)
resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens
resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens