fix: support Claude thinking config routing (#40)
This commit is contained in:
143
proxy/handler.go
143
proxy/handler.go
@@ -66,6 +66,9 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
|
||||
if len(req.Messages) == 0 {
|
||||
return "messages must not be empty"
|
||||
}
|
||||
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
|
||||
return msg
|
||||
}
|
||||
|
||||
hasUserContext := false
|
||||
lastRole := ""
|
||||
@@ -94,6 +97,75 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func validateClaudeThinkingConfig(thinking *ClaudeThinkingConfig, maxTokens int) string {
|
||||
if thinking == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
kind := strings.ToLower(strings.TrimSpace(thinking.Type))
|
||||
switch kind {
|
||||
case "enabled":
|
||||
if maxTokens == 0 {
|
||||
return "thinking.type enabled cannot be used with max_tokens=0"
|
||||
}
|
||||
if thinking.BudgetTokens <= 0 {
|
||||
return "thinking.budget_tokens is required when thinking.type is enabled"
|
||||
}
|
||||
if thinking.BudgetTokens < 1024 {
|
||||
return "thinking.budget_tokens must be at least 1024"
|
||||
}
|
||||
if maxTokens > 0 && thinking.BudgetTokens >= maxTokens {
|
||||
return "thinking.budget_tokens must be less than max_tokens"
|
||||
}
|
||||
case "adaptive":
|
||||
if thinking.BudgetTokens != 0 {
|
||||
return "thinking.budget_tokens is not supported when thinking.type is adaptive"
|
||||
}
|
||||
case "disabled":
|
||||
if thinking.BudgetTokens != 0 {
|
||||
return "thinking.budget_tokens is not supported when thinking.type is disabled"
|
||||
}
|
||||
default:
|
||||
return "thinking.type must be one of: enabled, adaptive, disabled"
|
||||
}
|
||||
|
||||
display := strings.ToLower(strings.TrimSpace(thinking.Display))
|
||||
if display != "" && display != "summarized" && display != "omitted" {
|
||||
return "thinking.display must be one of: summarized, omitted"
|
||||
}
|
||||
if kind == "disabled" && display != "" {
|
||||
return "thinking.display is not supported when thinking.type is disabled"
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
type claudeThinkingResponseOptions struct {
|
||||
Format string
|
||||
OmitDisplay bool
|
||||
}
|
||||
|
||||
func resolveClaudeThinkingResponseOptions(thinking *ClaudeThinkingConfig, defaultFormat string) claudeThinkingResponseOptions {
|
||||
opts := claudeThinkingResponseOptions{Format: defaultFormat}
|
||||
if opts.Format == "" {
|
||||
opts.Format = "thinking"
|
||||
}
|
||||
if thinking == nil {
|
||||
return opts
|
||||
}
|
||||
|
||||
display := strings.ToLower(strings.TrimSpace(thinking.Display))
|
||||
switch display {
|
||||
case "summarized":
|
||||
opts.Format = "thinking"
|
||||
case "omitted":
|
||||
opts.Format = "thinking"
|
||||
opts.OmitDisplay = true
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
func validateOpenAIRequestShape(req *OpenAIRequest) string {
|
||||
if len(req.Messages) == 0 {
|
||||
return "messages must not be empty"
|
||||
@@ -569,8 +641,17 @@ func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
|
||||
h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON")
|
||||
return
|
||||
}
|
||||
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
|
||||
h.sendClaudeError(w, 400, "invalid_request_error", msg)
|
||||
return
|
||||
}
|
||||
|
||||
estimatedTokens := estimateClaudeRequestInputTokens(&req)
|
||||
thinkingCfg := config.GetThinkingConfig()
|
||||
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
|
||||
req.Model = actualModel
|
||||
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
|
||||
|
||||
estimatedTokens := estimateClaudeRequestInputTokens(effectiveReq)
|
||||
if estimatedTokens < 1 {
|
||||
estimatedTokens = 1
|
||||
}
|
||||
@@ -622,10 +703,12 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
|
||||
|
||||
// 解析模型和 thinking 模式
|
||||
thinkingCfg := config.GetThinkingConfig()
|
||||
actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
|
||||
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
|
||||
req.Model = actualModel
|
||||
estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
|
||||
cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
|
||||
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
|
||||
thinkingResponseOpts := resolveClaudeThinkingResponseOptions(req.Thinking, thinkingCfg.ClaudeFormat)
|
||||
estimatedInputTokens := estimateClaudeRequestInputTokens(effectiveReq)
|
||||
cacheProfile := h.promptCache.BuildClaudeProfile(effectiveReq, estimatedInputTokens)
|
||||
cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
|
||||
|
||||
// 转换请求
|
||||
@@ -633,14 +716,14 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
|
||||
|
||||
// Stream or non-stream
|
||||
if req.Stream {
|
||||
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
} else {
|
||||
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
}
|
||||
}
|
||||
|
||||
// handleClaudeStream Claude 流式响应
|
||||
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
@@ -652,7 +735,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
|
||||
}
|
||||
|
||||
// 获取 thinking 输出格式配置
|
||||
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
|
||||
thinkingFormat := thinkingOpts.Format
|
||||
|
||||
msgID := "msg_" + uuid.New().String()
|
||||
var inputTokens, outputTokens int
|
||||
@@ -769,6 +852,19 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
|
||||
"delta": map[string]string{"type": "text_delta", "text": text},
|
||||
})
|
||||
default:
|
||||
if thinkingOpts.OmitDisplay {
|
||||
if thinkingState == 1 {
|
||||
startContentBlock("thinking")
|
||||
return
|
||||
}
|
||||
if thinkingState == 3 {
|
||||
if activeBlockType != "thinking" {
|
||||
startContentBlock("thinking")
|
||||
}
|
||||
closeActiveBlock()
|
||||
}
|
||||
return
|
||||
}
|
||||
if thinkingState == 3 && text == "" {
|
||||
if activeBlockType == "thinking" {
|
||||
closeActiveBlock()
|
||||
@@ -1103,7 +1199,7 @@ func (h *Handler) recordFailure() {
|
||||
}
|
||||
|
||||
// handleClaudeNonStream Claude 非流式响应
|
||||
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
var content string
|
||||
var thinkingContent string
|
||||
var toolUses []KiroToolUse
|
||||
@@ -1146,13 +1242,14 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
|
||||
}
|
||||
|
||||
// 合并 thinking 内容(如果有 reasoningContentEvent 的内容)
|
||||
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
|
||||
thinkingFormat := thinkingOpts.Format
|
||||
finalContent, extractedReasoning := extractThinkingFromContent(content)
|
||||
if thinking && thinkingContent == "" && extractedReasoning != "" {
|
||||
thinkingContent = extractedReasoning
|
||||
rawThinkingContent := thinkingContent
|
||||
if thinking && rawThinkingContent == "" && extractedReasoning != "" {
|
||||
rawThinkingContent = extractedReasoning
|
||||
}
|
||||
if !thinking {
|
||||
thinkingContent = ""
|
||||
rawThinkingContent = ""
|
||||
}
|
||||
|
||||
if realInputTokens > 0 {
|
||||
@@ -1160,26 +1257,32 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
|
||||
} else if inputTokens <= 0 {
|
||||
inputTokens = estimatedInputTokens
|
||||
}
|
||||
outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
|
||||
outputTokens = estimateClaudeOutputTokens(finalContent, rawThinkingContent, toolUses)
|
||||
|
||||
h.recordSuccess(inputTokens, outputTokens, credits)
|
||||
h.pool.RecordSuccess(account.ID)
|
||||
h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
|
||||
h.promptCache.Update(account.ID, cacheProfile)
|
||||
|
||||
if thinking && thinkingContent != "" {
|
||||
responseThinkingContent := rawThinkingContent
|
||||
includeEmptyThinkingBlock := thinking && thinkingOpts.OmitDisplay && rawThinkingContent != ""
|
||||
if includeEmptyThinkingBlock {
|
||||
responseThinkingContent = ""
|
||||
}
|
||||
|
||||
if thinking && responseThinkingContent != "" {
|
||||
switch thinkingFormat {
|
||||
case "think":
|
||||
finalContent = "<think>" + thinkingContent + "</think>" + finalContent
|
||||
thinkingContent = ""
|
||||
finalContent = "<think>" + responseThinkingContent + "</think>" + finalContent
|
||||
responseThinkingContent = ""
|
||||
case "reasoning_content":
|
||||
finalContent = thinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接
|
||||
thinkingContent = ""
|
||||
finalContent = responseThinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接
|
||||
responseThinkingContent = ""
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model)
|
||||
resp := KiroToClaudeResponse(finalContent, responseThinkingContent, includeEmptyThinkingBlock, toolUses, inputTokens, outputTokens, model)
|
||||
resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage)
|
||||
resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens
|
||||
resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens
|
||||
|
||||
Reference in New Issue
Block a user