fix: support Claude thinking config routing (#40)
This commit is contained in:
@@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \
|
||||
|
||||
## Thinking Mode
|
||||
|
||||
Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Configure output format in the admin panel under Settings - Thinking Mode.
|
||||
Append a suffix (default `-thinking`) to the model name, e.g. `claude-sonnet-4.5-thinking`. Claude-compatible requests that include a top-level `thinking` config such as `{"type":"enabled","budget_tokens":2048}` or `{"type":"adaptive"}` also enable thinking mode automatically. Configure output format in the admin panel under Settings - Thinking Mode.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
|
||||
@@ -72,7 +72,7 @@ curl http://localhost:8080/v1/chat/completions \
|
||||
|
||||
## 思考模式
|
||||
|
||||
在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。输出格式可在管理面板「设置 - Thinking 模式」中配置。
|
||||
在模型名后加后缀(默认 `-thinking`)即可启用,例如 `claude-sonnet-4.5-thinking`。Claude 兼容请求如果带有顶层 `thinking` 配置,例如 `{"type":"enabled","budget_tokens":2048}` 或 `{"type":"adaptive"}`,也会自动启用 thinking 模式。输出格式可在管理面板「设置 - Thinking 模式」中配置。
|
||||
|
||||
## 环境变量
|
||||
|
||||
|
||||
143
proxy/handler.go
143
proxy/handler.go
@@ -66,6 +66,9 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
|
||||
if len(req.Messages) == 0 {
|
||||
return "messages must not be empty"
|
||||
}
|
||||
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
|
||||
return msg
|
||||
}
|
||||
|
||||
hasUserContext := false
|
||||
lastRole := ""
|
||||
@@ -94,6 +97,75 @@ func validateClaudeRequestShape(req *ClaudeRequest) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func validateClaudeThinkingConfig(thinking *ClaudeThinkingConfig, maxTokens int) string {
|
||||
if thinking == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
kind := strings.ToLower(strings.TrimSpace(thinking.Type))
|
||||
switch kind {
|
||||
case "enabled":
|
||||
if maxTokens == 0 {
|
||||
return "thinking.type enabled cannot be used with max_tokens=0"
|
||||
}
|
||||
if thinking.BudgetTokens <= 0 {
|
||||
return "thinking.budget_tokens is required when thinking.type is enabled"
|
||||
}
|
||||
if thinking.BudgetTokens < 1024 {
|
||||
return "thinking.budget_tokens must be at least 1024"
|
||||
}
|
||||
if maxTokens > 0 && thinking.BudgetTokens >= maxTokens {
|
||||
return "thinking.budget_tokens must be less than max_tokens"
|
||||
}
|
||||
case "adaptive":
|
||||
if thinking.BudgetTokens != 0 {
|
||||
return "thinking.budget_tokens is not supported when thinking.type is adaptive"
|
||||
}
|
||||
case "disabled":
|
||||
if thinking.BudgetTokens != 0 {
|
||||
return "thinking.budget_tokens is not supported when thinking.type is disabled"
|
||||
}
|
||||
default:
|
||||
return "thinking.type must be one of: enabled, adaptive, disabled"
|
||||
}
|
||||
|
||||
display := strings.ToLower(strings.TrimSpace(thinking.Display))
|
||||
if display != "" && display != "summarized" && display != "omitted" {
|
||||
return "thinking.display must be one of: summarized, omitted"
|
||||
}
|
||||
if kind == "disabled" && display != "" {
|
||||
return "thinking.display is not supported when thinking.type is disabled"
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
// claudeThinkingResponseOptions carries the per-request decisions about how
// thinking output is rendered in a Claude-format response.
type claudeThinkingResponseOptions struct {
	// Format selects the thinking output format used by the response handlers.
	Format string
	// OmitDisplay is set when the request asked for thinking.display "omitted".
	OmitDisplay bool
}
|
||||
|
||||
func resolveClaudeThinkingResponseOptions(thinking *ClaudeThinkingConfig, defaultFormat string) claudeThinkingResponseOptions {
|
||||
opts := claudeThinkingResponseOptions{Format: defaultFormat}
|
||||
if opts.Format == "" {
|
||||
opts.Format = "thinking"
|
||||
}
|
||||
if thinking == nil {
|
||||
return opts
|
||||
}
|
||||
|
||||
display := strings.ToLower(strings.TrimSpace(thinking.Display))
|
||||
switch display {
|
||||
case "summarized":
|
||||
opts.Format = "thinking"
|
||||
case "omitted":
|
||||
opts.Format = "thinking"
|
||||
opts.OmitDisplay = true
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
func validateOpenAIRequestShape(req *OpenAIRequest) string {
|
||||
if len(req.Messages) == 0 {
|
||||
return "messages must not be empty"
|
||||
@@ -569,8 +641,17 @@ func (h *Handler) handleCountTokens(w http.ResponseWriter, r *http.Request) {
|
||||
h.sendClaudeError(w, 400, "invalid_request_error", "Invalid JSON")
|
||||
return
|
||||
}
|
||||
if msg := validateClaudeThinkingConfig(req.Thinking, req.MaxTokens); msg != "" {
|
||||
h.sendClaudeError(w, 400, "invalid_request_error", msg)
|
||||
return
|
||||
}
|
||||
|
||||
estimatedTokens := estimateClaudeRequestInputTokens(&req)
|
||||
thinkingCfg := config.GetThinkingConfig()
|
||||
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
|
||||
req.Model = actualModel
|
||||
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
|
||||
|
||||
estimatedTokens := estimateClaudeRequestInputTokens(effectiveReq)
|
||||
if estimatedTokens < 1 {
|
||||
estimatedTokens = 1
|
||||
}
|
||||
@@ -622,10 +703,12 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
|
||||
|
||||
// 解析模型和 thinking 模式
|
||||
thinkingCfg := config.GetThinkingConfig()
|
||||
actualModel, thinking := ParseModelAndThinking(req.Model, thinkingCfg.Suffix)
|
||||
actualModel, thinking := resolveClaudeThinkingMode(req.Model, req.Thinking, thinkingCfg.Suffix)
|
||||
req.Model = actualModel
|
||||
estimatedInputTokens := estimateClaudeRequestInputTokens(&req)
|
||||
cacheProfile := h.promptCache.BuildClaudeProfile(&req, estimatedInputTokens)
|
||||
effectiveReq := cloneClaudeRequestForThinking(&req, thinking)
|
||||
thinkingResponseOpts := resolveClaudeThinkingResponseOptions(req.Thinking, thinkingCfg.ClaudeFormat)
|
||||
estimatedInputTokens := estimateClaudeRequestInputTokens(effectiveReq)
|
||||
cacheProfile := h.promptCache.BuildClaudeProfile(effectiveReq, estimatedInputTokens)
|
||||
cacheUsage := h.promptCache.Compute(account.ID, cacheProfile)
|
||||
|
||||
// 转换请求
|
||||
@@ -633,14 +716,14 @@ func (h *Handler) handleClaudeMessagesInternal(w http.ResponseWriter, r *http.Re
|
||||
|
||||
// Stream or non-stream
|
||||
if req.Stream {
|
||||
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
h.handleClaudeStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
} else {
|
||||
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
h.handleClaudeNonStream(w, account, kiroPayload, req.Model, thinking, thinkingResponseOpts, estimatedInputTokens, cacheUsage, cacheProfile)
|
||||
}
|
||||
}
|
||||
|
||||
// handleClaudeStream Claude 流式响应
|
||||
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
w.Header().Set("Content-Type", "text/event-stream; charset=utf-8")
|
||||
w.Header().Set("Cache-Control", "no-cache")
|
||||
w.Header().Set("Connection", "keep-alive")
|
||||
@@ -652,7 +735,7 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
|
||||
}
|
||||
|
||||
// 获取 thinking 输出格式配置
|
||||
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
|
||||
thinkingFormat := thinkingOpts.Format
|
||||
|
||||
msgID := "msg_" + uuid.New().String()
|
||||
var inputTokens, outputTokens int
|
||||
@@ -769,6 +852,19 @@ func (h *Handler) handleClaudeStream(w http.ResponseWriter, account *config.Acco
|
||||
"delta": map[string]string{"type": "text_delta", "text": text},
|
||||
})
|
||||
default:
|
||||
if thinkingOpts.OmitDisplay {
|
||||
if thinkingState == 1 {
|
||||
startContentBlock("thinking")
|
||||
return
|
||||
}
|
||||
if thinkingState == 3 {
|
||||
if activeBlockType != "thinking" {
|
||||
startContentBlock("thinking")
|
||||
}
|
||||
closeActiveBlock()
|
||||
}
|
||||
return
|
||||
}
|
||||
if thinkingState == 3 && text == "" {
|
||||
if activeBlockType == "thinking" {
|
||||
closeActiveBlock()
|
||||
@@ -1103,7 +1199,7 @@ func (h *Handler) recordFailure() {
|
||||
}
|
||||
|
||||
// handleClaudeNonStream Claude 非流式响应
|
||||
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.Account, payload *KiroPayload, model string, thinking bool, thinkingOpts claudeThinkingResponseOptions, estimatedInputTokens int, cacheUsage promptCacheUsage, cacheProfile *promptCacheProfile) {
|
||||
var content string
|
||||
var thinkingContent string
|
||||
var toolUses []KiroToolUse
|
||||
@@ -1146,13 +1242,14 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
|
||||
}
|
||||
|
||||
// 合并 thinking 内容(如果有 reasoningContentEvent 的内容)
|
||||
thinkingFormat := config.GetThinkingConfig().ClaudeFormat
|
||||
thinkingFormat := thinkingOpts.Format
|
||||
finalContent, extractedReasoning := extractThinkingFromContent(content)
|
||||
if thinking && thinkingContent == "" && extractedReasoning != "" {
|
||||
thinkingContent = extractedReasoning
|
||||
rawThinkingContent := thinkingContent
|
||||
if thinking && rawThinkingContent == "" && extractedReasoning != "" {
|
||||
rawThinkingContent = extractedReasoning
|
||||
}
|
||||
if !thinking {
|
||||
thinkingContent = ""
|
||||
rawThinkingContent = ""
|
||||
}
|
||||
|
||||
if realInputTokens > 0 {
|
||||
@@ -1160,26 +1257,32 @@ func (h *Handler) handleClaudeNonStream(w http.ResponseWriter, account *config.A
|
||||
} else if inputTokens <= 0 {
|
||||
inputTokens = estimatedInputTokens
|
||||
}
|
||||
outputTokens = estimateClaudeOutputTokens(finalContent, thinkingContent, toolUses)
|
||||
outputTokens = estimateClaudeOutputTokens(finalContent, rawThinkingContent, toolUses)
|
||||
|
||||
h.recordSuccess(inputTokens, outputTokens, credits)
|
||||
h.pool.RecordSuccess(account.ID)
|
||||
h.pool.UpdateStats(account.ID, inputTokens+outputTokens, credits)
|
||||
h.promptCache.Update(account.ID, cacheProfile)
|
||||
|
||||
if thinking && thinkingContent != "" {
|
||||
responseThinkingContent := rawThinkingContent
|
||||
includeEmptyThinkingBlock := thinking && thinkingOpts.OmitDisplay && rawThinkingContent != ""
|
||||
if includeEmptyThinkingBlock {
|
||||
responseThinkingContent = ""
|
||||
}
|
||||
|
||||
if thinking && responseThinkingContent != "" {
|
||||
switch thinkingFormat {
|
||||
case "think":
|
||||
finalContent = "<think>" + thinkingContent + "</think>" + finalContent
|
||||
thinkingContent = ""
|
||||
finalContent = "<think>" + responseThinkingContent + "</think>" + finalContent
|
||||
responseThinkingContent = ""
|
||||
case "reasoning_content":
|
||||
finalContent = thinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接
|
||||
thinkingContent = ""
|
||||
finalContent = responseThinkingContent + finalContent // Claude 格式不支持 reasoning_content,直接拼接
|
||||
responseThinkingContent = ""
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
resp := KiroToClaudeResponse(finalContent, thinkingContent, toolUses, inputTokens, outputTokens, model)
|
||||
resp := KiroToClaudeResponse(finalContent, responseThinkingContent, includeEmptyThinkingBlock, toolUses, inputTokens, outputTokens, model)
|
||||
resp.Usage.InputTokens = billedClaudeInputTokens(inputTokens, cacheUsage)
|
||||
resp.Usage.CacheCreationInputTokens = cacheUsage.CacheCreationInputTokens
|
||||
resp.Usage.CacheReadInputTokens = cacheUsage.CacheReadInputTokens
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
import "testing"
|
||||
|
||||
func TestThinkingSourceReasoningFirst(t *testing.T) {
|
||||
var source thinkingStreamSource
|
||||
@@ -101,6 +99,240 @@ func TestValidateClaudeRequestShapeRejectsAssistantPrefill(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveClaudeThinkingModeHonorsRequestThinking(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
model string
|
||||
thinking *ClaudeThinkingConfig
|
||||
wantModel string
|
||||
wantThinking bool
|
||||
}{
|
||||
{
|
||||
name: "adaptive request enables thinking",
|
||||
model: "claude-sonnet-4.6",
|
||||
thinking: &ClaudeThinkingConfig{Type: "adaptive"},
|
||||
wantModel: "claude-sonnet-4.6",
|
||||
wantThinking: true,
|
||||
},
|
||||
{
|
||||
name: "enabled request enables thinking",
|
||||
model: "claude-opus-4.5",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
|
||||
wantModel: "claude-opus-4.5",
|
||||
wantThinking: true,
|
||||
},
|
||||
{
|
||||
name: "disabled request keeps thinking off",
|
||||
model: "claude-opus-4.7",
|
||||
thinking: &ClaudeThinkingConfig{Type: "disabled"},
|
||||
wantModel: "claude-opus-4.7",
|
||||
wantThinking: false,
|
||||
},
|
||||
{
|
||||
name: "suffix remains supported when thinking is disabled",
|
||||
model: "claude-sonnet-4.5-thinking",
|
||||
thinking: &ClaudeThinkingConfig{Type: "disabled"},
|
||||
wantModel: "claude-sonnet-4.5",
|
||||
wantThinking: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
gotModel, gotThinking := resolveClaudeThinkingMode(tc.model, tc.thinking, "-thinking")
|
||||
if gotModel != tc.wantModel {
|
||||
t.Fatalf("expected model %q, got %q", tc.wantModel, gotModel)
|
||||
}
|
||||
if gotThinking != tc.wantThinking {
|
||||
t.Fatalf("expected thinking=%v, got %v", tc.wantThinking, gotThinking)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCloneClaudeRequestForThinkingInjectsPromptWithoutMutatingOriginal(t *testing.T) {
|
||||
req := &ClaudeRequest{
|
||||
Model: "claude-sonnet-4.6",
|
||||
System: "Follow the user instructions.",
|
||||
}
|
||||
|
||||
cloned := cloneClaudeRequestForThinking(req, true)
|
||||
blocks, ok := cloned.System.([]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected cloned system prompt to be structured blocks, got %T", cloned.System)
|
||||
}
|
||||
if len(blocks) != 2 {
|
||||
t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks))
|
||||
}
|
||||
gotPrompt := extractSystemPrompt(cloned.System)
|
||||
expected := ThinkingModePrompt + "\n\nFollow the user instructions."
|
||||
if gotPrompt != expected {
|
||||
t.Fatalf("expected injected system prompt %q, got %q", expected, gotPrompt)
|
||||
}
|
||||
if original, ok := req.System.(string); !ok || original != "Follow the user instructions." {
|
||||
t.Fatalf("expected original request system prompt to stay unchanged, got %#v", req.System)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCloneClaudeRequestForThinkingPreservesStructuredSystemBlocks(t *testing.T) {
|
||||
req := &ClaudeRequest{
|
||||
Model: "claude-sonnet-4.6",
|
||||
System: []interface{}{
|
||||
map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": "cached system",
|
||||
"cache_control": map[string]interface{}{
|
||||
"type": "ephemeral",
|
||||
"ttl": "5m",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
cloned := cloneClaudeRequestForThinking(req, true)
|
||||
blocks, ok := cloned.System.([]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected structured system blocks, got %T", cloned.System)
|
||||
}
|
||||
if len(blocks) != 2 {
|
||||
t.Fatalf("expected 2 system blocks after prepend, got %d", len(blocks))
|
||||
}
|
||||
first, ok := blocks[0].(map[string]interface{})
|
||||
if !ok || first["text"] != ThinkingModePrompt+"\n" {
|
||||
t.Fatalf("expected first block to be thinking prompt, got %#v", blocks[0])
|
||||
}
|
||||
second, ok := blocks[1].(map[string]interface{})
|
||||
if !ok {
|
||||
t.Fatalf("expected original system block to remain a map, got %T", blocks[1])
|
||||
}
|
||||
cacheControl, ok := second["cache_control"].(map[string]interface{})
|
||||
if !ok || cacheControl["type"] != "ephemeral" {
|
||||
t.Fatalf("expected original cache_control to be preserved, got %#v", second["cache_control"])
|
||||
}
|
||||
}
|
||||
|
||||
func TestThinkingPromptAffectsClaudeTokenEstimate(t *testing.T) {
|
||||
req := &ClaudeRequest{
|
||||
Model: "claude-sonnet-4.6",
|
||||
Messages: []ClaudeMessage{{Role: "user", Content: "hello"}},
|
||||
}
|
||||
|
||||
baseTokens := estimateClaudeRequestInputTokens(req)
|
||||
thinkingTokens := estimateClaudeRequestInputTokens(cloneClaudeRequestForThinking(req, true))
|
||||
|
||||
if thinkingTokens <= baseTokens {
|
||||
t.Fatalf("expected thinking tokens (%d) to exceed base tokens (%d)", thinkingTokens, baseTokens)
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateClaudeThinkingConfig(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
thinking *ClaudeThinkingConfig
|
||||
maxTokens int
|
||||
expectError bool
|
||||
}{
|
||||
{
|
||||
name: "adaptive is valid",
|
||||
thinking: &ClaudeThinkingConfig{Type: "adaptive"},
|
||||
maxTokens: 4096,
|
||||
expectError: false,
|
||||
},
|
||||
{
|
||||
name: "enabled requires budget",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled"},
|
||||
maxTokens: 4096,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "enabled requires at least 1024 budget tokens",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 512},
|
||||
maxTokens: 4096,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "enabled rejects max tokens zero",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
|
||||
maxTokens: 0,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "enabled budget must stay below max tokens",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 4096},
|
||||
maxTokens: 4096,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "disabled rejects display",
|
||||
thinking: &ClaudeThinkingConfig{Type: "disabled", Display: "summarized"},
|
||||
maxTokens: 4096,
|
||||
expectError: true,
|
||||
},
|
||||
{
|
||||
name: "missing type is rejected",
|
||||
thinking: &ClaudeThinkingConfig{},
|
||||
maxTokens: 4096,
|
||||
expectError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
errMsg := validateClaudeThinkingConfig(tc.thinking, tc.maxTokens)
|
||||
if tc.expectError && errMsg == "" {
|
||||
t.Fatalf("expected validation error")
|
||||
}
|
||||
if !tc.expectError && errMsg != "" {
|
||||
t.Fatalf("expected thinking config to be valid, got %q", errMsg)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResolveClaudeThinkingResponseOptions(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
thinking *ClaudeThinkingConfig
|
||||
defaultFmt string
|
||||
wantFmt string
|
||||
wantOmit bool
|
||||
}{
|
||||
{
|
||||
name: "default config is preserved when display unset",
|
||||
thinking: &ClaudeThinkingConfig{Type: "enabled", BudgetTokens: 2048},
|
||||
defaultFmt: "think",
|
||||
wantFmt: "think",
|
||||
wantOmit: false,
|
||||
},
|
||||
{
|
||||
name: "summarized forces official thinking blocks",
|
||||
thinking: &ClaudeThinkingConfig{Type: "adaptive", Display: "summarized"},
|
||||
defaultFmt: "reasoning_content",
|
||||
wantFmt: "thinking",
|
||||
wantOmit: false,
|
||||
},
|
||||
{
|
||||
name: "omitted forces official thinking blocks and hides content",
|
||||
thinking: &ClaudeThinkingConfig{Type: "adaptive", Display: "omitted"},
|
||||
defaultFmt: "think",
|
||||
wantFmt: "thinking",
|
||||
wantOmit: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
opts := resolveClaudeThinkingResponseOptions(tc.thinking, tc.defaultFmt)
|
||||
if opts.Format != tc.wantFmt {
|
||||
t.Fatalf("expected format %q, got %q", tc.wantFmt, opts.Format)
|
||||
}
|
||||
if opts.OmitDisplay != tc.wantOmit {
|
||||
t.Fatalf("expected omitDisplay=%v, got %v", tc.wantOmit, opts.OmitDisplay)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMergeUniqueModelsPreservesUnionAcrossAccounts(t *testing.T) {
|
||||
base := []ModelInfo{
|
||||
{ModelId: "claude-sonnet-4.5", InputTypes: []string{"TEXT"}},
|
||||
|
||||
@@ -76,6 +76,19 @@ func ParseModelAndThinking(model string, thinkingSuffix string) (string, bool) {
|
||||
return model, thinking
|
||||
}
|
||||
|
||||
func resolveClaudeThinkingMode(model string, thinkingCfg *ClaudeThinkingConfig, thinkingSuffix string) (string, bool) {
|
||||
actualModel, suffixThinking := ParseModelAndThinking(model, thinkingSuffix)
|
||||
return actualModel, suffixThinking || isClaudeThinkingRequested(thinkingCfg)
|
||||
}
|
||||
|
||||
func isClaudeThinkingRequested(thinkingCfg *ClaudeThinkingConfig) bool {
|
||||
if thinkingCfg == nil {
|
||||
return false
|
||||
}
|
||||
kind := strings.ToLower(strings.TrimSpace(thinkingCfg.Type))
|
||||
return kind == "enabled" || kind == "adaptive"
|
||||
}
|
||||
|
||||
func MapModel(model string) string {
|
||||
mapped, _ := ParseModelAndThinking(model, "-thinking")
|
||||
return mapped
|
||||
@@ -84,15 +97,22 @@ func MapModel(model string) string {
|
||||
// ==================== Claude API 类型 ====================
|
||||
|
||||
type ClaudeRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []ClaudeMessage `json:"messages"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
System interface{} `json:"system,omitempty"` // string or []SystemBlock
|
||||
Tools []ClaudeTool `json:"tools,omitempty"`
|
||||
ToolChoice interface{} `json:"tool_choice,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Messages []ClaudeMessage `json:"messages"`
|
||||
MaxTokens int `json:"max_tokens"`
|
||||
Temperature float64 `json:"temperature,omitempty"`
|
||||
TopP float64 `json:"top_p,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
System interface{} `json:"system,omitempty"` // string or []SystemBlock
|
||||
Thinking *ClaudeThinkingConfig `json:"thinking,omitempty"`
|
||||
Tools []ClaudeTool `json:"tools,omitempty"`
|
||||
ToolChoice interface{} `json:"tool_choice,omitempty"`
|
||||
}
|
||||
|
||||
// ClaudeThinkingConfig is the optional top-level `thinking` object of a
// Claude-compatible request.
type ClaudeThinkingConfig struct {
	// Type is the thinking mode; validation accepts "enabled", "adaptive" and
	// "disabled".
	Type string `json:"type,omitempty"`
	// BudgetTokens is the thinking token budget; it is only accepted together
	// with type "enabled".
	BudgetTokens int `json:"budget_tokens,omitempty"`
	// Display controls how thinking is returned; validation accepts
	// "summarized" and "omitted".
	Display string `json:"display,omitempty"`
}
|
||||
|
||||
type ClaudeMessage struct {
|
||||
@@ -104,6 +124,7 @@ type ClaudeContentBlock struct {
|
||||
Type string `json:"type"`
|
||||
Text string `json:"text,omitempty"`
|
||||
Thinking string `json:"thinking,omitempty"`
|
||||
Signature string `json:"signature,omitempty"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Name string `json:"name,omitempty"`
|
||||
Input interface{} `json:"input,omitempty"`
|
||||
@@ -157,12 +178,7 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
|
||||
origin := "AI_EDITOR"
|
||||
|
||||
// 提取系统提示
|
||||
systemPrompt := extractSystemPrompt(req.System)
|
||||
|
||||
// 如果启用 thinking 模式,注入 thinking 提示
|
||||
if thinking {
|
||||
systemPrompt = ThinkingModePrompt + "\n\n" + systemPrompt
|
||||
}
|
||||
systemPrompt := buildClaudeSystemPrompt(req.System, thinking)
|
||||
|
||||
// 构建历史消息
|
||||
history := make([]KiroHistoryMessage, 0)
|
||||
@@ -263,6 +279,88 @@ func ClaudeToKiro(req *ClaudeRequest, thinking bool) *KiroPayload {
|
||||
return payload
|
||||
}
|
||||
|
||||
func buildClaudeSystemPrompt(system interface{}, thinking bool) string {
|
||||
systemPrompt := extractSystemPrompt(system)
|
||||
if !thinking {
|
||||
return systemPrompt
|
||||
}
|
||||
if systemPrompt == "" {
|
||||
return ThinkingModePrompt
|
||||
}
|
||||
return ThinkingModePrompt + "\n\n" + systemPrompt
|
||||
}
|
||||
|
||||
func cloneClaudeRequestForThinking(req *ClaudeRequest, thinking bool) *ClaudeRequest {
|
||||
if req == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
cloned := *req
|
||||
if thinking {
|
||||
cloned.System = prependThinkingSystem(req.System)
|
||||
}
|
||||
return &cloned
|
||||
}
|
||||
|
||||
func prependThinkingSystem(system interface{}) interface{} {
|
||||
thinkingText := ThinkingModePrompt
|
||||
if hasClaudeSystemContent(system) {
|
||||
thinkingText += "\n"
|
||||
}
|
||||
thinkingBlock := map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": thinkingText,
|
||||
}
|
||||
|
||||
switch v := system.(type) {
|
||||
case nil:
|
||||
return []interface{}{thinkingBlock}
|
||||
case string:
|
||||
if v == "" {
|
||||
return []interface{}{thinkingBlock}
|
||||
}
|
||||
return []interface{}{
|
||||
thinkingBlock,
|
||||
map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": v,
|
||||
},
|
||||
}
|
||||
case []interface{}:
|
||||
blocks := make([]interface{}, 0, len(v)+1)
|
||||
blocks = append(blocks, thinkingBlock)
|
||||
blocks = append(blocks, v...)
|
||||
return blocks
|
||||
case []string:
|
||||
blocks := make([]interface{}, 0, len(v)+1)
|
||||
blocks = append(blocks, thinkingBlock)
|
||||
for _, block := range v {
|
||||
blocks = append(blocks, map[string]interface{}{
|
||||
"type": "text",
|
||||
"text": block,
|
||||
})
|
||||
}
|
||||
return blocks
|
||||
default:
|
||||
return []interface{}{thinkingBlock}
|
||||
}
|
||||
}
|
||||
|
||||
// hasClaudeSystemContent reports whether a request's system value carries any
// content: a non-empty string, or a non-empty []interface{} / []string. A nil
// value has no content. Values of any other type are treated as having
// content.
func hasClaudeSystemContent(system interface{}) bool {
	switch v := system.(type) {
	case nil:
		return false
	case string:
		return v != ""
	case []interface{}:
		return len(v) > 0
	case []string:
		return len(v) > 0
	}
	return true
}
|
||||
|
||||
func extractSystemPrompt(system interface{}) string {
|
||||
if system == nil {
|
||||
return ""
|
||||
@@ -459,10 +557,10 @@ func shortenToolName(name string) string {
|
||||
|
||||
// ==================== Kiro -> Claude 转换 ====================
|
||||
|
||||
func KiroToClaudeResponse(content, thinkingContent string, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse {
|
||||
func KiroToClaudeResponse(content, thinkingContent string, includeEmptyThinkingBlock bool, toolUses []KiroToolUse, inputTokens, outputTokens int, model string) *ClaudeResponse {
|
||||
blocks := make([]ClaudeContentBlock, 0)
|
||||
|
||||
if thinkingContent != "" {
|
||||
if thinkingContent != "" || includeEmptyThinkingBlock {
|
||||
blocks = append(blocks, ClaudeContentBlock{
|
||||
Type: "thinking",
|
||||
Thinking: thinkingContent,
|
||||
|
||||
@@ -233,6 +233,23 @@ func TestClaudeToKiroDropsLeadingAssistantHistory(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestKiroToClaudeResponseCanEmitEmptyThinkingBlock(t *testing.T) {
|
||||
resp := KiroToClaudeResponse("final answer", "", true, nil, 10, 20, "claude-sonnet-4.6")
|
||||
|
||||
if len(resp.Content) != 2 {
|
||||
t.Fatalf("expected empty thinking block plus text block, got %d blocks", len(resp.Content))
|
||||
}
|
||||
if resp.Content[0].Type != "thinking" {
|
||||
t.Fatalf("expected first block to be thinking, got %#v", resp.Content[0])
|
||||
}
|
||||
if resp.Content[0].Thinking != "" {
|
||||
t.Fatalf("expected omitted thinking block to have empty content, got %#v", resp.Content[0].Thinking)
|
||||
}
|
||||
if resp.Content[1].Type != "text" || resp.Content[1].Text != "final answer" {
|
||||
t.Fatalf("expected text block to be preserved, got %#v", resp.Content[1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestToolResultsContinuationIncludesInstructionPrefix(t *testing.T) {
|
||||
req := &OpenAIRequest{
|
||||
Model: "claude-sonnet-4.5",
|
||||
|
||||
Reference in New Issue
Block a user