Merge pull request #1172 from alfadb/fix/openai-messages-effort-max-to-xhigh

fix(apicompat): 修正 Anthropic→OpenAI 推理级别映射
2026-03-20 13:48:41 +08:00
parent 94bba415b1 8afa8c1091
commit 94e26dee4f
2 changed files with 45 additions and 27 deletions
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -632,8 +632,8 @@ func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
-	// thinking.type is ignored for effort; default xhigh applies.
+	// thinking.type is ignored for effort; default high applies.
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 	assert.Equal(t, "auto", resp.Reasoning.Summary)
 	assert.Contains(t, resp.Include, "reasoning.encrypted_content")
 	assert.NotContains(t, resp.Include, "reasoning.summary")
@@ -650,8 +650,8 @@ func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
-	// thinking.type is ignored for effort; default xhigh applies.
+	// thinking.type is ignored for effort; default high applies.
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 	assert.Equal(t, "auto", resp.Reasoning.Summary)
 	assert.NotContains(t, resp.Include, "reasoning.summary")
 }
@@ -666,9 +666,9 @@ func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
-	// Default effort applies (high → xhigh) even when thinking is disabled.
+	// Default effort applies (high → high) even when thinking is disabled.
 	require.NotNil(t, resp.Reasoning)
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 }
 func TestAnthropicToResponses_NoThinking(t *testing.T) {
@@ -680,9 +680,9 @@ func TestAnthropicToResponses_NoThinking(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
-	// Default effort applies (high → xhigh) when no thinking/output_config is set.
+	// Default effort applies (high → high) when no thinking/output_config is set.
 	require.NotNil(t, resp.Reasoning)
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 }
 // ---------------------------------------------------------------------------
@@ -690,7 +690,7 @@ func TestAnthropicToResponses_NoThinking(t *testing.T) {
 // ---------------------------------------------------------------------------
 func TestAnthropicToResponses_OutputConfigOverridesDefault(t *testing.T) {
-	// Default is xhigh, but output_config.effort="low" overrides. low→low after mapping.
+	// Default is high, but output_config.effort="low" overrides. low→low after mapping.
 	req := &AnthropicRequest{
 		Model:        "gpt-5.2",
 		MaxTokens:    1024,
@@ -708,7 +708,7 @@ func TestAnthropicToResponses_OutputConfigOverridesDefault(t *testing.T) {
 func TestAnthropicToResponses_OutputConfigWithoutThinking(t *testing.T) {
 	// No thinking field, but output_config.effort="medium" → creates reasoning.
-	// medium→high after mapping.
+	// medium→medium after 1:1 mapping.
 	req := &AnthropicRequest{
 		Model:        "gpt-5.2",
 		MaxTokens:    1024,
@@ -719,12 +719,12 @@ func TestAnthropicToResponses_OutputConfigWithoutThinking(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
-	assert.Equal(t, "high", resp.Reasoning.Effort)
+	assert.Equal(t, "medium", resp.Reasoning.Effort)
 	assert.Equal(t, "auto", resp.Reasoning.Summary)
 }
 func TestAnthropicToResponses_OutputConfigHigh(t *testing.T) {
-	// output_config.effort="high" → mapped to "xhigh".
+	// output_config.effort="high" → mapped to "high" (1:1, both sides' default).
 	req := &AnthropicRequest{
 		Model:        "gpt-5.2",
 		MaxTokens:    1024,
@@ -732,6 +732,22 @@ func TestAnthropicToResponses_OutputConfigHigh(t *testing.T) {
 		OutputConfig: &AnthropicOutputConfig{Effort: "high"},
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
+	assert.Equal(t, "auto", resp.Reasoning.Summary)
+}
+func TestAnthropicToResponses_OutputConfigMax(t *testing.T) {
+	// output_config.effort="max" → mapped to OpenAI's highest supported level "xhigh".
+	req := &AnthropicRequest{
+		Model:        "gpt-5.2",
+		MaxTokens:    1024,
+		Messages:     []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		OutputConfig: &AnthropicOutputConfig{Effort: "max"},
+	}
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp.Reasoning)
@@ -740,7 +756,7 @@ func TestAnthropicToResponses_OutputConfigHigh(t *testing.T) {
 }
 func TestAnthropicToResponses_NoOutputConfig(t *testing.T) {
-	// No output_config → default xhigh regardless of thinking.type.
+	// No output_config → default high regardless of thinking.type.
 	req := &AnthropicRequest{
 		Model:     "gpt-5.2",
 		MaxTokens: 1024,
@@ -751,11 +767,11 @@ func TestAnthropicToResponses_NoOutputConfig(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 }
 func TestAnthropicToResponses_OutputConfigWithoutEffort(t *testing.T) {
-	// output_config present but effort empty (e.g. only format set) → default xhigh.
+	// output_config present but effort empty (e.g. only format set) → default high.
 	req := &AnthropicRequest{
 		Model:        "gpt-5.2",
 		MaxTokens:    1024,
@@ -766,7 +782,7 @@ func TestAnthropicToResponses_OutputConfigWithoutEffort(t *testing.T) {
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
-	assert.Equal(t, "xhigh", resp.Reasoning.Effort)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
 }
 // ---------------------------------------------------------------------------
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -46,9 +46,10 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
 	}
 	// Determine reasoning effort: only output_config.effort controls the
-	// level; thinking.type is ignored. Default is xhigh when unset.
+	// level; thinking.type is ignored. Default is high when unset (both
-	// Anthropic levels map to OpenAI: low→low, medium→high, high→xhigh.
+	// Anthropic and OpenAI default to high).
-	effort := "high" // default → maps to xhigh
+	// Anthropic levels map 1:1 to OpenAI: low→low, medium→medium, high→high, max→xhigh.
+	effort := "high" // default → both sides' default
 	if req.OutputConfig != nil && req.OutputConfig.Effort != "" {
 		effort = req.OutputConfig.Effort
 	}
@@ -380,18 +381,19 @@ func extractAnthropicTextFromBlocks(blocks []AnthropicContentBlock) string {
 // mapAnthropicEffortToResponses converts Anthropic reasoning effort levels to
 // OpenAI Responses API effort levels.
 //
+// Both APIs default to "high". The mapping is 1:1 for shared levels;
+// only Anthropic's "max" (Opus 4.6 exclusive) maps to OpenAI's "xhigh"
+// (GPT-5.2+ exclusive) as both represent the highest reasoning tier.
+//
 //	low    → low
-//	medium → high
+//	medium → medium
-//	high   → xhigh
+//	high   → high
+//	max    → xhigh
 func mapAnthropicEffortToResponses(effort string) string {
-	switch effort {
+	if effort == "max" {
-	case "medium":
-		return "high"
-	case "high":
 		return "xhigh"
-	default:
-		return effort // "low" and any unknown values pass through unchanged
 	}
+	return effort // low→low, medium→medium, high→high, unknown→passthrough
 }
 // convertAnthropicToolsToResponses maps Anthropic tool definitions to