feat: /v1/messages端点适配codex账号池

2026-03-06 22:39:33 +08:00
parent afbe8bf001
commit 921599948b
7 changed files with 341 additions and 297 deletions
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat
 	if streamStarted {
 		flusher, ok := c.Writer.(http.Flusher)
 		if ok {
-			errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
+			errPayload, _ := json.Marshal(gin.H{
-			fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
+				"type": "error",
 				"error": gin.H{
 					"type":    errType,
 					"message": message,
 				},
 			})
 			fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck
 			flusher.Flush()
 		}
 		return
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) {
 	assert.Contains(t, sse, "data: ")
 	assert.Contains(t, sse, `"resp_1"`)
 }
 // ---------------------------------------------------------------------------
 // response.failed tests
 // ---------------------------------------------------------------------------
 func TestStreamingFailed(t *testing.T) {
 	st := NewResponsesEventToAnthropicState()
 	// Prime the converter with the events that precede the failure: the
 	// stream is created, then one text delta arrives. Their converted output
 	// is discarded; only the state they leave behind matters here.
 	createdEvt := &ResponsesStreamEvent{
 		Type:     "response.created",
 		Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"},
 	}
 	ResponsesEventToAnthropicEvents(createdEvt, st)
 	textEvt := &ResponsesStreamEvent{
 		Type:  "response.output_text.delta",
 		Delta: "Partial output before failure",
 	}
 	ResponsesEventToAnthropicEvents(textEvt, st)
 	// The upstream now reports the failure, carrying an error and usage.
 	failedEvt := &ResponsesStreamEvent{
 		Type: "response.failed",
 		Response: &ResponsesResponse{
 			Status: "failed",
 			Error:  &ResponsesError{Code: "server_error", Message: "Internal error"},
 			Usage:  &ResponsesUsage{InputTokens: 50, OutputTokens: 10},
 		},
 	}
 	got := ResponsesEventToAnthropicEvents(failedEvt, st)
 	// Expect exactly three events: the text block is closed, then
 	// message_delta (stop reason and usage), then message_stop.
 	require.Len(t, got, 3)
 	blockStop, msgDelta, msgStop := got[0], got[1], got[2]
 	assert.Equal(t, "content_block_stop", blockStop.Type)
 	assert.Equal(t, "message_delta", msgDelta.Type)
 	assert.Equal(t, "end_turn", msgDelta.Delta.StopReason)
 	assert.Equal(t, 50, msgDelta.Usage.InputTokens)
 	assert.Equal(t, 10, msgDelta.Usage.OutputTokens)
 	assert.Equal(t, "message_stop", msgStop.Type)
 }
 func TestStreamingFailedNoOutput(t *testing.T) {
 	st := NewResponsesEventToAnthropicState()
 	// Only response.created is seen before the failure, so no content block
 	// has been opened when response.failed arrives.
 	createdEvt := &ResponsesStreamEvent{
 		Type:     "response.created",
 		Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"},
 	}
 	ResponsesEventToAnthropicEvents(createdEvt, st)
 	failedEvt := &ResponsesStreamEvent{
 		Type: "response.failed",
 		Response: &ResponsesResponse{
 			Status: "failed",
 			Error:  &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"},
 			Usage:  &ResponsesUsage{InputTokens: 20, OutputTokens: 0},
 		},
 	}
 	got := ResponsesEventToAnthropicEvents(failedEvt, st)
 	// With nothing to close, the output is just message_delta + message_stop.
 	require.Len(t, got, 2)
 	msgDelta, msgStop := got[0], got[1]
 	assert.Equal(t, "message_delta", msgDelta.Type)
 	assert.Equal(t, "end_turn", msgDelta.Delta.StopReason)
 	assert.Equal(t, "message_stop", msgStop.Type)
 }
 func TestResponsesToAnthropic_Failed(t *testing.T) {
 	// A non-streaming upstream response that failed without producing output.
 	failed := &ResponsesResponse{
 		ID:     "resp_fail_3",
 		Model:  "gpt-5.2",
 		Status: "failed",
 		Error:  &ResponsesError{Code: "server_error", Message: "Something went wrong"},
 		Output: []ResponsesOutput{},
 		Usage:  &ResponsesUsage{InputTokens: 30, OutputTokens: 0},
 	}
 	converted := ResponsesToAnthropic(failed, "claude-opus-4-6")
 	// The "failed" status is expected to map to the "end_turn" stop reason.
 	assert.Equal(t, "end_turn", converted.StopReason)
 	// The content must hold exactly one block, and it must be a text block.
 	blocks := converted.Content
 	require.Len(t, blocks, 1)
 	assert.Equal(t, "text", blocks[0].Type)
 }
 // ---------------------------------------------------------------------------
 // thinking → reasoning conversion tests
 // ---------------------------------------------------------------------------
 func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:     "gpt-5.2",
 		MaxTokens: 1024,
 		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		Thinking:  &AnthropicThinking{Type: "enabled", BudgetTokens: 10000},
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
 	assert.Equal(t, "high", resp.Reasoning.Effort)
 	assert.Equal(t, "auto", resp.Reasoning.Summary)
 	assert.Contains(t, resp.Include, "reasoning.encrypted_content")
 	assert.NotContains(t, resp.Include, "reasoning.summary")
 }
 func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:     "gpt-5.2",
 		MaxTokens: 1024,
 		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		Thinking:  &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000},
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	require.NotNil(t, resp.Reasoning)
 	assert.Equal(t, "medium", resp.Reasoning.Effort)
 	assert.Equal(t, "auto", resp.Reasoning.Summary)
 	assert.NotContains(t, resp.Include, "reasoning.summary")
 }
 func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:     "gpt-5.2",
 		MaxTokens: 1024,
 		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		Thinking:  &AnthropicThinking{Type: "disabled"},
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	assert.Nil(t, resp.Reasoning)
 	assert.NotContains(t, resp.Include, "reasoning.summary")
 }
 func TestAnthropicToResponses_NoThinking(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:     "gpt-5.2",
 		MaxTokens: 1024,
 		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	assert.Nil(t, resp.Reasoning)
 }
 // ---------------------------------------------------------------------------
 // tool_choice conversion tests
 // ---------------------------------------------------------------------------
 func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:      "gpt-5.2",
 		MaxTokens:  1024,
 		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		ToolChoice: json.RawMessage(`{"type":"auto"}`),
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	var tc string
 	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
 	assert.Equal(t, "auto", tc)
 }
 func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:      "gpt-5.2",
 		MaxTokens:  1024,
 		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		ToolChoice: json.RawMessage(`{"type":"any"}`),
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	var tc string
 	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
 	assert.Equal(t, "required", tc)
 }
 func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
 	req := &AnthropicRequest{
 		Model:      "gpt-5.2",
 		MaxTokens:  1024,
 		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
 		ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`),
 	}
 	resp, err := AnthropicToResponses(req)
 	require.NoError(t, err)
 	var tc map[string]any
 	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
 	assert.Equal(t, "function", tc["type"])
 	fn, ok := tc["function"].(map[string]any)
 	require.True(t, ok)
 	assert.Equal(t, "get_weather", fn["name"])
 }
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -2,6 +2,7 @@ package apicompat
 import (
 	"encoding/json"
 	"fmt"
 	"strings"
 )
@@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
 		out.Tools = convertAnthropicToolsToResponses(req.Tools)
 	}
 	// Convert thinking → reasoning.
 	// generate_summary="auto" causes the upstream to emit reasoning_summary_text
 	// streaming events; the include array only needs reasoning.encrypted_content
 	// (already set above) for content continuity.
 	if req.Thinking != nil {
 		switch req.Thinking.Type {
 		case "enabled":
 			out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"}
 		case "adaptive":
 			out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"}
 		}
 		// "disabled" or unknown → omit reasoning
 	}
 	// Convert tool_choice
 	if len(req.ToolChoice) > 0 {
 		tc, err := convertAnthropicToolChoiceToResponses(req.ToolChoice)
 		if err != nil {
 			return nil, fmt.Errorf("convert tool_choice: %w", err)
 		}
 		out.ToolChoice = tc
 	}
 	return out, nil
 }
 // convertAnthropicToolChoiceToResponses maps an Anthropic tool_choice object
 // to the value stored in ResponsesRequest.ToolChoice.
 //
 //	{"type":"auto"}            → "auto"
 //	{"type":"any"}             → "required"
 //	{"type":"none"}            → "none"
 //	{"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
 //
 // Any other type is returned unchanged so the upstream can decide what to do
 // with it. An error is returned when raw cannot be decoded into the
 // {type, name} shape, or when type is "tool" but name is missing or empty:
 // forcing a call to a function with no name can never be satisfied.
 //
 // NOTE(review): the nested {"function":{"name":...}} object mirrors the Chat
 // Completions tool_choice shape. OpenAI's public Responses API reference
 // documents the flat form {"type":"function","name":"X"}; confirm which shape
 // the upstream used by this gateway accepts before relying on forced tools.
 func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
 	var tc struct {
 		Type string `json:"type"`
 		Name string `json:"name"`
 	}
 	if err := json.Unmarshal(raw, &tc); err != nil {
 		return nil, err
 	}
 	switch tc.Type {
 	case "auto":
 		return json.Marshal("auto")
 	case "any":
 		return json.Marshal("required")
 	case "none":
 		return json.Marshal("none")
 	case "tool":
 		// Reject the request here instead of forwarding an empty function name.
 		if tc.Name == "" {
 			return nil, fmt.Errorf(`tool_choice type "tool" requires a non-empty name`)
 		}
 		return json.Marshal(map[string]any{
 			"type":     "function",
 			"function": map[string]string{"name": tc.Name},
 		})
 	default:
 		// Pass through unknown types as-is
 		return raw, nil
 	}
 }
 // convertAnthropicToResponsesInput builds the Responses API input items array
 // from the Anthropic system field and message list.
 func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
--- a/backend/internal/pkg/apicompat/responses_to_anthropic.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents(
 		return resToAnthHandleReasoningDelta(evt, state)
 	case "response.reasoning_summary_text.done":
 		return resToAnthHandleBlockDone(state)
-	case "response.completed", "response.incomplete":
+	case "response.completed", "response.incomplete", "response.failed":
 		return resToAnthHandleCompleted(evt, state)
 	default:
 		return nil
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -1,7 +1,7 @@
 // Package apicompat provides type definitions and conversion utilities for
-// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
+// translating between Anthropic Messages and OpenAI Responses API formats.
-// Responses API formats. It enables multi-protocol support so that clients
+// It enables multi-protocol support so that clients using different API
-// using different API formats can be served through a unified gateway.
+// formats can be served through a unified gateway.
 package apicompat
 import "encoding/json"
@@ -21,6 +21,14 @@ type AnthropicRequest struct {
 	Temperature *float64           `json:"temperature,omitempty"`
 	TopP        *float64           `json:"top_p,omitempty"`
 	StopSeqs    []string           `json:"stop_sequences,omitempty"`
 	Thinking    *AnthropicThinking `json:"thinking,omitempty"`
 	ToolChoice  json.RawMessage    `json:"tool_choice,omitempty"`
 }
 // AnthropicThinking configures extended thinking in the Anthropic API.
 // It is carried in the "thinking" field of AnthropicRequest. Type selects the
 // mode; BudgetTokens is dropped from the JSON when zero (omitempty).
 type AnthropicThinking struct {
 	Type         string `json:"type"`                    // "enabled" | "adaptive" | "disabled"
 	BudgetTokens int    `json:"budget_tokens,omitempty"` // max thinking tokens
 }
 // AnthropicMessage is a single message in the Anthropic conversation.
@@ -120,143 +128,29 @@ type AnthropicDelta struct {
 	StopSequence *string `json:"stop_sequence,omitempty"`
 }
 // ---------------------------------------------------------------------------
 // OpenAI Chat Completions API types
 // ---------------------------------------------------------------------------
 // ChatRequest is the request body for POST /v1/chat/completions.
 // MaxTokens, Temperature and TopP are pointers combined with omitempty, so a
 // nil pointer is left out of the JSON while an explicit zero is still sent.
 // Stop is kept as raw JSON because it may be a single string or an array of
 // strings.
 type ChatRequest struct {
 	Model       string          `json:"model"`
 	Messages    []ChatMessage   `json:"messages"`
 	MaxTokens   *int            `json:"max_tokens,omitempty"`
 	Temperature *float64        `json:"temperature,omitempty"`
 	TopP        *float64        `json:"top_p,omitempty"`
 	Stream      bool            `json:"stream,omitempty"`
 	Tools       []ChatTool      `json:"tools,omitempty"`
 	Stop        json.RawMessage `json:"stop,omitempty"` // string or []string
 }
 // ChatMessage is a single message in the Chat Completions conversation.
 // Content is kept as raw JSON because it may be a plain string or an array of
 // ChatContentPart. The remaining fields are optional and grouped by the role
 // they belong to; all of them are omitted from the JSON when empty.
 type ChatMessage struct {
 	Role    string          `json:"role"`              // "system" | "user" | "assistant" | "tool"
 	Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
 	// assistant fields
 	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
 	// tool fields
 	ToolCallID string `json:"tool_call_id,omitempty"`
 	// Copilot-specific reasoning passthrough
 	ReasoningText   string `json:"reasoning_text,omitempty"`
 	ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
 }
 // ChatContentPart is a typed content part in a multi-part message.
 // Only a Text payload is declared here; although Type may be "image_url", this
 // struct has no field for the image data itself.
 type ChatContentPart struct {
 	Type string `json:"type"` // "text" | "image_url"
 	Text string `json:"text,omitempty"`
 }
 // ChatToolCall represents a tool invocation in an assistant message.
 // In streaming deltas, Index identifies which tool call is being updated.
 // Index and Function have no omitempty tag and are therefore always
 // serialized, including an Index of 0.
 type ChatToolCall struct {
 	Index    int              `json:"index"`
 	ID       string           `json:"id,omitempty"`
 	Type     string           `json:"type,omitempty"` // "function"
 	Function ChatFunctionCall `json:"function"`
 }
 // ChatFunctionCall holds the function name and arguments.
 // Both fields are always serialized (no omitempty). Arguments is a plain
 // string — presumably the JSON-encoded argument object; confirm with callers.
 type ChatFunctionCall struct {
 	Name      string `json:"name"`
 	Arguments string `json:"arguments"`
 }
 // ChatTool describes a tool available to the model.
 // It is the element type of ChatRequest.Tools and wraps a ChatFunction
 // definition together with a type discriminator.
 type ChatTool struct {
 	Type     string       `json:"type"` // "function"
 	Function ChatFunction `json:"function"`
 }
 // ChatFunction is the function definition inside a ChatTool.
 // Parameters is kept as raw JSON so the schema is carried without being
 // decoded into Go types; Description and Parameters are omitted when empty.
 type ChatFunction struct {
 	Name        string          `json:"name"`
 	Description string          `json:"description,omitempty"`
 	Parameters  json.RawMessage `json:"parameters,omitempty"` // JSON Schema
 }
 // ChatResponse is the non-streaming response from POST /v1/chat/completions.
 // Usage is a pointer with omitempty, so it is left out of the JSON when nil.
 // Created is an integer timestamp — presumably Unix seconds; confirm.
 type ChatResponse struct {
 	ID      string       `json:"id"`
 	Object  string       `json:"object"` // "chat.completion"
 	Created int64        `json:"created"`
 	Model   string       `json:"model"`
 	Choices []ChatChoice `json:"choices"`
 	Usage   *ChatUsage   `json:"usage,omitempty"`
 }
 // ChatChoice is one completion choice.
 // It is the element type of ChatResponse.Choices and pairs the generated
 // message with the reason generation stopped. All fields are always
 // serialized (no omitempty).
 type ChatChoice struct {
 	Index        int         `json:"index"`
 	Message      ChatMessage `json:"message"`
 	FinishReason string      `json:"finish_reason"`
 }
 // ChatUsage holds token counts in Chat Completions format.
 // All three counters are always serialized. Nothing in this type enforces
 // that TotalTokens equals PromptTokens + CompletionTokens; whoever fills the
 // struct is responsible for keeping them consistent.
 type ChatUsage struct {
 	PromptTokens     int `json:"prompt_tokens"`
 	CompletionTokens int `json:"completion_tokens"`
 	TotalTokens      int `json:"total_tokens"`
 }
 // ---------------------------------------------------------------------------
 // Chat Completions SSE types
 // ---------------------------------------------------------------------------
 // ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
 // It has the same top-level layout as ChatResponse but carries
 // ChatStreamChoice deltas instead of complete messages. Usage is omitted from
 // the JSON when nil.
 type ChatStreamChunk struct {
 	ID      string             `json:"id"`
 	Object  string             `json:"object"` // "chat.completion.chunk"
 	Created int64              `json:"created"`
 	Model   string             `json:"model"`
 	Choices []ChatStreamChoice `json:"choices"`
 	Usage   *ChatUsage         `json:"usage,omitempty"`
 }
 // ChatStreamChoice is one choice inside a streaming chunk.
 // FinishReason is a pointer without omitempty, so a nil value is serialized
 // as JSON null rather than being dropped from the chunk.
 type ChatStreamChoice struct {
 	Index        int             `json:"index"`
 	Delta        ChatStreamDelta `json:"delta"`
 	FinishReason *string         `json:"finish_reason"`
 }
 // ChatStreamDelta carries incremental content in a streaming chunk.
 // Every field is optional and omitted from the JSON when empty, so a delta
 // serializes only the pieces that are set.
 type ChatStreamDelta struct {
 	Role      string         `json:"role,omitempty"`
 	Content   string         `json:"content,omitempty"`
 	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
 	// Copilot-specific reasoning passthrough (streaming)
 	ReasoningText   string `json:"reasoning_text,omitempty"`
 	ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
 }
 // ---------------------------------------------------------------------------
 // OpenAI Responses API types
 // ---------------------------------------------------------------------------
 // ResponsesRequest is the request body for POST /v1/responses.
 type ResponsesRequest struct {
-	Model           string          `json:"model"`
+	Model           string              `json:"model"`
-	Input           json.RawMessage `json:"input"` // string or []ResponsesInputItem
+	Input           json.RawMessage     `json:"input"` // string or []ResponsesInputItem
-	MaxOutputTokens *int            `json:"max_output_tokens,omitempty"`
+	MaxOutputTokens *int                `json:"max_output_tokens,omitempty"`
-	Temperature     *float64        `json:"temperature,omitempty"`
+	Temperature     *float64            `json:"temperature,omitempty"`
-	TopP            *float64        `json:"top_p,omitempty"`
+	TopP            *float64            `json:"top_p,omitempty"`
-	Stream          bool            `json:"stream,omitempty"`
+	Stream          bool                `json:"stream,omitempty"`
-	Tools           []ResponsesTool `json:"tools,omitempty"`
+	Tools           []ResponsesTool     `json:"tools,omitempty"`
-	Include         []string        `json:"include,omitempty"`
+	Include         []string            `json:"include,omitempty"`
-	Store           *bool           `json:"store,omitempty"`
+	Store           *bool               `json:"store,omitempty"`
 	Reasoning       *ResponsesReasoning `json:"reasoning,omitempty"`
 	ToolChoice      json.RawMessage     `json:"tool_choice,omitempty"`
 }
 // ResponsesReasoning configures reasoning effort in the Responses API.
 // It is carried in the "reasoning" field of ResponsesRequest. Effort is
 // always serialized; Summary is omitted from the JSON when empty.
 type ResponsesReasoning struct {
 	Effort  string `json:"effort"`            // "low" | "medium" | "high"
 	Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed"
 }
 // ResponsesInputItem is one item in the Responses API input array.
@@ -305,6 +199,15 @@ type ResponsesResponse struct {
 	// incomplete_details is present when status="incomplete"
 	IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
 	// Error is present when status="failed"
 	Error *ResponsesError `json:"error,omitempty"`
 }
 // ResponsesError describes an error in a failed response.
 // It is the type of ResponsesResponse.Error. Both fields are always
 // serialized (no omitempty).
 type ResponsesError struct {
 	Code    string `json:"code"`
 	Message string `json:"message"`
 }
 // ResponsesIncompleteDetails explains why a response is incomplete.
@@ -349,6 +252,16 @@ type ResponsesUsage struct {
 	OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
 }
 // ResponsesInputTokensDetails breaks down input token usage.
 // CachedTokens is omitted from the JSON when zero.
 type ResponsesInputTokensDetails struct {
 	CachedTokens int `json:"cached_tokens,omitempty"`
 }
 // ResponsesOutputTokensDetails breaks down output token usage.
 // It is referenced from ResponsesUsage.OutputTokensDetails. ReasoningTokens
 // is omitted from the JSON when zero.
 type ResponsesOutputTokensDetails struct {
 	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
 }
 // ---------------------------------------------------------------------------
 // Responses SSE event types
 // ---------------------------------------------------------------------------
@@ -388,153 +301,6 @@ type ResponsesStreamEvent struct {
 	SequenceNumber int `json:"sequence_number,omitempty"`
 }
 // ResponsesOutputReasoning is a reasoning output item in the Responses API.
 // This type represents the "type":"reasoning" output item that contains
 // extended thinking from the model. Type is the only field that is always
 // serialized; every other field is omitted from the JSON when empty.
 type ResponsesOutputReasoning struct {
 	ID               string                      `json:"id,omitempty"`
 	Type             string                      `json:"type"`             // "reasoning"
 	Status           string                      `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
 	EncryptedContent string                      `json:"encrypted_content,omitempty"`
 	Summary          []ResponsesReasoningSummary `json:"summary,omitempty"`
 }
 // ResponsesReasoningSummary is a summary text block inside a reasoning output.
 // It is the element type of ResponsesOutputReasoning.Summary. Both fields are
 // always serialized (no omitempty).
 type ResponsesReasoningSummary struct {
 	Type string `json:"type"` // "summary_text"
 	Text string `json:"text"`
 }
 // ResponsesStreamState maintains the state for converting Responses streaming
 // events to Chat Completions format. It tracks content blocks, tool calls,
 // reasoning blocks, and other streaming artifacts.
 //
 // None of the fields carry JSON tags. CurrentToolCall and CurrentReasoning
 // are pointers and may be nil — presumably when no tool call or reasoning
 // block is in progress; confirm against the code that drives this state.
 type ResponsesStreamState struct {
 	// Response metadata
 	ID      string
 	Model   string
 	Created int64
 	// Content tracking
 	ContentIndex  int
 	CurrentText   string
 	CurrentItemID string
 	PendingText   []string // Text to accumulate before emitting
 	// Tool call tracking
 	ToolCalls       []ResponsesToolCallState
 	CurrentToolCall *ResponsesToolCallState
 	// Reasoning tracking
 	ReasoningBlocks  []ResponsesReasoningState
 	CurrentReasoning *ResponsesReasoningState
 	// Usage tracking
 	InputTokens  int
 	OutputTokens int
 	// Status tracking
 	Status       string
 	FinishReason string
 }
 // ResponsesToolCallState tracks a single tool call during streaming.
 // It is the element type of ResponsesStreamState.ToolCalls and the target of
 // ResponsesStreamState.CurrentToolCall. The fields carry no JSON tags.
 // NOTE(review): Arguments presumably accumulates streamed argument fragments
 // until IsComplete is set — confirm against the code that updates it.
 type ResponsesToolCallState struct {
 	Index      int
 	ItemID     string
 	CallID     string
 	Name       string
 	Arguments  string
 	Status     string
 	IsComplete bool
 }
 // ResponsesReasoningState tracks a reasoning block during streaming.
 // It is the element type of ResponsesStreamState.ReasoningBlocks and the
 // target of ResponsesStreamState.CurrentReasoning. The fields carry no JSON
 // tags.
 type ResponsesReasoningState struct {
 	ItemID       string
 	SummaryIndex int
 	SummaryText  string
 	Status       string
 	IsComplete   bool
 }
 // ResponsesUsageDetail provides additional token usage details in Responses format.
 // The three counters are always serialized; the two detail pointers are
 // omitted from the JSON when nil. Nothing in this type enforces that
 // TotalTokens equals InputTokens + OutputTokens.
 type ResponsesUsageDetail struct {
 	InputTokens  int `json:"input_tokens"`
 	OutputTokens int `json:"output_tokens"`
 	TotalTokens  int `json:"total_tokens"`
 	// Optional detailed breakdown
 	InputTokensDetails  *ResponsesInputTokensDetails  `json:"input_tokens_details,omitempty"`
 	OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
 }
 // ResponsesInputTokensDetails breaks down input token usage.
 // It is referenced from ResponsesUsageDetail.InputTokensDetails. CachedTokens
 // is omitted from the JSON when zero.
 type ResponsesInputTokensDetails struct {
 	CachedTokens int `json:"cached_tokens,omitempty"`
 }
 // ResponsesOutputTokensDetails breaks down output token usage.
 // It is referenced from ResponsesUsageDetail.OutputTokensDetails.
 // ReasoningTokens is omitted from the JSON when zero.
 type ResponsesOutputTokensDetails struct {
 	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
 }
 // ---------------------------------------------------------------------------
 // Finish reason mapping helpers
 // ---------------------------------------------------------------------------
 // ChatFinishToAnthropic translates a Chat Completions finish_reason into the
 // matching Anthropic stop_reason: "tool_calls" becomes "tool_use", "length"
 // becomes "max_tokens", and everything else — "stop" as well as any
 // unrecognized value — becomes "end_turn".
 func ChatFinishToAnthropic(reason string) string {
 	if reason == "tool_calls" {
 		return "tool_use"
 	}
 	if reason == "length" {
 		return "max_tokens"
 	}
 	return "end_turn"
 }
 // AnthropicStopToChat translates an Anthropic stop_reason into the matching
 // Chat Completions finish_reason: "tool_use" becomes "tool_calls",
 // "max_tokens" becomes "length", and everything else — "end_turn" as well as
 // any unrecognized value — becomes "stop".
 func AnthropicStopToChat(reason string) string {
 	if reason == "tool_use" {
 		return "tool_calls"
 	}
 	if reason == "max_tokens" {
 		return "length"
 	}
 	return "stop"
 }
 // ResponsesStatusToChat translates a Responses API status into a Chat
 // Completions finish_reason. The result is "length" only when status is
 // "incomplete" and details reports the reason "max_output_tokens"; every
 // other combination, including a nil details, yields "stop".
 func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
 	hitTokenLimit := status == "incomplete" &&
 		details != nil &&
 		details.Reason == "max_output_tokens"
 	if hitTokenLimit {
 		return "length"
 	}
 	return "stop"
 }
 // ChatFinishToResponsesStatus translates a Chat Completions finish_reason
 // into a Responses status: "length" becomes "incomplete" and every other
 // value becomes "completed".
 func ChatFinishToResponsesStatus(reason string) string {
 	if reason == "length" {
 		return "incomplete"
 	}
 	return "completed"
 }
 // ---------------------------------------------------------------------------
 // Shared constants
 // ---------------------------------------------------------------------------
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 	mappedModel := account.GetMappedModel(originalModel)
 	responsesReq.Model = mappedModel
-	logger.L().Info("openai messages: model mapping applied",
+	logger.L().Debug("openai messages: model mapping applied",
 		zap.Int64("account_id", account.ID),
 		zap.String("original_model", originalModel),
 		zap.String("mapped_model", mappedModel),
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 		if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
 			return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
 		}
-		applyCodexOAuthTransform(reqBody, false)
+		applyCodexOAuthTransform(reqBody, false, false)
 		// OAuth codex transform forces stream=true upstream, so always use
 		// the streaming response handler regardless of what the client asked.
 		isStream = true
@@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 	// 9. Handle normal response
 	if isStream {
-		return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
+		return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime)
 	}
-	return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
+	return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime)
 }
 // handleAnthropicErrorResponse reads an upstream error and returns it in
@@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
 	resp *http.Response,
 	c *gin.Context,
 	originalModel string,
 	mappedModel string,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
@@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
 	c.JSON(http.StatusOK, anthropicResp)
 	return &OpenAIForwardResult{
-		RequestID: requestID,
+		RequestID:    requestID,
-		Usage:     usage,
+		Usage:        usage,
-		Model:     originalModel,
+		Model:        originalModel,
-		Stream:    false,
+		BillingModel: mappedModel,
-		Duration:  time.Since(startTime),
+		Stream:       false,
 		Duration:     time.Since(startTime),
 	}, nil
 }
@@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 	resp *http.Response,
 	c *gin.Context,
 	originalModel string,
 	mappedModel string,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
@@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 		}
 		// Extract usage from completion events
-		if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
+		if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") &&
 			event.Response != nil && event.Response.Usage != nil {
 			usage = OpenAIUsage{
 				InputTokens:  event.Response.Usage.InputTokens,
@@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 					RequestID:    requestID,
 					Usage:        usage,
 					Model:        originalModel,
 					BillingModel: mappedModel,
 					Stream:       true,
 					Duration:     time.Since(startTime),
 					FirstTokenMs: firstTokenMs,
@@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 		RequestID:    requestID,
 		Usage:        usage,
 		Model:        originalModel,
 		BillingModel: mappedModel,
 		Stream:       true,
 		Duration:     time.Since(startTime),
 		FirstTokenMs: firstTokenMs,
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -207,7 +207,12 @@ type OpenAIUsage struct {
 type OpenAIForwardResult struct {
 	RequestID string
 	Usage     OpenAIUsage
-	Model     string
+	Model     string // 原始模型（用于响应和日志显示）
 	// BillingModel is the model used for cost calculation.
 	// When non-empty, CalculateCost uses this instead of Model.
 	// This is set by the Anthropic Messages conversion path where
 	// the mapped upstream model differs from the client-facing model.
 	BillingModel string
 	// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
 	// Stored for usage records display; nil means not provided / not applicable.
 	ReasoningEffort *string
@@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 		multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
 	}
-	cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier)
+	billingModel := result.Model
 	if result.BillingModel != "" {
 		billingModel = result.BillingModel
 	}
 	cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
 	if err != nil {
 		cost = &CostBreakdown{ActualCost: 0}
 	}
@@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 		APIKeyID:              apiKey.ID,
 		AccountID:             account.ID,
 		RequestID:             result.RequestID,
-		Model:                 result.Model,
+		Model:                 billingModel,
 		ReasoningEffort:       result.ReasoningEffort,
 		InputTokens:           actualInputTokens,
 		OutputTokens:          result.Usage.OutputTokens,