diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go
index ccb61974..483b3032 100644
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat
 	if streamStarted {
 		flusher, ok := c.Writer.(http.Flusher)
 		if ok {
-			errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
-			fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
+			errPayload, _ := json.Marshal(gin.H{
+				"type": "error",
+				"error": gin.H{
+					"type":    errType,
+					"message": message,
+				},
+			})
+			fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck
 			flusher.Flush()
 		}
 		return
diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 20eeb969..60c54539 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) {
 	assert.Contains(t, sse, "data: ")
 	assert.Contains(t, sse, `"resp_1"`)
 }
+
+// ---------------------------------------------------------------------------
+// response.failed tests
+// ---------------------------------------------------------------------------
+
+func TestStreamingFailed(t *testing.T) {
+	state := NewResponsesEventToAnthropicState()
+
+	// 1. response.created
+	ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:     "response.created",
+		Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"},
+	}, state)
+
+	// 2. Some text output before failure
+	ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:  "response.output_text.delta",
+		Delta: "Partial output before failure",
+	}, state)
+
+	// 3. response.failed
+	events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type: "response.failed",
+		Response: &ResponsesResponse{
+			Status: "failed",
+			Error:  &ResponsesError{Code: "server_error", Message: "Internal error"},
+			Usage:  &ResponsesUsage{InputTokens: 50, OutputTokens: 10},
+		},
+	}, state)
+
+	// Should close text block + message_delta + message_stop
+	require.Len(t, events, 3)
+	assert.Equal(t, "content_block_stop", events[0].Type)
+	assert.Equal(t, "message_delta", events[1].Type)
+	assert.Equal(t, "end_turn", events[1].Delta.StopReason)
+	assert.Equal(t, 50, events[1].Usage.InputTokens)
+	assert.Equal(t, 10, events[1].Usage.OutputTokens)
+	assert.Equal(t, "message_stop", events[2].Type)
+}
+
+func TestStreamingFailedNoOutput(t *testing.T) {
+	state := NewResponsesEventToAnthropicState()
+
+	// 1. response.created
+	ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type:     "response.created",
+		Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"},
+	}, state)
+
+	// 2. response.failed with no prior output
+	events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+		Type: "response.failed",
+		Response: &ResponsesResponse{
+			Status: "failed",
+			Error:  &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"},
+			Usage:  &ResponsesUsage{InputTokens: 20, OutputTokens: 0},
+		},
+	}, state)
+
+	// Should emit message_delta + message_stop (no block to close)
+	require.Len(t, events, 2)
+	assert.Equal(t, "message_delta", events[0].Type)
+	assert.Equal(t, "end_turn", events[0].Delta.StopReason)
+	assert.Equal(t, "message_stop", events[1].Type)
+}
+
+func TestResponsesToAnthropic_Failed(t *testing.T) {
+	resp := &ResponsesResponse{
+		ID:     "resp_fail_3",
+		Model:  "gpt-5.2",
+		Status: "failed",
+		Error:  &ResponsesError{Code: "server_error", Message: "Something went wrong"},
+		Output: []ResponsesOutput{},
+		Usage:  &ResponsesUsage{InputTokens: 30, OutputTokens: 0},
+	}
+
+	anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+	// Failed status defaults to "end_turn" stop reason
+	assert.Equal(t, "end_turn", anth.StopReason)
+	// Should have at least an empty text block
+	require.Len(t, anth.Content, 1)
+	assert.Equal(t, "text", anth.Content[0].Type)
+}
+
+// ---------------------------------------------------------------------------
+// thinking → reasoning conversion tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:     "gpt-5.2",
+		MaxTokens: 1024,
+		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		Thinking:  &AnthropicThinking{Type: "enabled", BudgetTokens: 10000},
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp.Reasoning)
+	assert.Equal(t, "high", resp.Reasoning.Effort)
+	assert.Equal(t, "auto", resp.Reasoning.Summary)
+	assert.Contains(t, resp.Include, "reasoning.encrypted_content")
+	assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:     "gpt-5.2",
+		MaxTokens: 1024,
+		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		Thinking:  &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000},
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	require.NotNil(t, resp.Reasoning)
+	assert.Equal(t, "medium", resp.Reasoning.Effort)
+	assert.Equal(t, "auto", resp.Reasoning.Summary)
+	assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:     "gpt-5.2",
+		MaxTokens: 1024,
+		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		Thinking:  &AnthropicThinking{Type: "disabled"},
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	assert.Nil(t, resp.Reasoning)
+	assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_NoThinking(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:     "gpt-5.2",
+		MaxTokens: 1024,
+		Messages:  []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+	assert.Nil(t, resp.Reasoning)
+}
+
+// ---------------------------------------------------------------------------
+// tool_choice conversion tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:      "gpt-5.2",
+		MaxTokens:  1024,
+		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		ToolChoice: json.RawMessage(`{"type":"auto"}`),
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+
+	var tc string
+	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+	assert.Equal(t, "auto", tc)
+}
+
+func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:      "gpt-5.2",
+		MaxTokens:  1024,
+		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		ToolChoice: json.RawMessage(`{"type":"any"}`),
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+
+	var tc string
+	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+	assert.Equal(t, "required", tc)
+}
+
+func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
+	req := &AnthropicRequest{
+		Model:      "gpt-5.2",
+		MaxTokens:  1024,
+		Messages:   []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+		ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`),
+	}
+
+	resp, err := AnthropicToResponses(req)
+	require.NoError(t, err)
+
+	var tc map[string]any
+	require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+	assert.Equal(t, "function", tc["type"])
+	fn, ok := tc["function"].(map[string]any)
+	require.True(t, ok)
+	assert.Equal(t, "get_weather", fn["name"])
+}
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index 1a13658d..cc0c9e6c 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -2,6 +2,7 @@ package apicompat
 
 import (
 	"encoding/json"
+	"fmt"
 	"strings"
 )
 
@@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
 		out.Tools = convertAnthropicToolsToResponses(req.Tools)
 	}
 
+	// Convert thinking → reasoning.
+	// generate_summary="auto" causes the upstream to emit reasoning_summary_text
+	// streaming events; the include array only needs reasoning.encrypted_content
+	// (already set above) for content continuity.
+	if req.Thinking != nil {
+		switch req.Thinking.Type {
+		case "enabled":
+			out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"}
+		case "adaptive":
+			out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"}
+		}
+		// "disabled" or unknown → omit reasoning
+	}
+
+	// Convert tool_choice
+	if len(req.ToolChoice) > 0 {
+		tc, err := convertAnthropicToolChoiceToResponses(req.ToolChoice)
+		if err != nil {
+			return nil, fmt.Errorf("convert tool_choice: %w", err)
+		}
+		out.ToolChoice = tc
+	}
+
 	return out, nil
 }
 
+// convertAnthropicToolChoiceToResponses maps Anthropic tool_choice to Responses format.
+//
+//	{"type":"auto"}            → "auto"
+//	{"type":"any"}             → "required"
+//	{"type":"none"}            → "none"
+//	{"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
+func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
+	var tc struct {
+		Type string `json:"type"`
+		Name string `json:"name"`
+	}
+	if err := json.Unmarshal(raw, &tc); err != nil {
+		return nil, err
+	}
+
+	switch tc.Type {
+	case "auto":
+		return json.Marshal("auto")
+	case "any":
+		return json.Marshal("required")
+	case "none":
+		return json.Marshal("none")
+	case "tool":
+		return json.Marshal(map[string]any{
+			"type":     "function",
+			"function": map[string]string{"name": tc.Name},
+		})
+	default:
+		// Pass through unknown types as-is
+		return raw, nil
+	}
+}
+
 // convertAnthropicToResponsesInput builds the Responses API input items array
 // from the Anthropic system field and message list.
 func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go
index 16770650..39d36cf4 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents(
 		return resToAnthHandleReasoningDelta(evt, state)
 	case "response.reasoning_summary_text.done":
 		return resToAnthHandleBlockDone(state)
-	case "response.completed", "response.incomplete":
+	case "response.completed", "response.incomplete", "response.failed":
 		return resToAnthHandleCompleted(evt, state)
 	default:
 		return nil
diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go
index 92e85318..435f5032 100644
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -1,7 +1,7 @@
 // Package apicompat provides type definitions and conversion utilities for
-// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
-// Responses API formats. It enables multi-protocol support so that clients
-// using different API formats can be served through a unified gateway.
+// translating between Anthropic Messages and OpenAI Responses API formats.
+// It enables multi-protocol support so that clients using different API
+// formats can be served through a unified gateway.
 package apicompat
 
 import "encoding/json"
@@ -21,6 +21,14 @@ type AnthropicRequest struct {
 	Temperature *float64           `json:"temperature,omitempty"`
 	TopP        *float64           `json:"top_p,omitempty"`
 	StopSeqs    []string           `json:"stop_sequences,omitempty"`
+	Thinking    *AnthropicThinking `json:"thinking,omitempty"`
+	ToolChoice  json.RawMessage    `json:"tool_choice,omitempty"`
+}
+
+// AnthropicThinking configures extended thinking in the Anthropic API.
+type AnthropicThinking struct {
+	Type         string `json:"type"`                    // "enabled" | "adaptive" | "disabled"
+	BudgetTokens int    `json:"budget_tokens,omitempty"` // max thinking tokens
 }
 
 // AnthropicMessage is a single message in the Anthropic conversation.
@@ -120,143 +128,29 @@ type AnthropicDelta struct {
 	StopSequence *string `json:"stop_sequence,omitempty"`
 }
 
-// ---------------------------------------------------------------------------
-// OpenAI Chat Completions API types
-// ---------------------------------------------------------------------------
-
-// ChatRequest is the request body for POST /v1/chat/completions.
-type ChatRequest struct {
-	Model       string          `json:"model"`
-	Messages    []ChatMessage   `json:"messages"`
-	MaxTokens   *int            `json:"max_tokens,omitempty"`
-	Temperature *float64        `json:"temperature,omitempty"`
-	TopP        *float64        `json:"top_p,omitempty"`
-	Stream      bool            `json:"stream,omitempty"`
-	Tools       []ChatTool      `json:"tools,omitempty"`
-	Stop        json.RawMessage `json:"stop,omitempty"` // string or []string
-}
-
-// ChatMessage is a single message in the Chat Completions conversation.
-type ChatMessage struct {
-	Role    string          `json:"role"`              // "system" | "user" | "assistant" | "tool"
-	Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
-
-	// assistant fields
-	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
-
-	// tool fields
-	ToolCallID string `json:"tool_call_id,omitempty"`
-
-	// Copilot-specific reasoning passthrough
-	ReasoningText   string `json:"reasoning_text,omitempty"`
-	ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
-}
-
-// ChatContentPart is a typed content part in a multi-part message.
-type ChatContentPart struct {
-	Type string `json:"type"` // "text" | "image_url"
-	Text string `json:"text,omitempty"`
-}
-
-// ChatToolCall represents a tool invocation in an assistant message.
-// In streaming deltas, Index identifies which tool call is being updated.
-type ChatToolCall struct {
-	Index    int              `json:"index"`
-	ID       string           `json:"id,omitempty"`
-	Type     string           `json:"type,omitempty"` // "function"
-	Function ChatFunctionCall `json:"function"`
-}
-
-// ChatFunctionCall holds the function name and arguments.
-type ChatFunctionCall struct {
-	Name      string `json:"name"`
-	Arguments string `json:"arguments"`
-}
-
-// ChatTool describes a tool available to the model.
-type ChatTool struct {
-	Type     string       `json:"type"` // "function"
-	Function ChatFunction `json:"function"`
-}
-
-// ChatFunction is the function definition inside a ChatTool.
-type ChatFunction struct {
-	Name        string          `json:"name"`
-	Description string          `json:"description,omitempty"`
-	Parameters  json.RawMessage `json:"parameters,omitempty"` // JSON Schema
-}
-
-// ChatResponse is the non-streaming response from POST /v1/chat/completions.
-type ChatResponse struct {
-	ID      string       `json:"id"`
-	Object  string       `json:"object"` // "chat.completion"
-	Created int64        `json:"created"`
-	Model   string       `json:"model"`
-	Choices []ChatChoice `json:"choices"`
-	Usage   *ChatUsage   `json:"usage,omitempty"`
-}
-
-// ChatChoice is one completion choice.
-type ChatChoice struct {
-	Index        int         `json:"index"`
-	Message      ChatMessage `json:"message"`
-	FinishReason string      `json:"finish_reason"`
-}
-
-// ChatUsage holds token counts in Chat Completions format.
-type ChatUsage struct {
-	PromptTokens     int `json:"prompt_tokens"`
-	CompletionTokens int `json:"completion_tokens"`
-	TotalTokens      int `json:"total_tokens"`
-}
-
-// ---------------------------------------------------------------------------
-// Chat Completions SSE types
-// ---------------------------------------------------------------------------
-
-// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
-type ChatStreamChunk struct {
-	ID      string             `json:"id"`
-	Object  string             `json:"object"` // "chat.completion.chunk"
-	Created int64              `json:"created"`
-	Model   string             `json:"model"`
-	Choices []ChatStreamChoice `json:"choices"`
-	Usage   *ChatUsage         `json:"usage,omitempty"`
-}
-
-// ChatStreamChoice is one choice inside a streaming chunk.
-type ChatStreamChoice struct {
-	Index        int             `json:"index"`
-	Delta        ChatStreamDelta `json:"delta"`
-	FinishReason *string         `json:"finish_reason"`
-}
-
-// ChatStreamDelta carries incremental content in a streaming chunk.
-type ChatStreamDelta struct {
-	Role      string         `json:"role,omitempty"`
-	Content   string         `json:"content,omitempty"`
-	ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
-
-	// Copilot-specific reasoning passthrough (streaming)
-	ReasoningText   string `json:"reasoning_text,omitempty"`
-	ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
-}
-
 // ---------------------------------------------------------------------------
 // OpenAI Responses API types
 // ---------------------------------------------------------------------------
 
 // ResponsesRequest is the request body for POST /v1/responses.
 type ResponsesRequest struct {
-	Model           string          `json:"model"`
-	Input           json.RawMessage `json:"input"` // string or []ResponsesInputItem
-	MaxOutputTokens *int            `json:"max_output_tokens,omitempty"`
-	Temperature     *float64        `json:"temperature,omitempty"`
-	TopP            *float64        `json:"top_p,omitempty"`
-	Stream          bool            `json:"stream,omitempty"`
-	Tools           []ResponsesTool `json:"tools,omitempty"`
-	Include         []string        `json:"include,omitempty"`
-	Store           *bool           `json:"store,omitempty"`
+	Model           string              `json:"model"`
+	Input           json.RawMessage     `json:"input"` // string or []ResponsesInputItem
+	MaxOutputTokens *int                `json:"max_output_tokens,omitempty"`
+	Temperature     *float64            `json:"temperature,omitempty"`
+	TopP            *float64            `json:"top_p,omitempty"`
+	Stream          bool                `json:"stream,omitempty"`
+	Tools           []ResponsesTool     `json:"tools,omitempty"`
+	Include         []string            `json:"include,omitempty"`
+	Store           *bool               `json:"store,omitempty"`
+	Reasoning       *ResponsesReasoning `json:"reasoning,omitempty"`
+	ToolChoice      json.RawMessage     `json:"tool_choice,omitempty"`
+}
+
+// ResponsesReasoning configures reasoning effort in the Responses API.
+type ResponsesReasoning struct {
+	Effort  string `json:"effort"`            // "low" | "medium" | "high"
+	Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed"
 }
 
 // ResponsesInputItem is one item in the Responses API input array.
@@ -305,6 +199,15 @@ type ResponsesResponse struct {
 
 	// incomplete_details is present when status="incomplete"
 	IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
+
+	// Error is present when status="failed"
+	Error *ResponsesError `json:"error,omitempty"`
+}
+
+// ResponsesError describes an error in a failed response.
+type ResponsesError struct {
+	Code    string `json:"code"`
+	Message string `json:"message"`
 }
 
 // ResponsesIncompleteDetails explains why a response is incomplete.
@@ -349,6 +252,16 @@ type ResponsesUsage struct {
 	OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
 }
 
+// ResponsesInputTokensDetails breaks down input token usage.
+type ResponsesInputTokensDetails struct {
+	CachedTokens int `json:"cached_tokens,omitempty"`
+}
+
+// ResponsesOutputTokensDetails breaks down output token usage.
+type ResponsesOutputTokensDetails struct {
+	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
+}
+
 // ---------------------------------------------------------------------------
 // Responses SSE event types
 // ---------------------------------------------------------------------------
@@ -388,153 +301,6 @@ type ResponsesStreamEvent struct {
 	SequenceNumber int `json:"sequence_number,omitempty"`
 }
 
-// ResponsesOutputReasoning is a reasoning output item in the Responses API.
-// This type represents the "type":"reasoning" output item that contains
-// extended thinking from the model.
-type ResponsesOutputReasoning struct {
-	ID               string                      `json:"id,omitempty"`
-	Type             string                      `json:"type"`             // "reasoning"
-	Status           string                      `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
-	EncryptedContent string                      `json:"encrypted_content,omitempty"`
-	Summary          []ResponsesReasoningSummary `json:"summary,omitempty"`
-}
-
-// ResponsesReasoningSummary is a summary text block inside a reasoning output.
-type ResponsesReasoningSummary struct {
-	Type string `json:"type"` // "summary_text"
-	Text string `json:"text"`
-}
-
-// ResponsesStreamState maintains the state for converting Responses streaming
-// events to Chat Completions format. It tracks content blocks, tool calls,
-// reasoning blocks, and other streaming artifacts.
-type ResponsesStreamState struct {
-	// Response metadata
-	ID      string
-	Model   string
-	Created int64
-
-	// Content tracking
-	ContentIndex  int
-	CurrentText   string
-	CurrentItemID string
-	PendingText   []string // Text to accumulate before emitting
-
-	// Tool call tracking
-	ToolCalls       []ResponsesToolCallState
-	CurrentToolCall *ResponsesToolCallState
-
-	// Reasoning tracking
-	ReasoningBlocks  []ResponsesReasoningState
-	CurrentReasoning *ResponsesReasoningState
-
-	// Usage tracking
-	InputTokens  int
-	OutputTokens int
-
-	// Status tracking
-	Status       string
-	FinishReason string
-}
-
-// ResponsesToolCallState tracks a single tool call during streaming.
-type ResponsesToolCallState struct {
-	Index      int
-	ItemID     string
-	CallID     string
-	Name       string
-	Arguments  string
-	Status     string
-	IsComplete bool
-}
-
-// ResponsesReasoningState tracks a reasoning block during streaming.
-type ResponsesReasoningState struct {
-	ItemID       string
-	SummaryIndex int
-	SummaryText  string
-	Status       string
-	IsComplete   bool
-}
-
-// ResponsesUsageDetail provides additional token usage details in Responses format.
-type ResponsesUsageDetail struct {
-	InputTokens  int `json:"input_tokens"`
-	OutputTokens int `json:"output_tokens"`
-	TotalTokens  int `json:"total_tokens"`
-
-	// Optional detailed breakdown
-	InputTokensDetails  *ResponsesInputTokensDetails  `json:"input_tokens_details,omitempty"`
-	OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
-}
-
-// ResponsesInputTokensDetails breaks down input token usage.
-type ResponsesInputTokensDetails struct {
-	CachedTokens int `json:"cached_tokens,omitempty"`
-}
-
-// ResponsesOutputTokensDetails breaks down output token usage.
-type ResponsesOutputTokensDetails struct {
-	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
-}
-
-// ---------------------------------------------------------------------------
-// Finish reason mapping helpers
-// ---------------------------------------------------------------------------
-
-// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason.
-func ChatFinishToAnthropic(reason string) string {
-	switch reason {
-	case "stop":
-		return "end_turn"
-	case "tool_calls":
-		return "tool_use"
-	case "length":
-		return "max_tokens"
-	default:
-		return "end_turn"
-	}
-}
-
-// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason.
-func AnthropicStopToChat(reason string) string {
-	switch reason {
-	case "end_turn":
-		return "stop"
-	case "tool_use":
-		return "tool_calls"
-	case "max_tokens":
-		return "length"
-	default:
-		return "stop"
-	}
-}
-
-// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason.
-func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
-	switch status {
-	case "completed":
-		return "stop"
-	case "incomplete":
-		if details != nil && details.Reason == "max_output_tokens" {
-			return "length"
-		}
-		return "stop"
-	default:
-		return "stop"
-	}
-}
-
-// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status.
-func ChatFinishToResponsesStatus(reason string) string {
-	switch reason {
-	case "length":
-		return "incomplete"
-	default:
-		return "completed"
-	}
-}
-
 // ---------------------------------------------------------------------------
 // Shared constants
 // ---------------------------------------------------------------------------
diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go
index 4fe89732..b728bb07 100644
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 	mappedModel := account.GetMappedModel(originalModel)
 	responsesReq.Model = mappedModel
 
-	logger.L().Info("openai messages: model mapping applied",
+	logger.L().Debug("openai messages: model mapping applied",
 		zap.Int64("account_id", account.ID),
 		zap.String("original_model", originalModel),
 		zap.String("mapped_model", mappedModel),
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 		if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
 			return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
 		}
-		applyCodexOAuthTransform(reqBody, false)
+		applyCodexOAuthTransform(reqBody, false, false)
 		// OAuth codex transform forces stream=true upstream, so always use
 		// the streaming response handler regardless of what the client asked.
 		isStream = true
@@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
 
 	// 9. Handle normal response
 	if isStream {
-		return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
+		return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime)
 	}
-	return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
+	return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime)
 }
 
 // handleAnthropicErrorResponse reads an upstream error and returns it in
@@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
 	resp *http.Response,
 	c *gin.Context,
 	originalModel string,
+	mappedModel string,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
@@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
 	c.JSON(http.StatusOK, anthropicResp)
 
 	return &OpenAIForwardResult{
-		RequestID: requestID,
-		Usage:     usage,
-		Model:     originalModel,
-		Stream:    false,
-		Duration:  time.Since(startTime),
+		RequestID:    requestID,
+		Usage:        usage,
+		Model:        originalModel,
+		BillingModel: mappedModel,
+		Stream:       false,
+		Duration:     time.Since(startTime),
 	}, nil
 }
 
@@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 	resp *http.Response,
 	c *gin.Context,
 	originalModel string,
+	mappedModel string,
 	startTime time.Time,
 ) (*OpenAIForwardResult, error) {
 	requestID := resp.Header.Get("x-request-id")
@@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 		}
 
 		// Extract usage from completion events
-		if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
+		if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") &&
 			event.Response != nil && event.Response.Usage != nil {
 			usage = OpenAIUsage{
 				InputTokens:  event.Response.Usage.InputTokens,
@@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 					RequestID:    requestID,
 					Usage:        usage,
 					Model:        originalModel,
+					BillingModel: mappedModel,
 					Stream:       true,
 					Duration:     time.Since(startTime),
 					FirstTokenMs: firstTokenMs,
@@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
 		RequestID:    requestID,
 		Usage:        usage,
 		Model:        originalModel,
+		BillingModel: mappedModel,
 		Stream:       true,
 		Duration:     time.Since(startTime),
 		FirstTokenMs: firstTokenMs,
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 2110f032..73a24cad 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -207,7 +207,12 @@ type OpenAIUsage struct {
 type OpenAIForwardResult struct {
 	RequestID string
 	Usage     OpenAIUsage
-	Model     string
+	Model     string // 原始模型（用于响应和日志显示）
+	// BillingModel is the model used for cost calculation.
+	// When non-empty, CalculateCost uses this instead of Model.
+	// This is set by the Anthropic Messages conversion path where
+	// the mapped upstream model differs from the client-facing model.
+	BillingModel string
 	// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
 	// Stored for usage records display; nil means not provided / not applicable.
 	ReasoningEffort *string
@@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 		multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
 	}
 
-	cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier)
+	billingModel := result.Model
+	if result.BillingModel != "" {
+		billingModel = result.BillingModel
+	}
+	cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
 	if err != nil {
 		cost = &CostBreakdown{ActualCost: 0}
 	}
@@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 		APIKeyID:              apiKey.ID,
 		AccountID:             account.ID,
 		RequestID:             result.RequestID,
-		Model:                 result.Model,
+		Model:                 billingModel,
 		ReasoningEffort:       result.ReasoningEffort,
 		InputTokens:           actualInputTokens,
 		OutputTokens:          result.Usage.OutputTokens,