diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go index ccb61974..483b3032 100644 --- a/backend/internal/handler/openai_gateway_handler.go +++ b/backend/internal/handler/openai_gateway_handler.go @@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat if streamStarted { flusher, ok := c.Writer.(http.Flusher) if ok { - errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n" - fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck + errPayload, _ := json.Marshal(gin.H{ + "type": "error", + "error": gin.H{ + "type": errType, + "message": message, + }, + }) + fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck flusher.Flush() } return diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go index 20eeb969..60c54539 100644 --- a/backend/internal/pkg/apicompat/anthropic_responses_test.go +++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go @@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) { assert.Contains(t, sse, "data: ") assert.Contains(t, sse, `"resp_1"`) } + +// --------------------------------------------------------------------------- +// response.failed tests +// --------------------------------------------------------------------------- + +func TestStreamingFailed(t *testing.T) { + state := NewResponsesEventToAnthropicState() + + // 1. response.created + ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{ + Type: "response.created", + Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"}, + }, state) + + // 2. Some text output before failure + ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{ + Type: "response.output_text.delta", + Delta: "Partial output before failure", + }, state) + + // 3. 
response.failed + events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{ + Type: "response.failed", + Response: &ResponsesResponse{ + Status: "failed", + Error: &ResponsesError{Code: "server_error", Message: "Internal error"}, + Usage: &ResponsesUsage{InputTokens: 50, OutputTokens: 10}, + }, + }, state) + + // Should close text block + message_delta + message_stop + require.Len(t, events, 3) + assert.Equal(t, "content_block_stop", events[0].Type) + assert.Equal(t, "message_delta", events[1].Type) + assert.Equal(t, "end_turn", events[1].Delta.StopReason) + assert.Equal(t, 50, events[1].Usage.InputTokens) + assert.Equal(t, 10, events[1].Usage.OutputTokens) + assert.Equal(t, "message_stop", events[2].Type) +} + +func TestStreamingFailedNoOutput(t *testing.T) { + state := NewResponsesEventToAnthropicState() + + // 1. response.created + ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{ + Type: "response.created", + Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"}, + }, state) + + // 2. 
response.failed with no prior output + events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{ + Type: "response.failed", + Response: &ResponsesResponse{ + Status: "failed", + Error: &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"}, + Usage: &ResponsesUsage{InputTokens: 20, OutputTokens: 0}, + }, + }, state) + + // Should emit message_delta + message_stop (no block to close) + require.Len(t, events, 2) + assert.Equal(t, "message_delta", events[0].Type) + assert.Equal(t, "end_turn", events[0].Delta.StopReason) + assert.Equal(t, "message_stop", events[1].Type) +} + +func TestResponsesToAnthropic_Failed(t *testing.T) { + resp := &ResponsesResponse{ + ID: "resp_fail_3", + Model: "gpt-5.2", + Status: "failed", + Error: &ResponsesError{Code: "server_error", Message: "Something went wrong"}, + Output: []ResponsesOutput{}, + Usage: &ResponsesUsage{InputTokens: 30, OutputTokens: 0}, + } + + anth := ResponsesToAnthropic(resp, "claude-opus-4-6") + // Failed status defaults to "end_turn" stop reason + assert.Equal(t, "end_turn", anth.StopReason) + // Should have at least an empty text block + require.Len(t, anth.Content, 1) + assert.Equal(t, "text", anth.Content[0].Type) +} + +// --------------------------------------------------------------------------- +// thinking → reasoning conversion tests +// --------------------------------------------------------------------------- + +func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "high", resp.Reasoning.Effort) + assert.Equal(t, "auto", resp.Reasoning.Summary) + assert.Contains(t, resp.Include, "reasoning.encrypted_content") + 
assert.NotContains(t, resp.Include, "reasoning.summary") +} + +func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + Thinking: &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + require.NotNil(t, resp.Reasoning) + assert.Equal(t, "medium", resp.Reasoning.Effort) + assert.Equal(t, "auto", resp.Reasoning.Summary) + assert.NotContains(t, resp.Include, "reasoning.summary") +} + +func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + Thinking: &AnthropicThinking{Type: "disabled"}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + assert.Nil(t, resp.Reasoning) + assert.NotContains(t, resp.Include, "reasoning.summary") +} + +func TestAnthropicToResponses_NoThinking(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + assert.Nil(t, resp.Reasoning) +} + +// --------------------------------------------------------------------------- +// tool_choice conversion tests +// --------------------------------------------------------------------------- + +func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + ToolChoice: json.RawMessage(`{"type":"auto"}`), + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + + var tc string + require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc)) + 
assert.Equal(t, "auto", tc) +} + +func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + ToolChoice: json.RawMessage(`{"type":"any"}`), + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + + var tc string + require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc)) + assert.Equal(t, "required", tc) +} + +func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) { + req := &AnthropicRequest{ + Model: "gpt-5.2", + MaxTokens: 1024, + Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}}, + ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`), + } + + resp, err := AnthropicToResponses(req) + require.NoError(t, err) + + var tc map[string]any + require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc)) + assert.Equal(t, "function", tc["type"]) + fn, ok := tc["function"].(map[string]any) + require.True(t, ok) + assert.Equal(t, "get_weather", fn["name"]) +} diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go index 1a13658d..cc0c9e6c 100644 --- a/backend/internal/pkg/apicompat/anthropic_to_responses.go +++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go @@ -2,6 +2,7 @@ package apicompat import ( "encoding/json" + "fmt" "strings" ) @@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) { out.Tools = convertAnthropicToolsToResponses(req.Tools) } + // Convert thinking → reasoning. + // generate_summary="auto" causes the upstream to emit reasoning_summary_text + // streaming events; the include array only needs reasoning.encrypted_content + // (already set above) for content continuity. 
// convertAnthropicToolChoiceToResponses maps an Anthropic tool_choice object
// to the tool_choice value expected by the OpenAI Responses API.
//
//	{"type":"auto"}            → "auto"
//	{"type":"any"}             → "required"
//	{"type":"none"}            → "none"
//	{"type":"tool","name":"X"} → {"type":"function","name":"X"}
//
// The Responses API takes the forced function's name at the top level of the
// object. The nested {"function":{"name":"X"}} form is the Chat Completions
// shape and must not be sent to /v1/responses.
//
// Unknown types are returned unchanged so that newer tool_choice variants are
// forwarded rather than dropped. An error is returned when raw cannot be
// decoded as a JSON object, or when type is "tool" and name is empty.
func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
	var tc struct {
		Type string `json:"type"`
		Name string `json:"name"`
	}
	if err := json.Unmarshal(raw, &tc); err != nil {
		return nil, err
	}

	switch tc.Type {
	case "auto":
		return json.Marshal("auto")
	case "any":
		return json.Marshal("required")
	case "none":
		return json.Marshal("none")
	case "tool":
		// A forced tool without a name cannot be honoured; fail here instead
		// of forwarding a request that names no function.
		if tc.Name == "" {
			return nil, fmt.Errorf("tool_choice type %q requires a name", tc.Type)
		}
		// A struct (not a map) keeps the key order deterministic.
		return json.Marshal(struct {
			Type string `json:"type"`
			Name string `json:"name"`
		}{Type: "function", Name: tc.Name})
	default:
		// Pass through unknown types as-is.
		return raw, nil
	}
}
func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) { diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go index 16770650..39d36cf4 100644 --- a/backend/internal/pkg/apicompat/responses_to_anthropic.go +++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go @@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents( return resToAnthHandleReasoningDelta(evt, state) case "response.reasoning_summary_text.done": return resToAnthHandleBlockDone(state) - case "response.completed", "response.incomplete": + case "response.completed", "response.incomplete", "response.failed": return resToAnthHandleCompleted(evt, state) default: return nil diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go index 92e85318..435f5032 100644 --- a/backend/internal/pkg/apicompat/types.go +++ b/backend/internal/pkg/apicompat/types.go @@ -1,7 +1,7 @@ // Package apicompat provides type definitions and conversion utilities for -// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI -// Responses API formats. It enables multi-protocol support so that clients -// using different API formats can be served through a unified gateway. +// translating between Anthropic Messages and OpenAI Responses API formats. +// It enables multi-protocol support so that clients using different API +// formats can be served through a unified gateway. package apicompat import "encoding/json" @@ -21,6 +21,14 @@ type AnthropicRequest struct { Temperature *float64 `json:"temperature,omitempty"` TopP *float64 `json:"top_p,omitempty"` StopSeqs []string `json:"stop_sequences,omitempty"` + Thinking *AnthropicThinking `json:"thinking,omitempty"` + ToolChoice json.RawMessage `json:"tool_choice,omitempty"` +} + +// AnthropicThinking configures extended thinking in the Anthropic API. 
+type AnthropicThinking struct { + Type string `json:"type"` // "enabled" | "adaptive" | "disabled" + BudgetTokens int `json:"budget_tokens,omitempty"` // max thinking tokens } // AnthropicMessage is a single message in the Anthropic conversation. @@ -120,143 +128,29 @@ type AnthropicDelta struct { StopSequence *string `json:"stop_sequence,omitempty"` } -// --------------------------------------------------------------------------- -// OpenAI Chat Completions API types -// --------------------------------------------------------------------------- - -// ChatRequest is the request body for POST /v1/chat/completions. -type ChatRequest struct { - Model string `json:"model"` - Messages []ChatMessage `json:"messages"` - MaxTokens *int `json:"max_tokens,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - Stream bool `json:"stream,omitempty"` - Tools []ChatTool `json:"tools,omitempty"` - Stop json.RawMessage `json:"stop,omitempty"` // string or []string -} - -// ChatMessage is a single message in the Chat Completions conversation. -type ChatMessage struct { - Role string `json:"role"` // "system" | "user" | "assistant" | "tool" - Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart - - // assistant fields - ToolCalls []ChatToolCall `json:"tool_calls,omitempty"` - - // tool fields - ToolCallID string `json:"tool_call_id,omitempty"` - - // Copilot-specific reasoning passthrough - ReasoningText string `json:"reasoning_text,omitempty"` - ReasoningOpaque string `json:"reasoning_opaque,omitempty"` -} - -// ChatContentPart is a typed content part in a multi-part message. -type ChatContentPart struct { - Type string `json:"type"` // "text" | "image_url" - Text string `json:"text,omitempty"` -} - -// ChatToolCall represents a tool invocation in an assistant message. -// In streaming deltas, Index identifies which tool call is being updated. 
-type ChatToolCall struct { - Index int `json:"index"` - ID string `json:"id,omitempty"` - Type string `json:"type,omitempty"` // "function" - Function ChatFunctionCall `json:"function"` -} - -// ChatFunctionCall holds the function name and arguments. -type ChatFunctionCall struct { - Name string `json:"name"` - Arguments string `json:"arguments"` -} - -// ChatTool describes a tool available to the model. -type ChatTool struct { - Type string `json:"type"` // "function" - Function ChatFunction `json:"function"` -} - -// ChatFunction is the function definition inside a ChatTool. -type ChatFunction struct { - Name string `json:"name"` - Description string `json:"description,omitempty"` - Parameters json.RawMessage `json:"parameters,omitempty"` // JSON Schema -} - -// ChatResponse is the non-streaming response from POST /v1/chat/completions. -type ChatResponse struct { - ID string `json:"id"` - Object string `json:"object"` // "chat.completion" - Created int64 `json:"created"` - Model string `json:"model"` - Choices []ChatChoice `json:"choices"` - Usage *ChatUsage `json:"usage,omitempty"` -} - -// ChatChoice is one completion choice. -type ChatChoice struct { - Index int `json:"index"` - Message ChatMessage `json:"message"` - FinishReason string `json:"finish_reason"` -} - -// ChatUsage holds token counts in Chat Completions format. -type ChatUsage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` -} - -// --------------------------------------------------------------------------- -// Chat Completions SSE types -// --------------------------------------------------------------------------- - -// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol. 
-type ChatStreamChunk struct { - ID string `json:"id"` - Object string `json:"object"` // "chat.completion.chunk" - Created int64 `json:"created"` - Model string `json:"model"` - Choices []ChatStreamChoice `json:"choices"` - Usage *ChatUsage `json:"usage,omitempty"` -} - -// ChatStreamChoice is one choice inside a streaming chunk. -type ChatStreamChoice struct { - Index int `json:"index"` - Delta ChatStreamDelta `json:"delta"` - FinishReason *string `json:"finish_reason"` -} - -// ChatStreamDelta carries incremental content in a streaming chunk. -type ChatStreamDelta struct { - Role string `json:"role,omitempty"` - Content string `json:"content,omitempty"` - ToolCalls []ChatToolCall `json:"tool_calls,omitempty"` - - // Copilot-specific reasoning passthrough (streaming) - ReasoningText string `json:"reasoning_text,omitempty"` - ReasoningOpaque string `json:"reasoning_opaque,omitempty"` -} - // --------------------------------------------------------------------------- // OpenAI Responses API types // --------------------------------------------------------------------------- // ResponsesRequest is the request body for POST /v1/responses. 
type ResponsesRequest struct { - Model string `json:"model"` - Input json.RawMessage `json:"input"` // string or []ResponsesInputItem - MaxOutputTokens *int `json:"max_output_tokens,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP *float64 `json:"top_p,omitempty"` - Stream bool `json:"stream,omitempty"` - Tools []ResponsesTool `json:"tools,omitempty"` - Include []string `json:"include,omitempty"` - Store *bool `json:"store,omitempty"` + Model string `json:"model"` + Input json.RawMessage `json:"input"` // string or []ResponsesInputItem + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP *float64 `json:"top_p,omitempty"` + Stream bool `json:"stream,omitempty"` + Tools []ResponsesTool `json:"tools,omitempty"` + Include []string `json:"include,omitempty"` + Store *bool `json:"store,omitempty"` + Reasoning *ResponsesReasoning `json:"reasoning,omitempty"` + ToolChoice json.RawMessage `json:"tool_choice,omitempty"` +} + +// ResponsesReasoning configures reasoning effort in the Responses API. +type ResponsesReasoning struct { + Effort string `json:"effort"` // "low" | "medium" | "high" + Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed" } // ResponsesInputItem is one item in the Responses API input array. @@ -305,6 +199,15 @@ type ResponsesResponse struct { // incomplete_details is present when status="incomplete" IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"` + + // Error is present when status="failed" + Error *ResponsesError `json:"error,omitempty"` +} + +// ResponsesError describes an error in a failed response. +type ResponsesError struct { + Code string `json:"code"` + Message string `json:"message"` } // ResponsesIncompleteDetails explains why a response is incomplete. 
@@ -349,6 +252,16 @@ type ResponsesUsage struct { OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"` } +// ResponsesInputTokensDetails breaks down input token usage. +type ResponsesInputTokensDetails struct { + CachedTokens int `json:"cached_tokens,omitempty"` +} + +// ResponsesOutputTokensDetails breaks down output token usage. +type ResponsesOutputTokensDetails struct { + ReasoningTokens int `json:"reasoning_tokens,omitempty"` +} + // --------------------------------------------------------------------------- // Responses SSE event types // --------------------------------------------------------------------------- @@ -388,153 +301,6 @@ type ResponsesStreamEvent struct { SequenceNumber int `json:"sequence_number,omitempty"` } -// ResponsesOutputReasoning is a reasoning output item in the Responses API. -// This type represents the "type":"reasoning" output item that contains -// extended thinking from the model. -type ResponsesOutputReasoning struct { - ID string `json:"id,omitempty"` - Type string `json:"type"` // "reasoning" - Status string `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete" - EncryptedContent string `json:"encrypted_content,omitempty"` - Summary []ResponsesReasoningSummary `json:"summary,omitempty"` -} - -// ResponsesReasoningSummary is a summary text block inside a reasoning output. -type ResponsesReasoningSummary struct { - Type string `json:"type"` // "summary_text" - Text string `json:"text"` -} - -// ResponsesStreamState maintains the state for converting Responses streaming -// events to Chat Completions format. It tracks content blocks, tool calls, -// reasoning blocks, and other streaming artifacts. 
-type ResponsesStreamState struct { - // Response metadata - ID string - Model string - Created int64 - - // Content tracking - ContentIndex int - CurrentText string - CurrentItemID string - PendingText []string // Text to accumulate before emitting - - // Tool call tracking - ToolCalls []ResponsesToolCallState - CurrentToolCall *ResponsesToolCallState - - // Reasoning tracking - ReasoningBlocks []ResponsesReasoningState - CurrentReasoning *ResponsesReasoningState - - // Usage tracking - InputTokens int - OutputTokens int - - // Status tracking - Status string - FinishReason string -} - -// ResponsesToolCallState tracks a single tool call during streaming. -type ResponsesToolCallState struct { - Index int - ItemID string - CallID string - Name string - Arguments string - Status string - IsComplete bool -} - -// ResponsesReasoningState tracks a reasoning block during streaming. -type ResponsesReasoningState struct { - ItemID string - SummaryIndex int - SummaryText string - Status string - IsComplete bool -} - -// ResponsesUsageDetail provides additional token usage details in Responses format. -type ResponsesUsageDetail struct { - InputTokens int `json:"input_tokens"` - OutputTokens int `json:"output_tokens"` - TotalTokens int `json:"total_tokens"` - - // Optional detailed breakdown - InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"` - OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"` -} - -// ResponsesInputTokensDetails breaks down input token usage. -type ResponsesInputTokensDetails struct { - CachedTokens int `json:"cached_tokens,omitempty"` -} - -// ResponsesOutputTokensDetails breaks down output token usage. 
-type ResponsesOutputTokensDetails struct { - ReasoningTokens int `json:"reasoning_tokens,omitempty"` -} - -// --------------------------------------------------------------------------- -// Finish reason mapping helpers -// --------------------------------------------------------------------------- - -// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason. -func ChatFinishToAnthropic(reason string) string { - switch reason { - case "stop": - return "end_turn" - case "tool_calls": - return "tool_use" - case "length": - return "max_tokens" - default: - return "end_turn" - } -} - -// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason. -func AnthropicStopToChat(reason string) string { - switch reason { - case "end_turn": - return "stop" - case "tool_use": - return "tool_calls" - case "max_tokens": - return "length" - default: - return "stop" - } -} - -// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason. -func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string { - switch status { - case "completed": - return "stop" - case "incomplete": - if details != nil && details.Reason == "max_output_tokens" { - return "length" - } - return "stop" - default: - return "stop" - } -} - -// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status. 
-func ChatFinishToResponsesStatus(reason string) string { - switch reason { - case "length": - return "incomplete" - default: - return "completed" - } -} - // --------------------------------------------------------------------------- // Shared constants // --------------------------------------------------------------------------- diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go index 4fe89732..b728bb07 100644 --- a/backend/internal/service/openai_gateway_messages.go +++ b/backend/internal/service/openai_gateway_messages.go @@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( mappedModel := account.GetMappedModel(originalModel) responsesReq.Model = mappedModel - logger.L().Info("openai messages: model mapping applied", + logger.L().Debug("openai messages: model mapping applied", zap.Int64("account_id", account.ID), zap.String("original_model", originalModel), zap.String("mapped_model", mappedModel), @@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( if err := json.Unmarshal(responsesBody, &reqBody); err != nil { return nil, fmt.Errorf("unmarshal for codex transform: %w", err) } - applyCodexOAuthTransform(reqBody, false) + applyCodexOAuthTransform(reqBody, false, false) // OAuth codex transform forces stream=true upstream, so always use // the streaming response handler regardless of what the client asked. isStream = true @@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic( // 9. 
Handle normal response if isStream { - return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime) + return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime) } - return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime) + return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime) } // handleAnthropicErrorResponse reads an upstream error and returns it in @@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse( resp *http.Response, c *gin.Context, originalModel string, + mappedModel string, startTime time.Time, ) (*OpenAIForwardResult, error) { requestID := resp.Header.Get("x-request-id") @@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse( c.JSON(http.StatusOK, anthropicResp) return &OpenAIForwardResult{ - RequestID: requestID, - Usage: usage, - Model: originalModel, - Stream: false, - Duration: time.Since(startTime), + RequestID: requestID, + Usage: usage, + Model: originalModel, + BillingModel: mappedModel, + Stream: false, + Duration: time.Since(startTime), }, nil } @@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse( resp *http.Response, c *gin.Context, originalModel string, + mappedModel string, startTime time.Time, ) (*OpenAIForwardResult, error) { requestID := resp.Header.Get("x-request-id") @@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse( } // Extract usage from completion events - if (event.Type == "response.completed" || event.Type == "response.incomplete") && + if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") && event.Response != nil && event.Response.Usage != nil { usage = OpenAIUsage{ InputTokens: event.Response.Usage.InputTokens, @@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse( RequestID: requestID, Usage: usage, 
Model: originalModel, + BillingModel: mappedModel, Stream: true, Duration: time.Since(startTime), FirstTokenMs: firstTokenMs, @@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse( RequestID: requestID, Usage: usage, Model: originalModel, + BillingModel: mappedModel, Stream: true, Duration: time.Since(startTime), FirstTokenMs: firstTokenMs, diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go index 2110f032..73a24cad 100644 --- a/backend/internal/service/openai_gateway_service.go +++ b/backend/internal/service/openai_gateway_service.go @@ -207,7 +207,12 @@ type OpenAIUsage struct { type OpenAIForwardResult struct { RequestID string Usage OpenAIUsage - Model string + Model string // 原始模型(用于响应和日志显示) + // BillingModel is the model used for cost calculation. + // When non-empty, CalculateCost uses this instead of Model. + // This is set by the Anthropic Messages conversion path where + // the mapped upstream model differs from the client-facing model. + BillingModel string // ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix. // Stored for usage records display; nil means not provided / not applicable. 
ReasoningEffort *string @@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier) } - cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier) + billingModel := result.Model + if result.BillingModel != "" { + billingModel = result.BillingModel + } + cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier) if err != nil { cost = &CostBreakdown{ActualCost: 0} } @@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec APIKeyID: apiKey.ID, AccountID: account.ID, RequestID: result.RequestID, - Model: result.Model, + Model: billingModel, ReasoningEffort: result.ReasoningEffort, InputTokens: actualInputTokens, OutputTokens: result.Usage.OutputTokens,