feat: /v1/messages端点适配codex账号池
This commit is contained in:
@@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat
|
||||
if streamStarted {
|
||||
flusher, ok := c.Writer.(http.Flusher)
|
||||
if ok {
|
||||
errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
|
||||
fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
|
||||
errPayload, _ := json.Marshal(gin.H{
|
||||
"type": "error",
|
||||
"error": gin.H{
|
||||
"type": errType,
|
||||
"message": message,
|
||||
},
|
||||
})
|
||||
fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck
|
||||
flusher.Flush()
|
||||
}
|
||||
return
|
||||
|
||||
@@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) {
|
||||
assert.Contains(t, sse, "data: ")
|
||||
assert.Contains(t, sse, `"resp_1"`)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// response.failed tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestStreamingFailed(t *testing.T) {
|
||||
state := NewResponsesEventToAnthropicState()
|
||||
|
||||
// 1. response.created
|
||||
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||
Type: "response.created",
|
||||
Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"},
|
||||
}, state)
|
||||
|
||||
// 2. Some text output before failure
|
||||
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||
Type: "response.output_text.delta",
|
||||
Delta: "Partial output before failure",
|
||||
}, state)
|
||||
|
||||
// 3. response.failed
|
||||
events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||
Type: "response.failed",
|
||||
Response: &ResponsesResponse{
|
||||
Status: "failed",
|
||||
Error: &ResponsesError{Code: "server_error", Message: "Internal error"},
|
||||
Usage: &ResponsesUsage{InputTokens: 50, OutputTokens: 10},
|
||||
},
|
||||
}, state)
|
||||
|
||||
// Should close text block + message_delta + message_stop
|
||||
require.Len(t, events, 3)
|
||||
assert.Equal(t, "content_block_stop", events[0].Type)
|
||||
assert.Equal(t, "message_delta", events[1].Type)
|
||||
assert.Equal(t, "end_turn", events[1].Delta.StopReason)
|
||||
assert.Equal(t, 50, events[1].Usage.InputTokens)
|
||||
assert.Equal(t, 10, events[1].Usage.OutputTokens)
|
||||
assert.Equal(t, "message_stop", events[2].Type)
|
||||
}
|
||||
|
||||
func TestStreamingFailedNoOutput(t *testing.T) {
|
||||
state := NewResponsesEventToAnthropicState()
|
||||
|
||||
// 1. response.created
|
||||
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||
Type: "response.created",
|
||||
Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"},
|
||||
}, state)
|
||||
|
||||
// 2. response.failed with no prior output
|
||||
events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||
Type: "response.failed",
|
||||
Response: &ResponsesResponse{
|
||||
Status: "failed",
|
||||
Error: &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"},
|
||||
Usage: &ResponsesUsage{InputTokens: 20, OutputTokens: 0},
|
||||
},
|
||||
}, state)
|
||||
|
||||
// Should emit message_delta + message_stop (no block to close)
|
||||
require.Len(t, events, 2)
|
||||
assert.Equal(t, "message_delta", events[0].Type)
|
||||
assert.Equal(t, "end_turn", events[0].Delta.StopReason)
|
||||
assert.Equal(t, "message_stop", events[1].Type)
|
||||
}
|
||||
|
||||
func TestResponsesToAnthropic_Failed(t *testing.T) {
|
||||
resp := &ResponsesResponse{
|
||||
ID: "resp_fail_3",
|
||||
Model: "gpt-5.2",
|
||||
Status: "failed",
|
||||
Error: &ResponsesError{Code: "server_error", Message: "Something went wrong"},
|
||||
Output: []ResponsesOutput{},
|
||||
Usage: &ResponsesUsage{InputTokens: 30, OutputTokens: 0},
|
||||
}
|
||||
|
||||
anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
|
||||
// Failed status defaults to "end_turn" stop reason
|
||||
assert.Equal(t, "end_turn", anth.StopReason)
|
||||
// Should have at least an empty text block
|
||||
require.Len(t, anth.Content, 1)
|
||||
assert.Equal(t, "text", anth.Content[0].Type)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// thinking → reasoning conversion tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000},
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp.Reasoning)
|
||||
assert.Equal(t, "high", resp.Reasoning.Effort)
|
||||
assert.Equal(t, "auto", resp.Reasoning.Summary)
|
||||
assert.Contains(t, resp.Include, "reasoning.encrypted_content")
|
||||
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||
}
|
||||
|
||||
func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
Thinking: &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000},
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp.Reasoning)
|
||||
assert.Equal(t, "medium", resp.Reasoning.Effort)
|
||||
assert.Equal(t, "auto", resp.Reasoning.Summary)
|
||||
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||
}
|
||||
|
||||
func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
Thinking: &AnthropicThinking{Type: "disabled"},
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
assert.Nil(t, resp.Reasoning)
|
||||
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||
}
|
||||
|
||||
func TestAnthropicToResponses_NoThinking(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
assert.Nil(t, resp.Reasoning)
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// tool_choice conversion tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
ToolChoice: json.RawMessage(`{"type":"auto"}`),
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
var tc string
|
||||
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||
assert.Equal(t, "auto", tc)
|
||||
}
|
||||
|
||||
func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
ToolChoice: json.RawMessage(`{"type":"any"}`),
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
var tc string
|
||||
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||
assert.Equal(t, "required", tc)
|
||||
}
|
||||
|
||||
func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
|
||||
req := &AnthropicRequest{
|
||||
Model: "gpt-5.2",
|
||||
MaxTokens: 1024,
|
||||
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||
ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`),
|
||||
}
|
||||
|
||||
resp, err := AnthropicToResponses(req)
|
||||
require.NoError(t, err)
|
||||
|
||||
var tc map[string]any
|
||||
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||
assert.Equal(t, "function", tc["type"])
|
||||
fn, ok := tc["function"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
assert.Equal(t, "get_weather", fn["name"])
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package apicompat
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
@@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
|
||||
out.Tools = convertAnthropicToolsToResponses(req.Tools)
|
||||
}
|
||||
|
||||
// Convert thinking → reasoning.
|
||||
// generate_summary="auto" causes the upstream to emit reasoning_summary_text
|
||||
// streaming events; the include array only needs reasoning.encrypted_content
|
||||
// (already set above) for content continuity.
|
||||
if req.Thinking != nil {
|
||||
switch req.Thinking.Type {
|
||||
case "enabled":
|
||||
out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"}
|
||||
case "adaptive":
|
||||
out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"}
|
||||
}
|
||||
// "disabled" or unknown → omit reasoning
|
||||
}
|
||||
|
||||
// Convert tool_choice
|
||||
if len(req.ToolChoice) > 0 {
|
||||
tc, err := convertAnthropicToolChoiceToResponses(req.ToolChoice)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("convert tool_choice: %w", err)
|
||||
}
|
||||
out.ToolChoice = tc
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// convertAnthropicToolChoiceToResponses maps Anthropic tool_choice to Responses format.
|
||||
//
|
||||
// {"type":"auto"} → "auto"
|
||||
// {"type":"any"} → "required"
|
||||
// {"type":"none"} → "none"
|
||||
// {"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
|
||||
func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
|
||||
var tc struct {
|
||||
Type string `json:"type"`
|
||||
Name string `json:"name"`
|
||||
}
|
||||
if err := json.Unmarshal(raw, &tc); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch tc.Type {
|
||||
case "auto":
|
||||
return json.Marshal("auto")
|
||||
case "any":
|
||||
return json.Marshal("required")
|
||||
case "none":
|
||||
return json.Marshal("none")
|
||||
case "tool":
|
||||
return json.Marshal(map[string]any{
|
||||
"type": "function",
|
||||
"function": map[string]string{"name": tc.Name},
|
||||
})
|
||||
default:
|
||||
// Pass through unknown types as-is
|
||||
return raw, nil
|
||||
}
|
||||
}
|
||||
|
||||
// convertAnthropicToResponsesInput builds the Responses API input items array
|
||||
// from the Anthropic system field and message list.
|
||||
func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
|
||||
|
||||
@@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents(
|
||||
return resToAnthHandleReasoningDelta(evt, state)
|
||||
case "response.reasoning_summary_text.done":
|
||||
return resToAnthHandleBlockDone(state)
|
||||
case "response.completed", "response.incomplete":
|
||||
case "response.completed", "response.incomplete", "response.failed":
|
||||
return resToAnthHandleCompleted(evt, state)
|
||||
default:
|
||||
return nil
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Package apicompat provides type definitions and conversion utilities for
|
||||
// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
|
||||
// Responses API formats. It enables multi-protocol support so that clients
|
||||
// using different API formats can be served through a unified gateway.
|
||||
// translating between Anthropic Messages and OpenAI Responses API formats.
|
||||
// It enables multi-protocol support so that clients using different API
|
||||
// formats can be served through a unified gateway.
|
||||
package apicompat
|
||||
|
||||
import "encoding/json"
|
||||
@@ -21,6 +21,14 @@ type AnthropicRequest struct {
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
StopSeqs []string `json:"stop_sequences,omitempty"`
|
||||
Thinking *AnthropicThinking `json:"thinking,omitempty"`
|
||||
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
|
||||
}
|
||||
|
||||
// AnthropicThinking configures extended thinking in the Anthropic API.
|
||||
type AnthropicThinking struct {
|
||||
Type string `json:"type"` // "enabled" | "adaptive" | "disabled"
|
||||
BudgetTokens int `json:"budget_tokens,omitempty"` // max thinking tokens
|
||||
}
|
||||
|
||||
// AnthropicMessage is a single message in the Anthropic conversation.
|
||||
@@ -120,143 +128,29 @@ type AnthropicDelta struct {
|
||||
StopSequence *string `json:"stop_sequence,omitempty"`
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenAI Chat Completions API types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ChatRequest is the request body for POST /v1/chat/completions.
|
||||
type ChatRequest struct {
|
||||
Model string `json:"model"`
|
||||
Messages []ChatMessage `json:"messages"`
|
||||
MaxTokens *int `json:"max_tokens,omitempty"`
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []ChatTool `json:"tools,omitempty"`
|
||||
Stop json.RawMessage `json:"stop,omitempty"` // string or []string
|
||||
}
|
||||
|
||||
// ChatMessage is a single message in the Chat Completions conversation.
|
||||
type ChatMessage struct {
|
||||
Role string `json:"role"` // "system" | "user" | "assistant" | "tool"
|
||||
Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
|
||||
|
||||
// assistant fields
|
||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
||||
|
||||
// tool fields
|
||||
ToolCallID string `json:"tool_call_id,omitempty"`
|
||||
|
||||
// Copilot-specific reasoning passthrough
|
||||
ReasoningText string `json:"reasoning_text,omitempty"`
|
||||
ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
|
||||
}
|
||||
|
||||
// ChatContentPart is a typed content part in a multi-part message.
|
||||
type ChatContentPart struct {
|
||||
Type string `json:"type"` // "text" | "image_url"
|
||||
Text string `json:"text,omitempty"`
|
||||
}
|
||||
|
||||
// ChatToolCall represents a tool invocation in an assistant message.
|
||||
// In streaming deltas, Index identifies which tool call is being updated.
|
||||
type ChatToolCall struct {
|
||||
Index int `json:"index"`
|
||||
ID string `json:"id,omitempty"`
|
||||
Type string `json:"type,omitempty"` // "function"
|
||||
Function ChatFunctionCall `json:"function"`
|
||||
}
|
||||
|
||||
// ChatFunctionCall holds the function name and arguments.
|
||||
type ChatFunctionCall struct {
|
||||
Name string `json:"name"`
|
||||
Arguments string `json:"arguments"`
|
||||
}
|
||||
|
||||
// ChatTool describes a tool available to the model.
|
||||
type ChatTool struct {
|
||||
Type string `json:"type"` // "function"
|
||||
Function ChatFunction `json:"function"`
|
||||
}
|
||||
|
||||
// ChatFunction is the function definition inside a ChatTool.
|
||||
type ChatFunction struct {
|
||||
Name string `json:"name"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Parameters json.RawMessage `json:"parameters,omitempty"` // JSON Schema
|
||||
}
|
||||
|
||||
// ChatResponse is the non-streaming response from POST /v1/chat/completions.
|
||||
type ChatResponse struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"` // "chat.completion"
|
||||
Created int64 `json:"created"`
|
||||
Model string `json:"model"`
|
||||
Choices []ChatChoice `json:"choices"`
|
||||
Usage *ChatUsage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
// ChatChoice is one completion choice.
|
||||
type ChatChoice struct {
|
||||
Index int `json:"index"`
|
||||
Message ChatMessage `json:"message"`
|
||||
FinishReason string `json:"finish_reason"`
|
||||
}
|
||||
|
||||
// ChatUsage holds token counts in Chat Completions format.
|
||||
type ChatUsage struct {
|
||||
PromptTokens int `json:"prompt_tokens"`
|
||||
CompletionTokens int `json:"completion_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Chat Completions SSE types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
|
||||
type ChatStreamChunk struct {
|
||||
ID string `json:"id"`
|
||||
Object string `json:"object"` // "chat.completion.chunk"
|
||||
Created int64 `json:"created"`
|
||||
Model string `json:"model"`
|
||||
Choices []ChatStreamChoice `json:"choices"`
|
||||
Usage *ChatUsage `json:"usage,omitempty"`
|
||||
}
|
||||
|
||||
// ChatStreamChoice is one choice inside a streaming chunk.
|
||||
type ChatStreamChoice struct {
|
||||
Index int `json:"index"`
|
||||
Delta ChatStreamDelta `json:"delta"`
|
||||
FinishReason *string `json:"finish_reason"`
|
||||
}
|
||||
|
||||
// ChatStreamDelta carries incremental content in a streaming chunk.
|
||||
type ChatStreamDelta struct {
|
||||
Role string `json:"role,omitempty"`
|
||||
Content string `json:"content,omitempty"`
|
||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
||||
|
||||
// Copilot-specific reasoning passthrough (streaming)
|
||||
ReasoningText string `json:"reasoning_text,omitempty"`
|
||||
ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OpenAI Responses API types
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ResponsesRequest is the request body for POST /v1/responses.
|
||||
type ResponsesRequest struct {
|
||||
Model string `json:"model"`
|
||||
Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
|
||||
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []ResponsesTool `json:"tools,omitempty"`
|
||||
Include []string `json:"include,omitempty"`
|
||||
Store *bool `json:"store,omitempty"`
|
||||
Model string `json:"model"`
|
||||
Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
|
||||
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||||
Temperature *float64 `json:"temperature,omitempty"`
|
||||
TopP *float64 `json:"top_p,omitempty"`
|
||||
Stream bool `json:"stream,omitempty"`
|
||||
Tools []ResponsesTool `json:"tools,omitempty"`
|
||||
Include []string `json:"include,omitempty"`
|
||||
Store *bool `json:"store,omitempty"`
|
||||
Reasoning *ResponsesReasoning `json:"reasoning,omitempty"`
|
||||
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesReasoning configures reasoning effort in the Responses API.
|
||||
type ResponsesReasoning struct {
|
||||
Effort string `json:"effort"` // "low" | "medium" | "high"
|
||||
Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed"
|
||||
}
|
||||
|
||||
// ResponsesInputItem is one item in the Responses API input array.
|
||||
@@ -305,6 +199,15 @@ type ResponsesResponse struct {
|
||||
|
||||
// incomplete_details is present when status="incomplete"
|
||||
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
|
||||
|
||||
// Error is present when status="failed"
|
||||
Error *ResponsesError `json:"error,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesError describes an error in a failed response.
|
||||
type ResponsesError struct {
|
||||
Code string `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
// ResponsesIncompleteDetails explains why a response is incomplete.
|
||||
@@ -349,6 +252,16 @@ type ResponsesUsage struct {
|
||||
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesInputTokensDetails breaks down input token usage.
|
||||
type ResponsesInputTokensDetails struct {
|
||||
CachedTokens int `json:"cached_tokens,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesOutputTokensDetails breaks down output token usage.
|
||||
type ResponsesOutputTokensDetails struct {
|
||||
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Responses SSE event types
|
||||
// ---------------------------------------------------------------------------
|
||||
@@ -388,153 +301,6 @@ type ResponsesStreamEvent struct {
|
||||
SequenceNumber int `json:"sequence_number,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesOutputReasoning is a reasoning output item in the Responses API.
|
||||
// This type represents the "type":"reasoning" output item that contains
|
||||
// extended thinking from the model.
|
||||
type ResponsesOutputReasoning struct {
|
||||
ID string `json:"id,omitempty"`
|
||||
Type string `json:"type"` // "reasoning"
|
||||
Status string `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
|
||||
EncryptedContent string `json:"encrypted_content,omitempty"`
|
||||
Summary []ResponsesReasoningSummary `json:"summary,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesReasoningSummary is a summary text block inside a reasoning output.
|
||||
type ResponsesReasoningSummary struct {
|
||||
Type string `json:"type"` // "summary_text"
|
||||
Text string `json:"text"`
|
||||
}
|
||||
|
||||
// ResponsesStreamState maintains the state for converting Responses streaming
|
||||
// events to Chat Completions format. It tracks content blocks, tool calls,
|
||||
// reasoning blocks, and other streaming artifacts.
|
||||
type ResponsesStreamState struct {
|
||||
// Response metadata
|
||||
ID string
|
||||
Model string
|
||||
Created int64
|
||||
|
||||
// Content tracking
|
||||
ContentIndex int
|
||||
CurrentText string
|
||||
CurrentItemID string
|
||||
PendingText []string // Text to accumulate before emitting
|
||||
|
||||
// Tool call tracking
|
||||
ToolCalls []ResponsesToolCallState
|
||||
CurrentToolCall *ResponsesToolCallState
|
||||
|
||||
// Reasoning tracking
|
||||
ReasoningBlocks []ResponsesReasoningState
|
||||
CurrentReasoning *ResponsesReasoningState
|
||||
|
||||
// Usage tracking
|
||||
InputTokens int
|
||||
OutputTokens int
|
||||
|
||||
// Status tracking
|
||||
Status string
|
||||
FinishReason string
|
||||
}
|
||||
|
||||
// ResponsesToolCallState tracks a single tool call during streaming.
|
||||
type ResponsesToolCallState struct {
|
||||
Index int
|
||||
ItemID string
|
||||
CallID string
|
||||
Name string
|
||||
Arguments string
|
||||
Status string
|
||||
IsComplete bool
|
||||
}
|
||||
|
||||
// ResponsesReasoningState tracks a reasoning block during streaming.
|
||||
type ResponsesReasoningState struct {
|
||||
ItemID string
|
||||
SummaryIndex int
|
||||
SummaryText string
|
||||
Status string
|
||||
IsComplete bool
|
||||
}
|
||||
|
||||
// ResponsesUsageDetail provides additional token usage details in Responses format.
|
||||
type ResponsesUsageDetail struct {
|
||||
InputTokens int `json:"input_tokens"`
|
||||
OutputTokens int `json:"output_tokens"`
|
||||
TotalTokens int `json:"total_tokens"`
|
||||
|
||||
// Optional detailed breakdown
|
||||
InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
|
||||
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesInputTokensDetails breaks down input token usage.
|
||||
type ResponsesInputTokensDetails struct {
|
||||
CachedTokens int `json:"cached_tokens,omitempty"`
|
||||
}
|
||||
|
||||
// ResponsesOutputTokensDetails breaks down output token usage.
|
||||
type ResponsesOutputTokensDetails struct {
|
||||
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Finish reason mapping helpers
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason.
|
||||
func ChatFinishToAnthropic(reason string) string {
|
||||
switch reason {
|
||||
case "stop":
|
||||
return "end_turn"
|
||||
case "tool_calls":
|
||||
return "tool_use"
|
||||
case "length":
|
||||
return "max_tokens"
|
||||
default:
|
||||
return "end_turn"
|
||||
}
|
||||
}
|
||||
|
||||
// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason.
|
||||
func AnthropicStopToChat(reason string) string {
|
||||
switch reason {
|
||||
case "end_turn":
|
||||
return "stop"
|
||||
case "tool_use":
|
||||
return "tool_calls"
|
||||
case "max_tokens":
|
||||
return "length"
|
||||
default:
|
||||
return "stop"
|
||||
}
|
||||
}
|
||||
|
||||
// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason.
|
||||
func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
|
||||
switch status {
|
||||
case "completed":
|
||||
return "stop"
|
||||
case "incomplete":
|
||||
if details != nil && details.Reason == "max_output_tokens" {
|
||||
return "length"
|
||||
}
|
||||
return "stop"
|
||||
default:
|
||||
return "stop"
|
||||
}
|
||||
}
|
||||
|
||||
// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status.
|
||||
func ChatFinishToResponsesStatus(reason string) string {
|
||||
switch reason {
|
||||
case "length":
|
||||
return "incomplete"
|
||||
default:
|
||||
return "completed"
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Shared constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
@@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
||||
mappedModel := account.GetMappedModel(originalModel)
|
||||
responsesReq.Model = mappedModel
|
||||
|
||||
logger.L().Info("openai messages: model mapping applied",
|
||||
logger.L().Debug("openai messages: model mapping applied",
|
||||
zap.Int64("account_id", account.ID),
|
||||
zap.String("original_model", originalModel),
|
||||
zap.String("mapped_model", mappedModel),
|
||||
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
||||
if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
|
||||
return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
|
||||
}
|
||||
applyCodexOAuthTransform(reqBody, false)
|
||||
applyCodexOAuthTransform(reqBody, false, false)
|
||||
// OAuth codex transform forces stream=true upstream, so always use
|
||||
// the streaming response handler regardless of what the client asked.
|
||||
isStream = true
|
||||
@@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
||||
|
||||
// 9. Handle normal response
|
||||
if isStream {
|
||||
return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
|
||||
return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime)
|
||||
}
|
||||
return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
|
||||
return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime)
|
||||
}
|
||||
|
||||
// handleAnthropicErrorResponse reads an upstream error and returns it in
|
||||
@@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
|
||||
resp *http.Response,
|
||||
c *gin.Context,
|
||||
originalModel string,
|
||||
mappedModel string,
|
||||
startTime time.Time,
|
||||
) (*OpenAIForwardResult, error) {
|
||||
requestID := resp.Header.Get("x-request-id")
|
||||
@@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
|
||||
c.JSON(http.StatusOK, anthropicResp)
|
||||
|
||||
return &OpenAIForwardResult{
|
||||
RequestID: requestID,
|
||||
Usage: usage,
|
||||
Model: originalModel,
|
||||
Stream: false,
|
||||
Duration: time.Since(startTime),
|
||||
RequestID: requestID,
|
||||
Usage: usage,
|
||||
Model: originalModel,
|
||||
BillingModel: mappedModel,
|
||||
Stream: false,
|
||||
Duration: time.Since(startTime),
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
||||
resp *http.Response,
|
||||
c *gin.Context,
|
||||
originalModel string,
|
||||
mappedModel string,
|
||||
startTime time.Time,
|
||||
) (*OpenAIForwardResult, error) {
|
||||
requestID := resp.Header.Get("x-request-id")
|
||||
@@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
||||
}
|
||||
|
||||
// Extract usage from completion events
|
||||
if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
|
||||
if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") &&
|
||||
event.Response != nil && event.Response.Usage != nil {
|
||||
usage = OpenAIUsage{
|
||||
InputTokens: event.Response.Usage.InputTokens,
|
||||
@@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
||||
RequestID: requestID,
|
||||
Usage: usage,
|
||||
Model: originalModel,
|
||||
BillingModel: mappedModel,
|
||||
Stream: true,
|
||||
Duration: time.Since(startTime),
|
||||
FirstTokenMs: firstTokenMs,
|
||||
@@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
||||
RequestID: requestID,
|
||||
Usage: usage,
|
||||
Model: originalModel,
|
||||
BillingModel: mappedModel,
|
||||
Stream: true,
|
||||
Duration: time.Since(startTime),
|
||||
FirstTokenMs: firstTokenMs,
|
||||
|
||||
@@ -207,7 +207,12 @@ type OpenAIUsage struct {
|
||||
type OpenAIForwardResult struct {
|
||||
RequestID string
|
||||
Usage OpenAIUsage
|
||||
Model string
|
||||
Model string // 原始模型(用于响应和日志显示)
|
||||
// BillingModel is the model used for cost calculation.
|
||||
// When non-empty, CalculateCost uses this instead of Model.
|
||||
// This is set by the Anthropic Messages conversion path where
|
||||
// the mapped upstream model differs from the client-facing model.
|
||||
BillingModel string
|
||||
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
||||
// Stored for usage records display; nil means not provided / not applicable.
|
||||
ReasoningEffort *string
|
||||
@@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
||||
multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
|
||||
}
|
||||
|
||||
cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier)
|
||||
billingModel := result.Model
|
||||
if result.BillingModel != "" {
|
||||
billingModel = result.BillingModel
|
||||
}
|
||||
cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
|
||||
if err != nil {
|
||||
cost = &CostBreakdown{ActualCost: 0}
|
||||
}
|
||||
@@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
||||
APIKeyID: apiKey.ID,
|
||||
AccountID: account.ID,
|
||||
RequestID: result.RequestID,
|
||||
Model: result.Model,
|
||||
Model: billingModel,
|
||||
ReasoningEffort: result.ReasoningEffort,
|
||||
InputTokens: actualInputTokens,
|
||||
OutputTokens: result.Usage.OutputTokens,
|
||||
|
||||
Reference in New Issue
Block a user