feat: /v1/messages端点适配codex账号池
This commit is contained in:
@@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat
|
|||||||
if streamStarted {
|
if streamStarted {
|
||||||
flusher, ok := c.Writer.(http.Flusher)
|
flusher, ok := c.Writer.(http.Flusher)
|
||||||
if ok {
|
if ok {
|
||||||
errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
|
errPayload, _ := json.Marshal(gin.H{
|
||||||
fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
|
"type": "error",
|
||||||
|
"error": gin.H{
|
||||||
|
"type": errType,
|
||||||
|
"message": message,
|
||||||
|
},
|
||||||
|
})
|
||||||
|
fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck
|
||||||
flusher.Flush()
|
flusher.Flush()
|
||||||
}
|
}
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) {
|
|||||||
assert.Contains(t, sse, "data: ")
|
assert.Contains(t, sse, "data: ")
|
||||||
assert.Contains(t, sse, `"resp_1"`)
|
assert.Contains(t, sse, `"resp_1"`)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// response.failed tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestStreamingFailed(t *testing.T) {
|
||||||
|
state := NewResponsesEventToAnthropicState()
|
||||||
|
|
||||||
|
// 1. response.created
|
||||||
|
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||||
|
Type: "response.created",
|
||||||
|
Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"},
|
||||||
|
}, state)
|
||||||
|
|
||||||
|
// 2. Some text output before failure
|
||||||
|
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||||
|
Type: "response.output_text.delta",
|
||||||
|
Delta: "Partial output before failure",
|
||||||
|
}, state)
|
||||||
|
|
||||||
|
// 3. response.failed
|
||||||
|
events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||||
|
Type: "response.failed",
|
||||||
|
Response: &ResponsesResponse{
|
||||||
|
Status: "failed",
|
||||||
|
Error: &ResponsesError{Code: "server_error", Message: "Internal error"},
|
||||||
|
Usage: &ResponsesUsage{InputTokens: 50, OutputTokens: 10},
|
||||||
|
},
|
||||||
|
}, state)
|
||||||
|
|
||||||
|
// Should close text block + message_delta + message_stop
|
||||||
|
require.Len(t, events, 3)
|
||||||
|
assert.Equal(t, "content_block_stop", events[0].Type)
|
||||||
|
assert.Equal(t, "message_delta", events[1].Type)
|
||||||
|
assert.Equal(t, "end_turn", events[1].Delta.StopReason)
|
||||||
|
assert.Equal(t, 50, events[1].Usage.InputTokens)
|
||||||
|
assert.Equal(t, 10, events[1].Usage.OutputTokens)
|
||||||
|
assert.Equal(t, "message_stop", events[2].Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestStreamingFailedNoOutput(t *testing.T) {
|
||||||
|
state := NewResponsesEventToAnthropicState()
|
||||||
|
|
||||||
|
// 1. response.created
|
||||||
|
ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||||
|
Type: "response.created",
|
||||||
|
Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"},
|
||||||
|
}, state)
|
||||||
|
|
||||||
|
// 2. response.failed with no prior output
|
||||||
|
events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
|
||||||
|
Type: "response.failed",
|
||||||
|
Response: &ResponsesResponse{
|
||||||
|
Status: "failed",
|
||||||
|
Error: &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"},
|
||||||
|
Usage: &ResponsesUsage{InputTokens: 20, OutputTokens: 0},
|
||||||
|
},
|
||||||
|
}, state)
|
||||||
|
|
||||||
|
// Should emit message_delta + message_stop (no block to close)
|
||||||
|
require.Len(t, events, 2)
|
||||||
|
assert.Equal(t, "message_delta", events[0].Type)
|
||||||
|
assert.Equal(t, "end_turn", events[0].Delta.StopReason)
|
||||||
|
assert.Equal(t, "message_stop", events[1].Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestResponsesToAnthropic_Failed(t *testing.T) {
|
||||||
|
resp := &ResponsesResponse{
|
||||||
|
ID: "resp_fail_3",
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
Status: "failed",
|
||||||
|
Error: &ResponsesError{Code: "server_error", Message: "Something went wrong"},
|
||||||
|
Output: []ResponsesOutput{},
|
||||||
|
Usage: &ResponsesUsage{InputTokens: 30, OutputTokens: 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
|
||||||
|
// Failed status defaults to "end_turn" stop reason
|
||||||
|
assert.Equal(t, "end_turn", anth.StopReason)
|
||||||
|
// Should have at least an empty text block
|
||||||
|
require.Len(t, anth.Content, 1)
|
||||||
|
assert.Equal(t, "text", anth.Content[0].Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// thinking → reasoning conversion tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp.Reasoning)
|
||||||
|
assert.Equal(t, "high", resp.Reasoning.Effort)
|
||||||
|
assert.Equal(t, "auto", resp.Reasoning.Summary)
|
||||||
|
assert.Contains(t, resp.Include, "reasoning.encrypted_content")
|
||||||
|
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
Thinking: &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotNil(t, resp.Reasoning)
|
||||||
|
assert.Equal(t, "medium", resp.Reasoning.Effort)
|
||||||
|
assert.Equal(t, "auto", resp.Reasoning.Summary)
|
||||||
|
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
Thinking: &AnthropicThinking{Type: "disabled"},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Nil(t, resp.Reasoning)
|
||||||
|
assert.NotContains(t, resp.Include, "reasoning.summary")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_NoThinking(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Nil(t, resp.Reasoning)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// tool_choice conversion tests
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
ToolChoice: json.RawMessage(`{"type":"auto"}`),
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var tc string
|
||||||
|
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||||
|
assert.Equal(t, "auto", tc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
ToolChoice: json.RawMessage(`{"type":"any"}`),
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var tc string
|
||||||
|
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||||
|
assert.Equal(t, "required", tc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
|
||||||
|
req := &AnthropicRequest{
|
||||||
|
Model: "gpt-5.2",
|
||||||
|
MaxTokens: 1024,
|
||||||
|
Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
|
||||||
|
ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`),
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := AnthropicToResponses(req)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
var tc map[string]any
|
||||||
|
require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
|
||||||
|
assert.Equal(t, "function", tc["type"])
|
||||||
|
fn, ok := tc["function"].(map[string]any)
|
||||||
|
require.True(t, ok)
|
||||||
|
assert.Equal(t, "get_weather", fn["name"])
|
||||||
|
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package apicompat
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
|
|||||||
out.Tools = convertAnthropicToolsToResponses(req.Tools)
|
out.Tools = convertAnthropicToolsToResponses(req.Tools)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert thinking → reasoning.
|
||||||
|
// Summary "auto" (JSON field "summary") causes the upstream to emit reasoning_summary_text
|
||||||
|
// streaming events; the include array only needs reasoning.encrypted_content
|
||||||
|
// (already set above) for content continuity.
|
||||||
|
if req.Thinking != nil {
|
||||||
|
switch req.Thinking.Type {
|
||||||
|
case "enabled":
|
||||||
|
out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"}
|
||||||
|
case "adaptive":
|
||||||
|
out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"}
|
||||||
|
}
|
||||||
|
// "disabled" or unknown → omit reasoning
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert tool_choice
|
||||||
|
if len(req.ToolChoice) > 0 {
|
||||||
|
tc, err := convertAnthropicToolChoiceToResponses(req.ToolChoice)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("convert tool_choice: %w", err)
|
||||||
|
}
|
||||||
|
out.ToolChoice = tc
|
||||||
|
}
|
||||||
|
|
||||||
return out, nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// convertAnthropicToolChoiceToResponses maps an Anthropic tool_choice object to
// the value placed in the Responses request's tool_choice field.
//
//	{"type":"auto"}            → "auto"
//	{"type":"any"}             → "required"
//	{"type":"none"}            → "none"
//	{"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
//
// Any other type is returned unchanged so the caller forwards it as-is.
//
// An error is returned when raw cannot be decoded as a JSON object, or when
// type is "tool" but no name is given: a forced tool choice without a name
// cannot identify a tool, so it is rejected here rather than forwarded.
//
// NOTE(review): the public Responses API reference describes a forced function
// choice as {"type":"function","name":"X"}; confirm the upstream accepts the
// nested "function" form produced here before relying on it.
func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
	var tc struct {
		Type string `json:"type"`
		Name string `json:"name"`
	}
	if err := json.Unmarshal(raw, &tc); err != nil {
		return nil, err
	}

	switch tc.Type {
	case "auto":
		return json.Marshal("auto")
	case "any":
		return json.Marshal("required")
	case "none":
		return json.Marshal("none")
	case "tool":
		if tc.Name == "" {
			return nil, fmt.Errorf("tool_choice type %q requires a non-empty name", tc.Type)
		}
		return json.Marshal(map[string]any{
			"type":     "function",
			"function": map[string]string{"name": tc.Name},
		})
	default:
		// Pass through unknown types as-is.
		return raw, nil
	}
}
|
||||||
|
|
||||||
// convertAnthropicToResponsesInput builds the Responses API input items array
|
// convertAnthropicToResponsesInput builds the Responses API input items array
|
||||||
// from the Anthropic system field and message list.
|
// from the Anthropic system field and message list.
|
||||||
func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
|
func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
|
||||||
|
|||||||
@@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents(
|
|||||||
return resToAnthHandleReasoningDelta(evt, state)
|
return resToAnthHandleReasoningDelta(evt, state)
|
||||||
case "response.reasoning_summary_text.done":
|
case "response.reasoning_summary_text.done":
|
||||||
return resToAnthHandleBlockDone(state)
|
return resToAnthHandleBlockDone(state)
|
||||||
case "response.completed", "response.incomplete":
|
case "response.completed", "response.incomplete", "response.failed":
|
||||||
return resToAnthHandleCompleted(evt, state)
|
return resToAnthHandleCompleted(evt, state)
|
||||||
default:
|
default:
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
// Package apicompat provides type definitions and conversion utilities for
|
// Package apicompat provides type definitions and conversion utilities for
|
||||||
// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
|
// translating between Anthropic Messages and OpenAI Responses API formats.
|
||||||
// Responses API formats. It enables multi-protocol support so that clients
|
// It enables multi-protocol support so that clients using different API
|
||||||
// using different API formats can be served through a unified gateway.
|
// formats can be served through a unified gateway.
|
||||||
package apicompat
|
package apicompat
|
||||||
|
|
||||||
import "encoding/json"
|
import "encoding/json"
|
||||||
@@ -21,6 +21,14 @@ type AnthropicRequest struct {
|
|||||||
Temperature *float64 `json:"temperature,omitempty"`
|
Temperature *float64 `json:"temperature,omitempty"`
|
||||||
TopP *float64 `json:"top_p,omitempty"`
|
TopP *float64 `json:"top_p,omitempty"`
|
||||||
StopSeqs []string `json:"stop_sequences,omitempty"`
|
StopSeqs []string `json:"stop_sequences,omitempty"`
|
||||||
|
Thinking *AnthropicThinking `json:"thinking,omitempty"`
|
||||||
|
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// AnthropicThinking is the "thinking" object of an Anthropic request and
// configures extended thinking.
type AnthropicThinking struct {
	// Type selects the mode: "enabled", "adaptive" or "disabled".
	Type string `json:"type"`
	// BudgetTokens is the maximum number of thinking tokens; it is omitted
	// from the encoded JSON when zero.
	BudgetTokens int `json:"budget_tokens,omitempty"`
}
|
||||||
|
|
||||||
// AnthropicMessage is a single message in the Anthropic conversation.
|
// AnthropicMessage is a single message in the Anthropic conversation.
|
||||||
@@ -120,143 +128,29 @@ type AnthropicDelta struct {
|
|||||||
StopSequence *string `json:"stop_sequence,omitempty"`
|
StopSequence *string `json:"stop_sequence,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// OpenAI Chat Completions API types
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// ChatRequest is the request body for POST /v1/chat/completions.
|
|
||||||
type ChatRequest struct {
|
|
||||||
Model string `json:"model"`
|
|
||||||
Messages []ChatMessage `json:"messages"`
|
|
||||||
MaxTokens *int `json:"max_tokens,omitempty"`
|
|
||||||
Temperature *float64 `json:"temperature,omitempty"`
|
|
||||||
TopP *float64 `json:"top_p,omitempty"`
|
|
||||||
Stream bool `json:"stream,omitempty"`
|
|
||||||
Tools []ChatTool `json:"tools,omitempty"`
|
|
||||||
Stop json.RawMessage `json:"stop,omitempty"` // string or []string
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatMessage is a single message in the Chat Completions conversation.
|
|
||||||
type ChatMessage struct {
|
|
||||||
Role string `json:"role"` // "system" | "user" | "assistant" | "tool"
|
|
||||||
Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
|
|
||||||
|
|
||||||
// assistant fields
|
|
||||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
|
||||||
|
|
||||||
// tool fields
|
|
||||||
ToolCallID string `json:"tool_call_id,omitempty"`
|
|
||||||
|
|
||||||
// Copilot-specific reasoning passthrough
|
|
||||||
ReasoningText string `json:"reasoning_text,omitempty"`
|
|
||||||
ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatContentPart is a typed content part in a multi-part message.
|
|
||||||
type ChatContentPart struct {
|
|
||||||
Type string `json:"type"` // "text" | "image_url"
|
|
||||||
Text string `json:"text,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatToolCall represents a tool invocation in an assistant message.
|
|
||||||
// In streaming deltas, Index identifies which tool call is being updated.
|
|
||||||
type ChatToolCall struct {
|
|
||||||
Index int `json:"index"`
|
|
||||||
ID string `json:"id,omitempty"`
|
|
||||||
Type string `json:"type,omitempty"` // "function"
|
|
||||||
Function ChatFunctionCall `json:"function"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatFunctionCall holds the function name and arguments.
|
|
||||||
type ChatFunctionCall struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Arguments string `json:"arguments"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatTool describes a tool available to the model.
|
|
||||||
type ChatTool struct {
|
|
||||||
Type string `json:"type"` // "function"
|
|
||||||
Function ChatFunction `json:"function"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatFunction is the function definition inside a ChatTool.
|
|
||||||
type ChatFunction struct {
|
|
||||||
Name string `json:"name"`
|
|
||||||
Description string `json:"description,omitempty"`
|
|
||||||
Parameters json.RawMessage `json:"parameters,omitempty"` // JSON Schema
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatResponse is the non-streaming response from POST /v1/chat/completions.
|
|
||||||
type ChatResponse struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
Object string `json:"object"` // "chat.completion"
|
|
||||||
Created int64 `json:"created"`
|
|
||||||
Model string `json:"model"`
|
|
||||||
Choices []ChatChoice `json:"choices"`
|
|
||||||
Usage *ChatUsage `json:"usage,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatChoice is one completion choice.
|
|
||||||
type ChatChoice struct {
|
|
||||||
Index int `json:"index"`
|
|
||||||
Message ChatMessage `json:"message"`
|
|
||||||
FinishReason string `json:"finish_reason"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatUsage holds token counts in Chat Completions format.
|
|
||||||
type ChatUsage struct {
|
|
||||||
PromptTokens int `json:"prompt_tokens"`
|
|
||||||
CompletionTokens int `json:"completion_tokens"`
|
|
||||||
TotalTokens int `json:"total_tokens"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Chat Completions SSE types
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
|
|
||||||
type ChatStreamChunk struct {
|
|
||||||
ID string `json:"id"`
|
|
||||||
Object string `json:"object"` // "chat.completion.chunk"
|
|
||||||
Created int64 `json:"created"`
|
|
||||||
Model string `json:"model"`
|
|
||||||
Choices []ChatStreamChoice `json:"choices"`
|
|
||||||
Usage *ChatUsage `json:"usage,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatStreamChoice is one choice inside a streaming chunk.
|
|
||||||
type ChatStreamChoice struct {
|
|
||||||
Index int `json:"index"`
|
|
||||||
Delta ChatStreamDelta `json:"delta"`
|
|
||||||
FinishReason *string `json:"finish_reason"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatStreamDelta carries incremental content in a streaming chunk.
|
|
||||||
type ChatStreamDelta struct {
|
|
||||||
Role string `json:"role,omitempty"`
|
|
||||||
Content string `json:"content,omitempty"`
|
|
||||||
ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
|
|
||||||
|
|
||||||
// Copilot-specific reasoning passthrough (streaming)
|
|
||||||
ReasoningText string `json:"reasoning_text,omitempty"`
|
|
||||||
ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// OpenAI Responses API types
|
// OpenAI Responses API types
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
// ResponsesRequest is the request body for POST /v1/responses.
|
// ResponsesRequest is the request body for POST /v1/responses.
|
||||||
type ResponsesRequest struct {
|
type ResponsesRequest struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
|
Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
|
||||||
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
|
||||||
Temperature *float64 `json:"temperature,omitempty"`
|
Temperature *float64 `json:"temperature,omitempty"`
|
||||||
TopP *float64 `json:"top_p,omitempty"`
|
TopP *float64 `json:"top_p,omitempty"`
|
||||||
Stream bool `json:"stream,omitempty"`
|
Stream bool `json:"stream,omitempty"`
|
||||||
Tools []ResponsesTool `json:"tools,omitempty"`
|
Tools []ResponsesTool `json:"tools,omitempty"`
|
||||||
Include []string `json:"include,omitempty"`
|
Include []string `json:"include,omitempty"`
|
||||||
Store *bool `json:"store,omitempty"`
|
Store *bool `json:"store,omitempty"`
|
||||||
|
Reasoning *ResponsesReasoning `json:"reasoning,omitempty"`
|
||||||
|
ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResponsesReasoning is the "reasoning" object of a Responses API request.
type ResponsesReasoning struct {
	// Effort is the reasoning effort level: "low", "medium" or "high".
	Effort string `json:"effort"`
	// Summary is the summary mode: "auto", "concise" or "detailed"; it is
	// omitted from the encoded JSON when empty.
	Summary string `json:"summary,omitempty"`
}
|
||||||
|
|
||||||
// ResponsesInputItem is one item in the Responses API input array.
|
// ResponsesInputItem is one item in the Responses API input array.
|
||||||
@@ -305,6 +199,15 @@ type ResponsesResponse struct {
|
|||||||
|
|
||||||
// incomplete_details is present when status="incomplete"
|
// incomplete_details is present when status="incomplete"
|
||||||
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
|
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
|
||||||
|
|
||||||
|
// Error is present when status="failed"
|
||||||
|
Error *ResponsesError `json:"error,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ResponsesError is the error object attached to a failed response.
type ResponsesError struct {
	// Code is the error code string, for example "server_error".
	Code string `json:"code"`
	// Message is the accompanying error text.
	Message string `json:"message"`
}
|
||||||
|
|
||||||
// ResponsesIncompleteDetails explains why a response is incomplete.
|
// ResponsesIncompleteDetails explains why a response is incomplete.
|
||||||
@@ -349,6 +252,16 @@ type ResponsesUsage struct {
|
|||||||
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
|
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ResponsesInputTokensDetails is the detailed breakdown of input token usage.
type ResponsesInputTokensDetails struct {
	// CachedTokens is the "cached_tokens" count; omitted from JSON when zero.
	CachedTokens int `json:"cached_tokens,omitempty"`
}
|
||||||
|
|
||||||
|
// ResponsesOutputTokensDetails is the detailed breakdown of output token usage.
type ResponsesOutputTokensDetails struct {
	// ReasoningTokens is the "reasoning_tokens" count; omitted from JSON when zero.
	ReasoningTokens int `json:"reasoning_tokens,omitempty"`
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Responses SSE event types
|
// Responses SSE event types
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -388,153 +301,6 @@ type ResponsesStreamEvent struct {
|
|||||||
SequenceNumber int `json:"sequence_number,omitempty"`
|
SequenceNumber int `json:"sequence_number,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// ResponsesOutputReasoning is a reasoning output item in the Responses API.
|
|
||||||
// This type represents the "type":"reasoning" output item that contains
|
|
||||||
// extended thinking from the model.
|
|
||||||
type ResponsesOutputReasoning struct {
|
|
||||||
ID string `json:"id,omitempty"`
|
|
||||||
Type string `json:"type"` // "reasoning"
|
|
||||||
Status string `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
|
|
||||||
EncryptedContent string `json:"encrypted_content,omitempty"`
|
|
||||||
Summary []ResponsesReasoningSummary `json:"summary,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesReasoningSummary is a summary text block inside a reasoning output.
|
|
||||||
type ResponsesReasoningSummary struct {
|
|
||||||
Type string `json:"type"` // "summary_text"
|
|
||||||
Text string `json:"text"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesStreamState maintains the state for converting Responses streaming
|
|
||||||
// events to Chat Completions format. It tracks content blocks, tool calls,
|
|
||||||
// reasoning blocks, and other streaming artifacts.
|
|
||||||
type ResponsesStreamState struct {
|
|
||||||
// Response metadata
|
|
||||||
ID string
|
|
||||||
Model string
|
|
||||||
Created int64
|
|
||||||
|
|
||||||
// Content tracking
|
|
||||||
ContentIndex int
|
|
||||||
CurrentText string
|
|
||||||
CurrentItemID string
|
|
||||||
PendingText []string // Text to accumulate before emitting
|
|
||||||
|
|
||||||
// Tool call tracking
|
|
||||||
ToolCalls []ResponsesToolCallState
|
|
||||||
CurrentToolCall *ResponsesToolCallState
|
|
||||||
|
|
||||||
// Reasoning tracking
|
|
||||||
ReasoningBlocks []ResponsesReasoningState
|
|
||||||
CurrentReasoning *ResponsesReasoningState
|
|
||||||
|
|
||||||
// Usage tracking
|
|
||||||
InputTokens int
|
|
||||||
OutputTokens int
|
|
||||||
|
|
||||||
// Status tracking
|
|
||||||
Status string
|
|
||||||
FinishReason string
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesToolCallState tracks a single tool call during streaming.
|
|
||||||
type ResponsesToolCallState struct {
|
|
||||||
Index int
|
|
||||||
ItemID string
|
|
||||||
CallID string
|
|
||||||
Name string
|
|
||||||
Arguments string
|
|
||||||
Status string
|
|
||||||
IsComplete bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesReasoningState tracks a reasoning block during streaming.
|
|
||||||
type ResponsesReasoningState struct {
|
|
||||||
ItemID string
|
|
||||||
SummaryIndex int
|
|
||||||
SummaryText string
|
|
||||||
Status string
|
|
||||||
IsComplete bool
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesUsageDetail provides additional token usage details in Responses format.
|
|
||||||
type ResponsesUsageDetail struct {
|
|
||||||
InputTokens int `json:"input_tokens"`
|
|
||||||
OutputTokens int `json:"output_tokens"`
|
|
||||||
TotalTokens int `json:"total_tokens"`
|
|
||||||
|
|
||||||
// Optional detailed breakdown
|
|
||||||
InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
|
|
||||||
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesInputTokensDetails breaks down input token usage.
|
|
||||||
type ResponsesInputTokensDetails struct {
|
|
||||||
CachedTokens int `json:"cached_tokens,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesOutputTokensDetails breaks down output token usage.
|
|
||||||
type ResponsesOutputTokensDetails struct {
|
|
||||||
ReasoningTokens int `json:"reasoning_tokens,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Finish reason mapping helpers
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason.
|
|
||||||
func ChatFinishToAnthropic(reason string) string {
|
|
||||||
switch reason {
|
|
||||||
case "stop":
|
|
||||||
return "end_turn"
|
|
||||||
case "tool_calls":
|
|
||||||
return "tool_use"
|
|
||||||
case "length":
|
|
||||||
return "max_tokens"
|
|
||||||
default:
|
|
||||||
return "end_turn"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason.
|
|
||||||
func AnthropicStopToChat(reason string) string {
|
|
||||||
switch reason {
|
|
||||||
case "end_turn":
|
|
||||||
return "stop"
|
|
||||||
case "tool_use":
|
|
||||||
return "tool_calls"
|
|
||||||
case "max_tokens":
|
|
||||||
return "length"
|
|
||||||
default:
|
|
||||||
return "stop"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason.
|
|
||||||
func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
|
|
||||||
switch status {
|
|
||||||
case "completed":
|
|
||||||
return "stop"
|
|
||||||
case "incomplete":
|
|
||||||
if details != nil && details.Reason == "max_output_tokens" {
|
|
||||||
return "length"
|
|
||||||
}
|
|
||||||
return "stop"
|
|
||||||
default:
|
|
||||||
return "stop"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status.
|
|
||||||
func ChatFinishToResponsesStatus(reason string) string {
|
|
||||||
switch reason {
|
|
||||||
case "length":
|
|
||||||
return "incomplete"
|
|
||||||
default:
|
|
||||||
return "completed"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Shared constants
|
// Shared constants
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
|||||||
mappedModel := account.GetMappedModel(originalModel)
|
mappedModel := account.GetMappedModel(originalModel)
|
||||||
responsesReq.Model = mappedModel
|
responsesReq.Model = mappedModel
|
||||||
|
|
||||||
logger.L().Info("openai messages: model mapping applied",
|
logger.L().Debug("openai messages: model mapping applied",
|
||||||
zap.Int64("account_id", account.ID),
|
zap.Int64("account_id", account.ID),
|
||||||
zap.String("original_model", originalModel),
|
zap.String("original_model", originalModel),
|
||||||
zap.String("mapped_model", mappedModel),
|
zap.String("mapped_model", mappedModel),
|
||||||
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
|||||||
if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
|
if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
|
||||||
return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
|
return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
|
||||||
}
|
}
|
||||||
applyCodexOAuthTransform(reqBody, false)
|
applyCodexOAuthTransform(reqBody, false, false)
|
||||||
// OAuth codex transform forces stream=true upstream, so always use
|
// OAuth codex transform forces stream=true upstream, so always use
|
||||||
// the streaming response handler regardless of what the client asked.
|
// the streaming response handler regardless of what the client asked.
|
||||||
isStream = true
|
isStream = true
|
||||||
@@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
|
|||||||
|
|
||||||
// 9. Handle normal response
|
// 9. Handle normal response
|
||||||
if isStream {
|
if isStream {
|
||||||
return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
|
return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime)
|
||||||
}
|
}
|
||||||
return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
|
return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime)
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleAnthropicErrorResponse reads an upstream error and returns it in
|
// handleAnthropicErrorResponse reads an upstream error and returns it in
|
||||||
@@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
|
|||||||
resp *http.Response,
|
resp *http.Response,
|
||||||
c *gin.Context,
|
c *gin.Context,
|
||||||
originalModel string,
|
originalModel string,
|
||||||
|
mappedModel string,
|
||||||
startTime time.Time,
|
startTime time.Time,
|
||||||
) (*OpenAIForwardResult, error) {
|
) (*OpenAIForwardResult, error) {
|
||||||
requestID := resp.Header.Get("x-request-id")
|
requestID := resp.Header.Get("x-request-id")
|
||||||
@@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
|
|||||||
c.JSON(http.StatusOK, anthropicResp)
|
c.JSON(http.StatusOK, anthropicResp)
|
||||||
|
|
||||||
return &OpenAIForwardResult{
|
return &OpenAIForwardResult{
|
||||||
RequestID: requestID,
|
RequestID: requestID,
|
||||||
Usage: usage,
|
Usage: usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
Stream: false,
|
BillingModel: mappedModel,
|
||||||
Duration: time.Since(startTime),
|
Stream: false,
|
||||||
|
Duration: time.Since(startTime),
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
|||||||
resp *http.Response,
|
resp *http.Response,
|
||||||
c *gin.Context,
|
c *gin.Context,
|
||||||
originalModel string,
|
originalModel string,
|
||||||
|
mappedModel string,
|
||||||
startTime time.Time,
|
startTime time.Time,
|
||||||
) (*OpenAIForwardResult, error) {
|
) (*OpenAIForwardResult, error) {
|
||||||
requestID := resp.Header.Get("x-request-id")
|
requestID := resp.Header.Get("x-request-id")
|
||||||
@@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract usage from completion events
|
// Extract usage from completion events
|
||||||
if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
|
if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") &&
|
||||||
event.Response != nil && event.Response.Usage != nil {
|
event.Response != nil && event.Response.Usage != nil {
|
||||||
usage = OpenAIUsage{
|
usage = OpenAIUsage{
|
||||||
InputTokens: event.Response.Usage.InputTokens,
|
InputTokens: event.Response.Usage.InputTokens,
|
||||||
@@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
|||||||
RequestID: requestID,
|
RequestID: requestID,
|
||||||
Usage: usage,
|
Usage: usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
|
BillingModel: mappedModel,
|
||||||
Stream: true,
|
Stream: true,
|
||||||
Duration: time.Since(startTime),
|
Duration: time.Since(startTime),
|
||||||
FirstTokenMs: firstTokenMs,
|
FirstTokenMs: firstTokenMs,
|
||||||
@@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
|
|||||||
RequestID: requestID,
|
RequestID: requestID,
|
||||||
Usage: usage,
|
Usage: usage,
|
||||||
Model: originalModel,
|
Model: originalModel,
|
||||||
|
BillingModel: mappedModel,
|
||||||
Stream: true,
|
Stream: true,
|
||||||
Duration: time.Since(startTime),
|
Duration: time.Since(startTime),
|
||||||
FirstTokenMs: firstTokenMs,
|
FirstTokenMs: firstTokenMs,
|
||||||
|
|||||||
@@ -207,7 +207,12 @@ type OpenAIUsage struct {
|
|||||||
type OpenAIForwardResult struct {
|
type OpenAIForwardResult struct {
|
||||||
RequestID string
|
RequestID string
|
||||||
Usage OpenAIUsage
|
Usage OpenAIUsage
|
||||||
Model string
|
Model string // 原始模型(用于响应和日志显示)
|
||||||
|
// BillingModel is the model used for cost calculation.
|
||||||
|
// When non-empty, CalculateCost uses this instead of Model.
|
||||||
|
// This is set by the Anthropic Messages conversion path where
|
||||||
|
// the mapped upstream model differs from the client-facing model.
|
||||||
|
BillingModel string
|
||||||
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
|
||||||
// Stored for usage records display; nil means not provided / not applicable.
|
// Stored for usage records display; nil means not provided / not applicable.
|
||||||
ReasoningEffort *string
|
ReasoningEffort *string
|
||||||
@@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
|||||||
multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
|
multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
|
||||||
}
|
}
|
||||||
|
|
||||||
cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier)
|
billingModel := result.Model
|
||||||
|
if result.BillingModel != "" {
|
||||||
|
billingModel = result.BillingModel
|
||||||
|
}
|
||||||
|
cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cost = &CostBreakdown{ActualCost: 0}
|
cost = &CostBreakdown{ActualCost: 0}
|
||||||
}
|
}
|
||||||
@@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
|
|||||||
APIKeyID: apiKey.ID,
|
APIKeyID: apiKey.ID,
|
||||||
AccountID: account.ID,
|
AccountID: account.ID,
|
||||||
RequestID: result.RequestID,
|
RequestID: result.RequestID,
|
||||||
Model: result.Model,
|
Model: billingModel,
|
||||||
ReasoningEffort: result.ReasoningEffort,
|
ReasoningEffort: result.ReasoningEffort,
|
||||||
InputTokens: actualInputTokens,
|
InputTokens: actualInputTokens,
|
||||||
OutputTokens: result.Usage.OutputTokens,
|
OutputTokens: result.Usage.OutputTokens,
|
||||||
|
|||||||
Reference in New Issue
Block a user