diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go
index 644b634d..751853d8 100644
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -118,6 +118,20 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
}
setOpsRequestContext(c, "", false, body)
+ sessionHashBody := body
+ if service.IsOpenAIResponsesCompactPathForTest(c) {
+ if compactSeed := strings.TrimSpace(gjson.GetBytes(body, "prompt_cache_key").String()); compactSeed != "" {
+ c.Set(service.OpenAICompactSessionSeedKeyForTest(), compactSeed)
+ }
+ normalizedCompactBody, normalizedCompact, compactErr := service.NormalizeOpenAICompactRequestBodyForTest(body)
+ if compactErr != nil {
+ h.errorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to normalize compact request body")
+ return
+ }
+ if normalizedCompact {
+ body = normalizedCompactBody
+ }
+ }
// Validate that the request body is well-formed JSON
if !gjson.ValidBytes(body) {
@@ -193,7 +207,7 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
}
// Generate session hash (header first; fallback to prompt_cache_key)
- sessionHash := h.gatewayService.GenerateSessionHash(c, body)
+ sessionHash := h.gatewayService.GenerateSessionHash(c, sessionHashBody)
maxAccountSwitches := h.maxAccountSwitches
switchCount := 0
@@ -427,6 +441,262 @@ func (h *OpenAIGatewayHandler) logOpenAIRemoteCompactOutcome(c *gin.Context, sta
log.Warn("codex.remote_compact.failed")
}
+// Messages handles Anthropic Messages API requests routed to the OpenAI platform.
+// POST /v1/messages (when group platform is OpenAI)
+func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
+ streamStarted := false
+ defer h.recoverAnthropicMessagesPanic(c, &streamStarted)
+
+ requestStart := time.Now()
+
+ apiKey, ok := middleware2.GetAPIKeyFromContext(c)
+ if !ok {
+ h.anthropicErrorResponse(c, http.StatusUnauthorized, "authentication_error", "Invalid API key")
+ return
+ }
+
+ subject, ok := middleware2.GetAuthSubjectFromContext(c)
+ if !ok {
+ h.anthropicErrorResponse(c, http.StatusInternalServerError, "api_error", "User context not found")
+ return
+ }
+ reqLog := requestLogger(
+ c,
+ "handler.openai_gateway.messages",
+ zap.Int64("user_id", subject.UserID),
+ zap.Int64("api_key_id", apiKey.ID),
+ zap.Any("group_id", apiKey.GroupID),
+ )
+ if !h.ensureResponsesDependencies(c, reqLog) {
+ return
+ }
+
+ body, err := pkghttputil.ReadRequestBodyWithPrealloc(c.Request)
+ if err != nil {
+ if maxErr, ok := extractMaxBytesError(err); ok {
+ h.anthropicErrorResponse(c, http.StatusRequestEntityTooLarge, "invalid_request_error", buildBodyTooLargeMessage(maxErr.Limit))
+ return
+ }
+ h.anthropicErrorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to read request body")
+ return
+ }
+ if len(body) == 0 {
+ h.anthropicErrorResponse(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
+ return
+ }
+
+ if !gjson.ValidBytes(body) {
+ h.anthropicErrorResponse(c, http.StatusBadRequest, "invalid_request_error", "Failed to parse request body")
+ return
+ }
+
+ modelResult := gjson.GetBytes(body, "model")
+ if !modelResult.Exists() || modelResult.Type != gjson.String || modelResult.String() == "" {
+ h.anthropicErrorResponse(c, http.StatusBadRequest, "invalid_request_error", "model is required")
+ return
+ }
+ reqModel := modelResult.String()
+ reqStream := gjson.GetBytes(body, "stream").Bool()
+
+ reqLog = reqLog.With(zap.String("model", reqModel), zap.Bool("stream", reqStream))
+
+ setOpsRequestContext(c, reqModel, reqStream, body)
+
+ subscription, _ := middleware2.GetSubscriptionFromContext(c)
+
+ service.SetOpsLatencyMs(c, service.OpsAuthLatencyMsKey, time.Since(requestStart).Milliseconds())
+ routingStart := time.Now()
+
+ userReleaseFunc, acquired := h.acquireResponsesUserSlot(c, subject.UserID, subject.Concurrency, reqStream, &streamStarted, reqLog)
+ if !acquired {
+ return
+ }
+ if userReleaseFunc != nil {
+ defer userReleaseFunc()
+ }
+
+ if err := h.billingCacheService.CheckBillingEligibility(c.Request.Context(), apiKey.User, apiKey, apiKey.Group, subscription); err != nil {
+ reqLog.Info("openai_messages.billing_eligibility_check_failed", zap.Error(err))
+ status, code, message := billingErrorDetails(err)
+ h.anthropicStreamingAwareError(c, status, code, message, streamStarted)
+ return
+ }
+
+ sessionHash := h.gatewayService.GenerateSessionHash(c, body)
+ promptCacheKey := h.gatewayService.ExtractSessionID(c, body)
+
+ maxAccountSwitches := h.maxAccountSwitches
+ switchCount := 0
+ failedAccountIDs := make(map[int64]struct{})
+ var lastFailoverErr *service.UpstreamFailoverError
+
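+ // Account failover loop: select an account and forward the request; on an
+ // UpstreamFailoverError, exclude the failed account and retry with another
+ // one, up to maxAccountSwitches switches.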
+ for {
+ reqLog.Debug("openai_messages.account_selecting", zap.Int("excluded_account_count", len(failedAccountIDs)))
+ selection, scheduleDecision, err := h.gatewayService.SelectAccountWithScheduler(
+ c.Request.Context(),
+ apiKey.GroupID,
+ "", // no previous_response_id
+ sessionHash,
+ reqModel,
+ failedAccountIDs,
+ service.OpenAIUpstreamTransportAny,
+ )
+ if err != nil {
+ reqLog.Warn("openai_messages.account_select_failed",
+ zap.Error(err),
+ zap.Int("excluded_account_count", len(failedAccountIDs)),
+ )
+ if len(failedAccountIDs) == 0 {
+ h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
+ return
+ }
+ if lastFailoverErr != nil {
+ h.handleAnthropicFailoverExhausted(c, lastFailoverErr, streamStarted)
+ } else {
+ h.anthropicStreamingAwareError(c, http.StatusBadGateway, "api_error", "Upstream request failed", streamStarted)
+ }
+ return
+ }
+ if selection == nil || selection.Account == nil {
+ h.anthropicStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "No available accounts", streamStarted)
+ return
+ }
+ account := selection.Account
+ reqLog.Debug("openai_messages.account_selected", zap.Int64("account_id", account.ID), zap.String("account_name", account.Name))
+ _ = scheduleDecision
+ setOpsSelectedAccount(c, account.ID, account.Platform)
+
+ accountReleaseFunc, acquired := h.acquireResponsesAccountSlot(c, apiKey.GroupID, sessionHash, selection, reqStream, &streamStarted, reqLog)
+ if !acquired {
+ return
+ }
+
+ service.SetOpsLatencyMs(c, service.OpsRoutingLatencyMsKey, time.Since(routingStart).Milliseconds())
+ forwardStart := time.Now()
+
+ result, err := h.gatewayService.ForwardAsAnthropic(c.Request.Context(), c, account, body, promptCacheKey)
+
+ forwardDurationMs := time.Since(forwardStart).Milliseconds()
+ if accountReleaseFunc != nil {
+ accountReleaseFunc()
+ }
+ upstreamLatencyMs, _ := getContextInt64(c, service.OpsUpstreamLatencyMsKey)
+ responseLatencyMs := forwardDurationMs
+ if upstreamLatencyMs > 0 && forwardDurationMs > upstreamLatencyMs {
+ responseLatencyMs = forwardDurationMs - upstreamLatencyMs
+ }
+ service.SetOpsLatencyMs(c, service.OpsResponseLatencyMsKey, responseLatencyMs)
+ if err == nil && result != nil && result.FirstTokenMs != nil {
+ service.SetOpsLatencyMs(c, service.OpsTimeToFirstTokenMsKey, int64(*result.FirstTokenMs))
+ }
+ if err != nil {
+ var failoverErr *service.UpstreamFailoverError
+ if errors.As(err, &failoverErr) {
+ h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, false, nil)
+ h.gatewayService.RecordOpenAIAccountSwitch()
+ failedAccountIDs[account.ID] = struct{}{}
+ lastFailoverErr = failoverErr
+ if switchCount >= maxAccountSwitches {
+ h.handleAnthropicFailoverExhausted(c, failoverErr, streamStarted)
+ return
+ }
+ switchCount++
+ reqLog.Warn("openai_messages.upstream_failover_switching",
+ zap.Int64("account_id", account.ID),
+ zap.Int("upstream_status", failoverErr.StatusCode),
+ zap.Int("switch_count", switchCount),
+ zap.Int("max_switches", maxAccountSwitches),
+ )
+ continue
+ }
+ h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, false, nil)
+ wroteFallback := h.ensureAnthropicErrorResponse(c, streamStarted)
+ reqLog.Warn("openai_messages.forward_failed",
+ zap.Int64("account_id", account.ID),
+ zap.Bool("fallback_error_response_written", wroteFallback),
+ zap.Error(err),
+ )
+ return
+ }
+ if result != nil {
+ h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, true, result.FirstTokenMs)
+ } else {
+ h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, true, nil)
+ }
+
+ userAgent := c.GetHeader("User-Agent")
+ clientIP := ip.GetClientIP(c)
+
+ h.submitUsageRecordTask(func(ctx context.Context) {
+ if err := h.gatewayService.RecordUsage(ctx, &service.OpenAIRecordUsageInput{
+ Result: result,
+ APIKey: apiKey,
+ User: apiKey.User,
+ Account: account,
+ Subscription: subscription,
+ UserAgent: userAgent,
+ IPAddress: clientIP,
+ APIKeyService: h.apiKeyService,
+ }); err != nil {
+ logger.L().With(
+ zap.String("component", "handler.openai_gateway.messages"),
+ zap.Int64("user_id", subject.UserID),
+ zap.Int64("api_key_id", apiKey.ID),
+ zap.Any("group_id", apiKey.GroupID),
+ zap.String("model", reqModel),
+ zap.Int64("account_id", account.ID),
+ ).Error("openai_messages.record_usage_failed", zap.Error(err))
+ }
+ })
+ reqLog.Debug("openai_messages.request_completed",
+ zap.Int64("account_id", account.ID),
+ zap.Int("switch_count", switchCount),
+ )
+ return
+ }
+}
+
+// anthropicErrorResponse writes an error in the Anthropic Messages API format.
+func (h *OpenAIGatewayHandler) anthropicErrorResponse(c *gin.Context, status int, errType, message string) {
+ c.JSON(status, gin.H{
+ "type": "error",
+ "error": gin.H{
+ "type": errType,
+ "message": message,
+ },
+ })
+}
+
+// anthropicStreamingAwareError handles errors that may occur during streaming.
+// Once the stream has started it emits an Anthropic SSE error event; otherwise
+// it writes a plain JSON error response.
+func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, status int, errType, message string, streamStarted bool) {
+ if streamStarted {
+ flusher, ok := c.Writer.(http.Flusher)
+ if ok {
+ errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
+ fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
+ flusher.Flush()
+ }
+ return
+ }
+ h.anthropicErrorResponse(c, status, errType, message)
+}
+
+// handleAnthropicFailoverExhausted maps the final upstream failover error to the Anthropic error format.
+func (h *OpenAIGatewayHandler) handleAnthropicFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, streamStarted bool) {
+ status, errType, errMsg := h.mapUpstreamError(failoverErr.StatusCode)
+ h.anthropicStreamingAwareError(c, status, errType, errMsg, streamStarted)
+}
+
+// ensureAnthropicErrorResponse writes a fallback Anthropic error if no response was written.
+func (h *OpenAIGatewayHandler) ensureAnthropicErrorResponse(c *gin.Context, streamStarted bool) bool {
+ if c == nil || c.Writer == nil || c.Writer.Written() {
+ return false
+ }
+ h.anthropicStreamingAwareError(c, http.StatusBadGateway, "api_error", "Upstream request failed", streamStarted)
+ return true
+}
+
func (h *OpenAIGatewayHandler) validateFunctionCallOutputRequest(c *gin.Context, body []byte, reqLog *zap.Logger) bool {
if !gjson.GetBytes(body, `input.#(type=="function_call_output")`).Exists() {
return true
@@ -907,6 +1177,26 @@ func (h *OpenAIGatewayHandler) recoverResponsesPanic(c *gin.Context, streamStart
)
}
+// recoverAnthropicMessagesPanic recovers from panics in the Anthropic Messages
+// handler and, if streaming has not started, writes an Anthropic-formatted
+// error response.
+func (h *OpenAIGatewayHandler) recoverAnthropicMessagesPanic(c *gin.Context, streamStarted *bool) {
+ recovered := recover()
+ if recovered == nil {
+ return
+ }
+
+ started := streamStarted != nil && *streamStarted
+ requestLogger(c, "handler.openai_gateway.messages").Error(
+ "openai.messages_panic_recovered",
+ zap.Bool("stream_started", started),
+ zap.Any("panic", recovered),
+ zap.ByteString("stack", debug.Stack()),
+ )
+ if !started {
+ h.anthropicErrorResponse(c, http.StatusInternalServerError, "api_error", "Internal server error")
+ }
+}
+
func (h *OpenAIGatewayHandler) ensureResponsesDependencies(c *gin.Context, reqLog *zap.Logger) bool {
missing := h.missingResponsesDependencies()
if len(missing) == 0 {
diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
new file mode 100644
index 00000000..20eeb969
--- /dev/null
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -0,0 +1,534 @@
+package apicompat
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// AnthropicToResponses tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_BasicText(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Stream: true,
+ Messages: []AnthropicMessage{
+ {Role: "user", Content: json.RawMessage(`"Hello"`)},
+ },
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ assert.Equal(t, "gpt-5.2", resp.Model)
+ assert.True(t, resp.Stream)
+ assert.Equal(t, 1024, *resp.MaxOutputTokens)
+ assert.False(t, *resp.Store)
+
+ var items []ResponsesInputItem
+ require.NoError(t, json.Unmarshal(resp.Input, &items))
+ require.Len(t, items, 1)
+ assert.Equal(t, "user", items[0].Role)
+}
+
+func TestAnthropicToResponses_SystemPrompt(t *testing.T) {
+ t.Run("string", func(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 100,
+ System: json.RawMessage(`"You are helpful."`),
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hi"`)}},
+ }
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var items []ResponsesInputItem
+ require.NoError(t, json.Unmarshal(resp.Input, &items))
+ require.Len(t, items, 2)
+ assert.Equal(t, "system", items[0].Role)
+ })
+
+ t.Run("array", func(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 100,
+ System: json.RawMessage(`[{"type":"text","text":"Part 1"},{"type":"text","text":"Part 2"}]`),
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hi"`)}},
+ }
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var items []ResponsesInputItem
+ require.NoError(t, json.Unmarshal(resp.Input, &items))
+ require.Len(t, items, 2)
+ assert.Equal(t, "system", items[0].Role)
+ // System text blocks should be joined with a double newline.
+ var text string
+ require.NoError(t, json.Unmarshal(items[0].Content, &text))
+ assert.Equal(t, "Part 1\n\nPart 2", text)
+ })
+}
+
+func TestAnthropicToResponses_ToolUse(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{
+ {Role: "user", Content: json.RawMessage(`"What is the weather?"`)},
+ {Role: "assistant", Content: json.RawMessage(`[{"type":"text","text":"Let me check."},{"type":"tool_use","id":"call_1","name":"get_weather","input":{"city":"NYC"}}]`)},
+ {Role: "user", Content: json.RawMessage(`[{"type":"tool_result","tool_use_id":"call_1","content":"Sunny, 72°F"}]`)},
+ },
+ Tools: []AnthropicTool{
+ {Name: "get_weather", Description: "Get weather", InputSchema: json.RawMessage(`{"type":"object","properties":{"city":{"type":"string"}}}`)},
+ },
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ // Check tools
+ require.Len(t, resp.Tools, 1)
+ assert.Equal(t, "function", resp.Tools[0].Type)
+ assert.Equal(t, "get_weather", resp.Tools[0].Name)
+
+ // Check input items
+ var items []ResponsesInputItem
+ require.NoError(t, json.Unmarshal(resp.Input, &items))
+ // user + assistant + function_call + function_call_output = 4
+ require.Len(t, items, 4)
+
+ assert.Equal(t, "user", items[0].Role)
+ assert.Equal(t, "assistant", items[1].Role)
+ assert.Equal(t, "function_call", items[2].Type)
+ assert.Equal(t, "fc_call_1", items[2].CallID)
+ assert.Equal(t, "function_call_output", items[3].Type)
+ assert.Equal(t, "fc_call_1", items[3].CallID)
+ assert.Equal(t, "Sunny, 72°F", items[3].Output)
+}
+
+func TestAnthropicToResponses_ThinkingIgnored(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{
+ {Role: "user", Content: json.RawMessage(`"Hello"`)},
+ {Role: "assistant", Content: json.RawMessage(`[{"type":"thinking","thinking":"deep thought"},{"type":"text","text":"Hi!"}]`)},
+ {Role: "user", Content: json.RawMessage(`"More"`)},
+ },
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var items []ResponsesInputItem
+ require.NoError(t, json.Unmarshal(resp.Input, &items))
+ // user + assistant(text only, thinking ignored) + user = 3
+ require.Len(t, items, 3)
+ assert.Equal(t, "assistant", items[1].Role)
+ // Assistant content should only have text, not thinking.
+ var parts []ResponsesContentPart
+ require.NoError(t, json.Unmarshal(items[1].Content, &parts))
+ require.Len(t, parts, 1)
+ assert.Equal(t, "output_text", parts[0].Type)
+ assert.Equal(t, "Hi!", parts[0].Text)
+}
+
+func TestAnthropicToResponses_MaxTokensFloor(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 10, // below minMaxOutputTokens (128)
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hi"`)}},
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ assert.Equal(t, 128, *resp.MaxOutputTokens)
+}
+
+// ---------------------------------------------------------------------------
+// ResponsesToAnthropic (non-streaming) tests
+// ---------------------------------------------------------------------------
+
+func TestResponsesToAnthropic_TextOnly(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_123",
+ Model: "gpt-5.2",
+ Status: "completed",
+ Output: []ResponsesOutput{
+ {
+ Type: "message",
+ Content: []ResponsesContentPart{
+ {Type: "output_text", Text: "Hello there!"},
+ },
+ },
+ },
+ Usage: &ResponsesUsage{InputTokens: 10, OutputTokens: 5, TotalTokens: 15},
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ assert.Equal(t, "resp_123", anth.ID)
+ assert.Equal(t, "claude-opus-4-6", anth.Model)
+ assert.Equal(t, "end_turn", anth.StopReason)
+ require.Len(t, anth.Content, 1)
+ assert.Equal(t, "text", anth.Content[0].Type)
+ assert.Equal(t, "Hello there!", anth.Content[0].Text)
+ assert.Equal(t, 10, anth.Usage.InputTokens)
+ assert.Equal(t, 5, anth.Usage.OutputTokens)
+}
+
+func TestResponsesToAnthropic_ToolUse(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_456",
+ Model: "gpt-5.2",
+ Status: "completed",
+ Output: []ResponsesOutput{
+ {
+ Type: "message",
+ Content: []ResponsesContentPart{
+ {Type: "output_text", Text: "Let me check."},
+ },
+ },
+ {
+ Type: "function_call",
+ CallID: "call_1",
+ Name: "get_weather",
+ Arguments: `{"city":"NYC"}`,
+ },
+ },
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ assert.Equal(t, "tool_use", anth.StopReason)
+ require.Len(t, anth.Content, 2)
+ assert.Equal(t, "text", anth.Content[0].Type)
+ assert.Equal(t, "tool_use", anth.Content[1].Type)
+ assert.Equal(t, "call_1", anth.Content[1].ID)
+ assert.Equal(t, "get_weather", anth.Content[1].Name)
+}
+
+func TestResponsesToAnthropic_Reasoning(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_789",
+ Model: "gpt-5.2",
+ Status: "completed",
+ Output: []ResponsesOutput{
+ {
+ Type: "reasoning",
+ Summary: []ResponsesSummary{
+ {Type: "summary_text", Text: "Thinking about the answer..."},
+ },
+ },
+ {
+ Type: "message",
+ Content: []ResponsesContentPart{
+ {Type: "output_text", Text: "42"},
+ },
+ },
+ },
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ require.Len(t, anth.Content, 2)
+ assert.Equal(t, "thinking", anth.Content[0].Type)
+ assert.Equal(t, "Thinking about the answer...", anth.Content[0].Thinking)
+ assert.Equal(t, "text", anth.Content[1].Type)
+ assert.Equal(t, "42", anth.Content[1].Text)
+}
+
+func TestResponsesToAnthropic_Incomplete(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_inc",
+ Model: "gpt-5.2",
+ Status: "incomplete",
+ IncompleteDetails: &ResponsesIncompleteDetails{
+ Reason: "max_output_tokens",
+ },
+ Output: []ResponsesOutput{
+ {
+ Type: "message",
+ Content: []ResponsesContentPart{{Type: "output_text", Text: "Partial..."}},
+ },
+ },
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ assert.Equal(t, "max_tokens", anth.StopReason)
+}
+
+func TestResponsesToAnthropic_EmptyOutput(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_empty",
+ Model: "gpt-5.2",
+ Status: "completed",
+ Output: []ResponsesOutput{},
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ require.Len(t, anth.Content, 1)
+ assert.Equal(t, "text", anth.Content[0].Type)
+ assert.Equal(t, "", anth.Content[0].Text)
+}
+
+// ---------------------------------------------------------------------------
+// Streaming: ResponsesEventToAnthropicEvents tests
+// ---------------------------------------------------------------------------
+
+func TestStreamingTextOnly(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ // 1. response.created
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{
+ ID: "resp_1",
+ Model: "gpt-5.2",
+ },
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "message_start", events[0].Type)
+
+ // 2. output_item.added (message)
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_item.added",
+ OutputIndex: 0,
+ Item: &ResponsesOutput{Type: "message"},
+ }, state)
+ assert.Len(t, events, 0) // message item doesn't emit events
+
+ // 3. text delta
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.delta",
+ Delta: "Hello",
+ }, state)
+ require.Len(t, events, 2) // content_block_start + content_block_delta
+ assert.Equal(t, "content_block_start", events[0].Type)
+ assert.Equal(t, "text", events[0].ContentBlock.Type)
+ assert.Equal(t, "content_block_delta", events[1].Type)
+ assert.Equal(t, "text_delta", events[1].Delta.Type)
+ assert.Equal(t, "Hello", events[1].Delta.Text)
+
+ // 4. more text
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.delta",
+ Delta: " world",
+ }, state)
+ require.Len(t, events, 1) // only delta, no new block start
+ assert.Equal(t, "content_block_delta", events[0].Type)
+
+ // 5. text done
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.done",
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_stop", events[0].Type)
+
+ // 6. completed
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.completed",
+ Response: &ResponsesResponse{
+ Status: "completed",
+ Usage: &ResponsesUsage{InputTokens: 10, OutputTokens: 5},
+ },
+ }, state)
+ require.Len(t, events, 2) // message_delta + message_stop
+ assert.Equal(t, "message_delta", events[0].Type)
+ assert.Equal(t, "end_turn", events[0].Delta.StopReason)
+ assert.Equal(t, 10, events[0].Usage.InputTokens)
+ assert.Equal(t, 5, events[0].Usage.OutputTokens)
+ assert.Equal(t, "message_stop", events[1].Type)
+}
+
+func TestStreamingToolCall(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ // 1. response.created
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_2", Model: "gpt-5.2"},
+ }, state)
+
+ // 2. function_call added
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_item.added",
+ OutputIndex: 0,
+ Item: &ResponsesOutput{Type: "function_call", CallID: "call_1", Name: "get_weather"},
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_start", events[0].Type)
+ assert.Equal(t, "tool_use", events[0].ContentBlock.Type)
+ assert.Equal(t, "call_1", events[0].ContentBlock.ID)
+
+ // 3. arguments delta
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.function_call_arguments.delta",
+ OutputIndex: 0,
+ Delta: `{"city":`,
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_delta", events[0].Type)
+ assert.Equal(t, "input_json_delta", events[0].Delta.Type)
+ assert.Equal(t, `{"city":`, events[0].Delta.PartialJSON)
+
+ // 4. arguments done
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.function_call_arguments.done",
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_stop", events[0].Type)
+
+ // 5. completed with tool_calls
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.completed",
+ Response: &ResponsesResponse{
+ Status: "completed",
+ Usage: &ResponsesUsage{InputTokens: 20, OutputTokens: 10},
+ },
+ }, state)
+ require.Len(t, events, 2)
+ assert.Equal(t, "tool_use", events[0].Delta.StopReason)
+}
+
+func TestStreamingReasoning(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_3", Model: "gpt-5.2"},
+ }, state)
+
+ // reasoning item added
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_item.added",
+ OutputIndex: 0,
+ Item: &ResponsesOutput{Type: "reasoning"},
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_start", events[0].Type)
+ assert.Equal(t, "thinking", events[0].ContentBlock.Type)
+
+ // reasoning text delta
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.reasoning_summary_text.delta",
+ OutputIndex: 0,
+ Delta: "Let me think...",
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_delta", events[0].Type)
+ assert.Equal(t, "thinking_delta", events[0].Delta.Type)
+ assert.Equal(t, "Let me think...", events[0].Delta.Thinking)
+
+ // reasoning done
+ events = ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.reasoning_summary_text.done",
+ }, state)
+ require.Len(t, events, 1)
+ assert.Equal(t, "content_block_stop", events[0].Type)
+}
+
+func TestStreamingIncomplete(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_4", Model: "gpt-5.2"},
+ }, state)
+
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.delta",
+ Delta: "Partial output...",
+ }, state)
+
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.incomplete",
+ Response: &ResponsesResponse{
+ Status: "incomplete",
+ IncompleteDetails: &ResponsesIncompleteDetails{Reason: "max_output_tokens"},
+ Usage: &ResponsesUsage{InputTokens: 100, OutputTokens: 4096},
+ },
+ }, state)
+
+ // Should close the text block + message_delta + message_stop
+ require.Len(t, events, 3)
+ assert.Equal(t, "content_block_stop", events[0].Type)
+ assert.Equal(t, "message_delta", events[1].Type)
+ assert.Equal(t, "max_tokens", events[1].Delta.StopReason)
+ assert.Equal(t, "message_stop", events[2].Type)
+}
+
+func TestFinalizeStream_NeverStarted(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+ events := FinalizeResponsesAnthropicStream(state)
+ assert.Nil(t, events)
+}
+
+func TestFinalizeStream_AlreadyCompleted(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+ state.MessageStartSent = true
+ state.MessageStopSent = true
+ events := FinalizeResponsesAnthropicStream(state)
+ assert.Nil(t, events)
+}
+
+func TestFinalizeStream_AbnormalTermination(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ // Simulate a stream that started but never completed
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_5", Model: "gpt-5.2"},
+ }, state)
+
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.delta",
+ Delta: "Interrupted...",
+ }, state)
+
+ // Stream ends without response.completed
+ events := FinalizeResponsesAnthropicStream(state)
+ require.Len(t, events, 3) // content_block_stop + message_delta + message_stop
+ assert.Equal(t, "content_block_stop", events[0].Type)
+ assert.Equal(t, "message_delta", events[1].Type)
+ assert.Equal(t, "end_turn", events[1].Delta.StopReason)
+ assert.Equal(t, "message_stop", events[2].Type)
+}
+
+func TestStreamingEmptyResponse(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_6", Model: "gpt-5.2"},
+ }, state)
+
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.completed",
+ Response: &ResponsesResponse{
+ Status: "completed",
+ Usage: &ResponsesUsage{InputTokens: 5, OutputTokens: 0},
+ },
+ }, state)
+
+ require.Len(t, events, 2) // message_delta + message_stop
+ assert.Equal(t, "message_delta", events[0].Type)
+ assert.Equal(t, "end_turn", events[0].Delta.StopReason)
+}
+
+func TestResponsesAnthropicEventToSSE(t *testing.T) {
+ evt := AnthropicStreamEvent{
+ Type: "message_start",
+ Message: &AnthropicResponse{
+ ID: "resp_1",
+ Type: "message",
+ Role: "assistant",
+ },
+ }
+ sse, err := ResponsesAnthropicEventToSSE(evt)
+ require.NoError(t, err)
+ assert.Contains(t, sse, "event: message_start\n")
+ assert.Contains(t, sse, "data: ")
+ assert.Contains(t, sse, `"resp_1"`)
+}
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
new file mode 100644
index 00000000..1a13658d
--- /dev/null
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -0,0 +1,283 @@
+package apicompat
+
+import (
+ "encoding/json"
+ "strings"
+)
+
+// AnthropicToResponses converts an Anthropic Messages request directly into
+// a Responses API request. Converting directly preserves fields that would be
+// lost in an intermediate Chat Completions round-trip (e.g. thinking,
+// cache_control, structured system prompts).
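+//
+// For example (illustrative), a minimal Anthropic request such as
+//
+//	{"model":"gpt-5.2","max_tokens":1024,"messages":[{"role":"user","content":"Hi"}]}
+//
+// becomes a Responses request with input [{"role":"user","content":"Hi"}],
+// max_output_tokens=1024 (floored at minMaxOutputTokens), store=false, and
+// include=["reasoning.encrypted_content"].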
+func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
+ input, err := convertAnthropicToResponsesInput(req.System, req.Messages)
+ if err != nil {
+ return nil, err
+ }
+
+ inputJSON, err := json.Marshal(input)
+ if err != nil {
+ return nil, err
+ }
+
+ out := &ResponsesRequest{
+ Model: req.Model,
+ Input: inputJSON,
+ Temperature: req.Temperature,
+ TopP: req.TopP,
+ Stream: req.Stream,
+ Include: []string{"reasoning.encrypted_content"},
+ }
+
+ storeFalse := false
+ out.Store = &storeFalse
+
+ if req.MaxTokens > 0 {
+ v := req.MaxTokens
+ if v < minMaxOutputTokens {
+ v = minMaxOutputTokens
+ }
+ out.MaxOutputTokens = &v
+ }
+
+ if len(req.Tools) > 0 {
+ out.Tools = convertAnthropicToolsToResponses(req.Tools)
+ }
+
+ return out, nil
+}
+
+// convertAnthropicToResponsesInput builds the Responses API input items array
+// from the Anthropic system field and message list.
+func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
+ var out []ResponsesInputItem
+
+ // System prompt → system role input item.
+ if len(system) > 0 {
+ sysText, err := parseAnthropicSystemPrompt(system)
+ if err != nil {
+ return nil, err
+ }
+ if sysText != "" {
+ content, _ := json.Marshal(sysText)
+ out = append(out, ResponsesInputItem{
+ Role: "system",
+ Content: content,
+ })
+ }
+ }
+
+ for _, m := range msgs {
+ items, err := anthropicMsgToResponsesItems(m)
+ if err != nil {
+ return nil, err
+ }
+ out = append(out, items...)
+ }
+ return out, nil
+}
+
+// parseAnthropicSystemPrompt handles the Anthropic system field which can be
+// a plain string or an array of text blocks.
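+//
+// Both "You are helpful." and [{"type":"text","text":"You are helpful."}]
+// yield "You are helpful."; multiple text blocks are joined with a blank line.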
+func parseAnthropicSystemPrompt(raw json.RawMessage) (string, error) {
+ var s string
+ if err := json.Unmarshal(raw, &s); err == nil {
+ return s, nil
+ }
+ var blocks []AnthropicContentBlock
+ if err := json.Unmarshal(raw, &blocks); err != nil {
+ return "", err
+ }
+ var parts []string
+ for _, b := range blocks {
+ if b.Type == "text" && b.Text != "" {
+ parts = append(parts, b.Text)
+ }
+ }
+ return strings.Join(parts, "\n\n"), nil
+}
+
+// anthropicMsgToResponsesItems converts a single Anthropic message into one
+// or more Responses API input items.
+func anthropicMsgToResponsesItems(m AnthropicMessage) ([]ResponsesInputItem, error) {
+ switch m.Role {
+ case "user":
+ return anthropicUserToResponses(m.Content)
+ case "assistant":
+ return anthropicAssistantToResponses(m.Content)
+ default:
+ return anthropicUserToResponses(m.Content)
+ }
+}
+
+// anthropicUserToResponses handles an Anthropic user message. Content can be a
+// plain string or an array of blocks. tool_result blocks are extracted into
+// function_call_output items.
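+//
+// For example, [{"type":"tool_result","tool_use_id":"toolu_1","content":"ok"},
+// {"type":"text","text":"next"}] yields a function_call_output item
+// (call_id "fc_toolu_1", output "ok") followed by a user message "next".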
+func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error) {
+ // Try plain string.
+ var s string
+ if err := json.Unmarshal(raw, &s); err == nil {
+ content, _ := json.Marshal(s)
+ return []ResponsesInputItem{{Role: "user", Content: content}}, nil
+ }
+
+ var blocks []AnthropicContentBlock
+ if err := json.Unmarshal(raw, &blocks); err != nil {
+ return nil, err
+ }
+
+ var out []ResponsesInputItem
+
+ // Extract tool_result blocks → function_call_output items.
+ for _, b := range blocks {
+ if b.Type != "tool_result" {
+ continue
+ }
+ text := extractAnthropicToolResultText(b)
+ if text == "" {
+ // OpenAI Responses API requires "output" field; use placeholder for empty results.
+ text = "(empty)"
+ }
+ out = append(out, ResponsesInputItem{
+ Type: "function_call_output",
+ CallID: toResponsesCallID(b.ToolUseID),
+ Output: text,
+ })
+ }
+
+ // Remaining text blocks → user message.
+ text := extractAnthropicTextFromBlocks(blocks)
+ if text != "" {
+ content, _ := json.Marshal(text)
+ out = append(out, ResponsesInputItem{Role: "user", Content: content})
+ }
+
+ return out, nil
+}
+
+// anthropicAssistantToResponses handles an Anthropic assistant message.
+// Text content → assistant message with output_text parts.
+// tool_use blocks → function_call items.
+// thinking blocks → ignored (OpenAI doesn't accept them as input).
+func anthropicAssistantToResponses(raw json.RawMessage) ([]ResponsesInputItem, error) {
+ // Try plain string.
+ var s string
+ if err := json.Unmarshal(raw, &s); err == nil {
+ parts := []ResponsesContentPart{{Type: "output_text", Text: s}}
+ partsJSON, err := json.Marshal(parts)
+ if err != nil {
+ return nil, err
+ }
+ return []ResponsesInputItem{{Role: "assistant", Content: partsJSON}}, nil
+ }
+
+ var blocks []AnthropicContentBlock
+ if err := json.Unmarshal(raw, &blocks); err != nil {
+ return nil, err
+ }
+
+ var items []ResponsesInputItem
+
+ // Text content → assistant message with output_text content parts.
+ text := extractAnthropicTextFromBlocks(blocks)
+ if text != "" {
+ parts := []ResponsesContentPart{{Type: "output_text", Text: text}}
+ partsJSON, err := json.Marshal(parts)
+ if err != nil {
+ return nil, err
+ }
+ items = append(items, ResponsesInputItem{Role: "assistant", Content: partsJSON})
+ }
+
+ // tool_use → function_call items.
+ for _, b := range blocks {
+ if b.Type != "tool_use" {
+ continue
+ }
+ args := "{}"
+ if len(b.Input) > 0 {
+ args = string(b.Input)
+ }
+ fcID := toResponsesCallID(b.ID)
+ items = append(items, ResponsesInputItem{
+ Type: "function_call",
+ CallID: fcID,
+ Name: b.Name,
+ Arguments: args,
+ ID: fcID,
+ })
+ }
+
+ return items, nil
+}
+
+// toResponsesCallID converts an Anthropic tool ID (toolu_xxx / call_xxx) to a
+// Responses API function_call ID that starts with "fc_".
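+//
+// e.g. "toolu_01ab" → "fc_toolu_01ab"; IDs already starting with "fc_" are
+// returned unchanged.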
+func toResponsesCallID(id string) string {
+ if strings.HasPrefix(id, "fc_") {
+ return id
+ }
+ return "fc_" + id
+}
+
+// fromResponsesCallID reverses toResponsesCallID, stripping the "fc_" prefix
+// that was added during request conversion.
+func fromResponsesCallID(id string) string {
+ if after, ok := strings.CutPrefix(id, "fc_"); ok {
+ // Only strip when the remainder is an original Anthropic/OpenAI tool ID,
+ // i.e. the "fc_" prefix was added by toResponsesCallID.
+ // E.g. "fc_toolu_xxx" → "toolu_xxx", "fc_call_xxx" → "call_xxx"
+ if strings.HasPrefix(after, "toolu_") || strings.HasPrefix(after, "call_") {
+ return after
+ }
+ }
+ return id
+}
+
+// extractAnthropicToolResultText gets the text content from a tool_result block.
+func extractAnthropicToolResultText(b AnthropicContentBlock) string {
+ if len(b.Content) == 0 {
+ return ""
+ }
+ var s string
+ if err := json.Unmarshal(b.Content, &s); err == nil {
+ return s
+ }
+ var inner []AnthropicContentBlock
+ if err := json.Unmarshal(b.Content, &inner); err == nil {
+ var parts []string
+ for _, ib := range inner {
+ if ib.Type == "text" && ib.Text != "" {
+ parts = append(parts, ib.Text)
+ }
+ }
+ return strings.Join(parts, "\n\n")
+ }
+ return ""
+}
+
+// extractAnthropicTextFromBlocks joins all text blocks, ignoring thinking/
+// tool_use/tool_result blocks.
+func extractAnthropicTextFromBlocks(blocks []AnthropicContentBlock) string {
+ var parts []string
+ for _, b := range blocks {
+ if b.Type == "text" && b.Text != "" {
+ parts = append(parts, b.Text)
+ }
+ }
+ return strings.Join(parts, "\n\n")
+}
+
+// convertAnthropicToolsToResponses maps Anthropic tool definitions to
+// Responses API function tools (input_schema → parameters).
+func convertAnthropicToolsToResponses(tools []AnthropicTool) []ResponsesTool {
+ out := make([]ResponsesTool, len(tools))
+ for i, t := range tools {
+ out[i] = ResponsesTool{
+ Type: "function",
+ Name: t.Name,
+ Description: t.Description,
+ Parameters: t.InputSchema,
+ }
+ }
+ return out
+}
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go
new file mode 100644
index 00000000..16770650
--- /dev/null
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -0,0 +1,436 @@
+package apicompat
+
+import (
+ "encoding/json"
+ "fmt"
+ "time"
+)
+
+// ---------------------------------------------------------------------------
+// Non-streaming: ResponsesResponse → AnthropicResponse
+// ---------------------------------------------------------------------------
+
+// ResponsesToAnthropic converts a Responses API response directly into an
+// Anthropic Messages response. Reasoning output items are mapped to thinking
+// blocks; function_call items become tool_use blocks.
+func ResponsesToAnthropic(resp *ResponsesResponse, model string) *AnthropicResponse {
+ out := &AnthropicResponse{
+ ID: resp.ID,
+ Type: "message",
+ Role: "assistant",
+ Model: model,
+ }
+
+ var blocks []AnthropicContentBlock
+
+ for _, item := range resp.Output {
+ switch item.Type {
+ case "reasoning":
+ summaryText := ""
+ for _, s := range item.Summary {
+ if s.Type == "summary_text" && s.Text != "" {
+ summaryText += s.Text
+ }
+ }
+ if summaryText != "" {
+ blocks = append(blocks, AnthropicContentBlock{
+ Type: "thinking",
+ Thinking: summaryText,
+ })
+ }
+ case "message":
+ for _, part := range item.Content {
+ if part.Type == "output_text" && part.Text != "" {
+ blocks = append(blocks, AnthropicContentBlock{
+ Type: "text",
+ Text: part.Text,
+ })
+ }
+ }
+ case "function_call":
+ blocks = append(blocks, AnthropicContentBlock{
+ Type: "tool_use",
+ ID: fromResponsesCallID(item.CallID),
+ Name: item.Name,
+ Input: json.RawMessage(item.Arguments),
+ })
+ }
+ }
+
+ if len(blocks) == 0 {
+ blocks = append(blocks, AnthropicContentBlock{Type: "text", Text: ""})
+ }
+ out.Content = blocks
+
+ out.StopReason = responsesStatusToAnthropicStopReason(resp.Status, resp.IncompleteDetails, blocks)
+
+ if resp.Usage != nil {
+ out.Usage = AnthropicUsage{
+ InputTokens: resp.Usage.InputTokens,
+ OutputTokens: resp.Usage.OutputTokens,
+ }
+ if resp.Usage.InputTokensDetails != nil {
+ out.Usage.CacheReadInputTokens = resp.Usage.InputTokensDetails.CachedTokens
+ }
+ }
+
+ return out
+}
+
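+// responsesStatusToAnthropicStopReason maps a Responses status to the closest
+// Anthropic stop_reason: "incomplete" with reason max_output_tokens becomes
+// "max_tokens", a completed response ending in a tool_use block becomes
+// "tool_use", and everything else becomes "end_turn".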
+func responsesStatusToAnthropicStopReason(status string, details *ResponsesIncompleteDetails, blocks []AnthropicContentBlock) string {
+ switch status {
+ case "incomplete":
+ if details != nil && details.Reason == "max_output_tokens" {
+ return "max_tokens"
+ }
+ return "end_turn"
+ case "completed":
+ if len(blocks) > 0 && blocks[len(blocks)-1].Type == "tool_use" {
+ return "tool_use"
+ }
+ return "end_turn"
+ default:
+ return "end_turn"
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Streaming: ResponsesStreamEvent → []AnthropicStreamEvent (stateful converter)
+// ---------------------------------------------------------------------------
+
+// ResponsesEventToAnthropicState tracks state for converting a sequence of
+// Responses SSE events directly into Anthropic SSE events.
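+//
+// A typical event sequence maps as follows (illustrative):
+//
+//	response.created           → message_start
+//	response.output_text.delta → content_block_start (first delta) + content_block_delta
+//	response.completed         → message_delta + message_stop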
+type ResponsesEventToAnthropicState struct {
+ MessageStartSent bool
+ MessageStopSent bool
+
+ ContentBlockIndex int
+ ContentBlockOpen bool
+ CurrentBlockType string // "text" | "thinking" | "tool_use"
+
+ // OutputIndexToBlockIdx maps Responses output_index → Anthropic content block index.
+ OutputIndexToBlockIdx map[int]int
+
+ InputTokens int
+ OutputTokens int
+ CacheReadInputTokens int
+
+ ResponseID string
+ Model string
+ Created int64
+}
+
+// NewResponsesEventToAnthropicState returns an initialized stream state.
+func NewResponsesEventToAnthropicState() *ResponsesEventToAnthropicState {
+ return &ResponsesEventToAnthropicState{
+ OutputIndexToBlockIdx: make(map[int]int),
+ Created: time.Now().Unix(),
+ }
+}
+
+// ResponsesEventToAnthropicEvents converts a single Responses SSE event into
+// zero or more Anthropic SSE events, updating state as it goes.
+func ResponsesEventToAnthropicEvents(
+ evt *ResponsesStreamEvent,
+ state *ResponsesEventToAnthropicState,
+) []AnthropicStreamEvent {
+ switch evt.Type {
+ case "response.created":
+ return resToAnthHandleCreated(evt, state)
+ case "response.output_item.added":
+ return resToAnthHandleOutputItemAdded(evt, state)
+ case "response.output_text.delta":
+ return resToAnthHandleTextDelta(evt, state)
+ case "response.output_text.done":
+ return resToAnthHandleBlockDone(state)
+ case "response.function_call_arguments.delta":
+ return resToAnthHandleFuncArgsDelta(evt, state)
+ case "response.function_call_arguments.done":
+ return resToAnthHandleBlockDone(state)
+ case "response.output_item.done":
+ return resToAnthHandleOutputItemDone(evt, state)
+ case "response.reasoning_summary_text.delta":
+ return resToAnthHandleReasoningDelta(evt, state)
+ case "response.reasoning_summary_text.done":
+ return resToAnthHandleBlockDone(state)
+ case "response.completed", "response.incomplete":
+ return resToAnthHandleCompleted(evt, state)
+ default:
+ return nil
+ }
+}
+
+// FinalizeResponsesAnthropicStream emits synthetic termination events if the
+// stream ended without a proper completion event.
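+// It closes any open content block, then emits message_delta (stop_reason
+// "end_turn") and message_stop so clients always observe a well-formed stream.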
+func FinalizeResponsesAnthropicStream(state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if !state.MessageStartSent || state.MessageStopSent {
+ return nil
+ }
+
+ var events []AnthropicStreamEvent
+ events = append(events, closeCurrentBlock(state)...)
+
+ events = append(events,
+ AnthropicStreamEvent{
+ Type: "message_delta",
+ Delta: &AnthropicDelta{
+ StopReason: "end_turn",
+ },
+ Usage: &AnthropicUsage{
+ InputTokens: state.InputTokens,
+ OutputTokens: state.OutputTokens,
+ CacheReadInputTokens: state.CacheReadInputTokens,
+ },
+ },
+ AnthropicStreamEvent{Type: "message_stop"},
+ )
+ state.MessageStopSent = true
+ return events
+}
+
+// ResponsesAnthropicEventToSSE formats an AnthropicStreamEvent as an SSE line pair.
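+//
+//	e.g. {Type: "message_stop"} → "event: message_stop\ndata: {\"type\":\"message_stop\"}\n\n"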
+func ResponsesAnthropicEventToSSE(evt AnthropicStreamEvent) (string, error) {
+ data, err := json.Marshal(evt)
+ if err != nil {
+ return "", err
+ }
+ return fmt.Sprintf("event: %s\ndata: %s\n\n", evt.Type, data), nil
+}
+
+// --- internal handlers ---
+
+func resToAnthHandleCreated(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Response != nil {
+ state.ResponseID = evt.Response.ID
+ // Only adopt the upstream model if the caller did not pre-set an override (e.g. the original request model).
+ if state.Model == "" {
+ state.Model = evt.Response.Model
+ }
+ }
+
+ if state.MessageStartSent {
+ return nil
+ }
+ state.MessageStartSent = true
+
+ return []AnthropicStreamEvent{{
+ Type: "message_start",
+ Message: &AnthropicResponse{
+ ID: state.ResponseID,
+ Type: "message",
+ Role: "assistant",
+ Content: []AnthropicContentBlock{},
+ Model: state.Model,
+ Usage: AnthropicUsage{
+ InputTokens: 0,
+ OutputTokens: 0,
+ },
+ },
+ }}
+}
+
+func resToAnthHandleOutputItemAdded(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Item == nil {
+ return nil
+ }
+
+ switch evt.Item.Type {
+ case "function_call":
+ var events []AnthropicStreamEvent
+ events = append(events, closeCurrentBlock(state)...)
+
+ idx := state.ContentBlockIndex
+ state.OutputIndexToBlockIdx[evt.OutputIndex] = idx
+ state.ContentBlockOpen = true
+ state.CurrentBlockType = "tool_use"
+
+ events = append(events, AnthropicStreamEvent{
+ Type: "content_block_start",
+ Index: &idx,
+ ContentBlock: &AnthropicContentBlock{
+ Type: "tool_use",
+ ID: fromResponsesCallID(evt.Item.CallID),
+ Name: evt.Item.Name,
+ Input: json.RawMessage("{}"),
+ },
+ })
+ return events
+
+ case "reasoning":
+ var events []AnthropicStreamEvent
+ events = append(events, closeCurrentBlock(state)...)
+
+ idx := state.ContentBlockIndex
+ state.OutputIndexToBlockIdx[evt.OutputIndex] = idx
+ state.ContentBlockOpen = true
+ state.CurrentBlockType = "thinking"
+
+ events = append(events, AnthropicStreamEvent{
+ Type: "content_block_start",
+ Index: &idx,
+ ContentBlock: &AnthropicContentBlock{
+ Type: "thinking",
+ Thinking: "",
+ },
+ })
+ return events
+
+ case "message":
+ return nil
+ }
+
+ return nil
+}
+
+func resToAnthHandleTextDelta(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Delta == "" {
+ return nil
+ }
+
+ var events []AnthropicStreamEvent
+
+ if !state.ContentBlockOpen || state.CurrentBlockType != "text" {
+ events = append(events, closeCurrentBlock(state)...)
+
+ idx := state.ContentBlockIndex
+ state.ContentBlockOpen = true
+ state.CurrentBlockType = "text"
+
+ events = append(events, AnthropicStreamEvent{
+ Type: "content_block_start",
+ Index: &idx,
+ ContentBlock: &AnthropicContentBlock{
+ Type: "text",
+ Text: "",
+ },
+ })
+ }
+
+ idx := state.ContentBlockIndex
+ events = append(events, AnthropicStreamEvent{
+ Type: "content_block_delta",
+ Index: &idx,
+ Delta: &AnthropicDelta{
+ Type: "text_delta",
+ Text: evt.Delta,
+ },
+ })
+ return events
+}
+
+func resToAnthHandleFuncArgsDelta(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Delta == "" {
+ return nil
+ }
+
+ blockIdx, ok := state.OutputIndexToBlockIdx[evt.OutputIndex]
+ if !ok {
+ return nil
+ }
+
+ return []AnthropicStreamEvent{{
+ Type: "content_block_delta",
+ Index: &blockIdx,
+ Delta: &AnthropicDelta{
+ Type: "input_json_delta",
+ PartialJSON: evt.Delta,
+ },
+ }}
+}
+
+func resToAnthHandleReasoningDelta(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Delta == "" {
+ return nil
+ }
+
+ blockIdx, ok := state.OutputIndexToBlockIdx[evt.OutputIndex]
+ if !ok {
+ return nil
+ }
+
+ return []AnthropicStreamEvent{{
+ Type: "content_block_delta",
+ Index: &blockIdx,
+ Delta: &AnthropicDelta{
+ Type: "thinking_delta",
+ Thinking: evt.Delta,
+ },
+ }}
+}
+
+func resToAnthHandleBlockDone(state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if !state.ContentBlockOpen {
+ return nil
+ }
+ return closeCurrentBlock(state)
+}
+
+func resToAnthHandleOutputItemDone(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if evt.Item == nil {
+ return nil
+ }
+ if state.ContentBlockOpen {
+ return closeCurrentBlock(state)
+ }
+ return nil
+}
+
+func resToAnthHandleCompleted(evt *ResponsesStreamEvent, state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if state.MessageStopSent {
+ return nil
+ }
+
+ var events []AnthropicStreamEvent
+ events = append(events, closeCurrentBlock(state)...)
+
+ stopReason := "end_turn"
+ if evt.Response != nil {
+ if evt.Response.Usage != nil {
+ state.InputTokens = evt.Response.Usage.InputTokens
+ state.OutputTokens = evt.Response.Usage.OutputTokens
+ if evt.Response.Usage.InputTokensDetails != nil {
+ state.CacheReadInputTokens = evt.Response.Usage.InputTokensDetails.CachedTokens
+ }
+ }
+ switch evt.Response.Status {
+ case "incomplete":
+ if evt.Response.IncompleteDetails != nil && evt.Response.IncompleteDetails.Reason == "max_output_tokens" {
+ stopReason = "max_tokens"
+ }
+ case "completed":
+ if state.ContentBlockIndex > 0 && state.CurrentBlockType == "tool_use" {
+ stopReason = "tool_use"
+ }
+ }
+ }
+
+ events = append(events,
+ AnthropicStreamEvent{
+ Type: "message_delta",
+ Delta: &AnthropicDelta{
+ StopReason: stopReason,
+ },
+ Usage: &AnthropicUsage{
+ InputTokens: state.InputTokens,
+ OutputTokens: state.OutputTokens,
+ CacheReadInputTokens: state.CacheReadInputTokens,
+ },
+ },
+ AnthropicStreamEvent{Type: "message_stop"},
+ )
+ state.MessageStopSent = true
+ return events
+}
+
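+// closeCurrentBlock emits content_block_stop for the currently open block (if
+// any) and advances ContentBlockIndex so the next block gets a fresh index.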
+func closeCurrentBlock(state *ResponsesEventToAnthropicState) []AnthropicStreamEvent {
+ if !state.ContentBlockOpen {
+ return nil
+ }
+ idx := state.ContentBlockIndex
+ state.ContentBlockOpen = false
+ state.ContentBlockIndex++
+ return []AnthropicStreamEvent{{
+ Type: "content_block_stop",
+ Index: &idx,
+ }}
+}
diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go
new file mode 100644
index 00000000..92e85318
--- /dev/null
+++ b/backend/internal/pkg/apicompat/types.go
@@ -0,0 +1,544 @@
+// Package apicompat provides type definitions and conversion utilities for
+// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
+// Responses API formats. It enables multi-protocol support so that clients
+// using different API formats can be served through a unified gateway.
+package apicompat
+
+import "encoding/json"
+
+// ---------------------------------------------------------------------------
+// Anthropic Messages API types
+// ---------------------------------------------------------------------------
+
+// AnthropicRequest is the request body for POST /v1/messages.
+type AnthropicRequest struct {
+ Model string `json:"model"`
+ MaxTokens int `json:"max_tokens"`
+ System json.RawMessage `json:"system,omitempty"` // string or []AnthropicContentBlock
+ Messages []AnthropicMessage `json:"messages"`
+ Tools []AnthropicTool `json:"tools,omitempty"`
+ Stream bool `json:"stream,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty"`
+ TopP *float64 `json:"top_p,omitempty"`
+ StopSeqs []string `json:"stop_sequences,omitempty"`
+}
+
+// AnthropicMessage is a single message in the Anthropic conversation.
+type AnthropicMessage struct {
+ Role string `json:"role"` // "user" | "assistant"
+ Content json.RawMessage `json:"content"`
+}
+
+// AnthropicContentBlock is one block inside a message's content array.
+type AnthropicContentBlock struct {
+ Type string `json:"type"`
+
+ // type=text
+ Text string `json:"text,omitempty"`
+
+ // type=thinking
+ Thinking string `json:"thinking,omitempty"`
+
+ // type=tool_use
+ ID string `json:"id,omitempty"`
+ Name string `json:"name,omitempty"`
+ Input json.RawMessage `json:"input,omitempty"`
+
+ // type=tool_result
+ ToolUseID string `json:"tool_use_id,omitempty"`
+ Content json.RawMessage `json:"content,omitempty"` // string or []AnthropicContentBlock
+ IsError bool `json:"is_error,omitempty"`
+}
+
+// AnthropicTool describes a tool available to the model.
+type AnthropicTool struct {
+ Name string `json:"name"`
+ Description string `json:"description,omitempty"`
+ InputSchema json.RawMessage `json:"input_schema"` // JSON Schema object
+}
+
+// AnthropicResponse is the non-streaming response from POST /v1/messages.
+type AnthropicResponse struct {
+ ID string `json:"id"`
+ Type string `json:"type"` // "message"
+ Role string `json:"role"` // "assistant"
+ Content []AnthropicContentBlock `json:"content"`
+ Model string `json:"model"`
+ StopReason string `json:"stop_reason"`
+ StopSequence *string `json:"stop_sequence,omitempty"`
+ Usage AnthropicUsage `json:"usage"`
+}
+
+// AnthropicUsage holds token counts in Anthropic format.
+type AnthropicUsage struct {
+ InputTokens int `json:"input_tokens"`
+ OutputTokens int `json:"output_tokens"`
+ CacheCreationInputTokens int `json:"cache_creation_input_tokens"`
+ CacheReadInputTokens int `json:"cache_read_input_tokens"`
+}
+
+// ---------------------------------------------------------------------------
+// Anthropic SSE event types
+// ---------------------------------------------------------------------------
+
+// AnthropicStreamEvent is a single SSE event in the Anthropic streaming protocol.
+type AnthropicStreamEvent struct {
+ Type string `json:"type"`
+
+ // message_start
+ Message *AnthropicResponse `json:"message,omitempty"`
+
+ // content_block_start
+ Index *int `json:"index,omitempty"`
+ ContentBlock *AnthropicContentBlock `json:"content_block,omitempty"`
+
+ // content_block_delta
+ Delta *AnthropicDelta `json:"delta,omitempty"`
+
+ // message_delta
+ Usage *AnthropicUsage `json:"usage,omitempty"`
+}
+
+// AnthropicDelta carries incremental content in streaming events.
+type AnthropicDelta struct {
+ Type string `json:"type,omitempty"` // "text_delta" | "input_json_delta" | "thinking_delta" | "signature_delta"
+
+ // text_delta
+ Text string `json:"text,omitempty"`
+
+ // input_json_delta
+ PartialJSON string `json:"partial_json,omitempty"`
+
+ // thinking_delta
+ Thinking string `json:"thinking,omitempty"`
+
+ // signature_delta
+ Signature string `json:"signature,omitempty"`
+
+ // message_delta fields
+ StopReason string `json:"stop_reason,omitempty"`
+ StopSequence *string `json:"stop_sequence,omitempty"`
+}
+
+// ---------------------------------------------------------------------------
+// OpenAI Chat Completions API types
+// ---------------------------------------------------------------------------
+
+// ChatRequest is the request body for POST /v1/chat/completions.
+type ChatRequest struct {
+ Model string `json:"model"`
+ Messages []ChatMessage `json:"messages"`
+ MaxTokens *int `json:"max_tokens,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty"`
+ TopP *float64 `json:"top_p,omitempty"`
+ Stream bool `json:"stream,omitempty"`
+ Tools []ChatTool `json:"tools,omitempty"`
+ Stop json.RawMessage `json:"stop,omitempty"` // string or []string
+}
+
+// ChatMessage is a single message in the Chat Completions conversation.
+type ChatMessage struct {
+ Role string `json:"role"` // "system" | "user" | "assistant" | "tool"
+ Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
+
+ // assistant fields
+ ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
+
+ // tool fields
+ ToolCallID string `json:"tool_call_id,omitempty"`
+
+ // Copilot-specific reasoning passthrough
+ ReasoningText string `json:"reasoning_text,omitempty"`
+ ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
+}
+
+// ChatContentPart is a typed content part in a multi-part message.
+type ChatContentPart struct {
+ Type string `json:"type"` // "text" | "image_url"
+ Text string `json:"text,omitempty"`
+}
+
+// ChatToolCall represents a tool invocation in an assistant message.
+// In streaming deltas, Index identifies which tool call is being updated.
+type ChatToolCall struct {
+ Index int `json:"index"`
+ ID string `json:"id,omitempty"`
+ Type string `json:"type,omitempty"` // "function"
+ Function ChatFunctionCall `json:"function"`
+}
+
+// ChatFunctionCall holds the function name and arguments.
+type ChatFunctionCall struct {
+ Name string `json:"name"`
+ Arguments string `json:"arguments"`
+}
+
+// ChatTool describes a tool available to the model.
+type ChatTool struct {
+ Type string `json:"type"` // "function"
+ Function ChatFunction `json:"function"`
+}
+
+// ChatFunction is the function definition inside a ChatTool.
+type ChatFunction struct {
+ Name string `json:"name"`
+ Description string `json:"description,omitempty"`
+ Parameters json.RawMessage `json:"parameters,omitempty"` // JSON Schema
+}
+
+// ChatResponse is the non-streaming response from POST /v1/chat/completions.
+type ChatResponse struct {
+ ID string `json:"id"`
+ Object string `json:"object"` // "chat.completion"
+ Created int64 `json:"created"`
+ Model string `json:"model"`
+ Choices []ChatChoice `json:"choices"`
+ Usage *ChatUsage `json:"usage,omitempty"`
+}
+
+// ChatChoice is one completion choice.
+type ChatChoice struct {
+ Index int `json:"index"`
+ Message ChatMessage `json:"message"`
+ FinishReason string `json:"finish_reason"`
+}
+
+// ChatUsage holds token counts in Chat Completions format.
+type ChatUsage struct {
+ PromptTokens int `json:"prompt_tokens"`
+ CompletionTokens int `json:"completion_tokens"`
+ TotalTokens int `json:"total_tokens"`
+}
+
+// ---------------------------------------------------------------------------
+// Chat Completions SSE types
+// ---------------------------------------------------------------------------
+
+// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
+type ChatStreamChunk struct {
+ ID string `json:"id"`
+ Object string `json:"object"` // "chat.completion.chunk"
+ Created int64 `json:"created"`
+ Model string `json:"model"`
+ Choices []ChatStreamChoice `json:"choices"`
+ Usage *ChatUsage `json:"usage,omitempty"`
+}
+
+// ChatStreamChoice is one choice inside a streaming chunk.
+type ChatStreamChoice struct {
+ Index int `json:"index"`
+ Delta ChatStreamDelta `json:"delta"`
+ FinishReason *string `json:"finish_reason"`
+}
+
+// ChatStreamDelta carries incremental content in a streaming chunk.
+type ChatStreamDelta struct {
+ Role string `json:"role,omitempty"`
+ Content string `json:"content,omitempty"`
+ ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
+
+ // Copilot-specific reasoning passthrough (streaming)
+ ReasoningText string `json:"reasoning_text,omitempty"`
+ ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
+}
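+
+// Illustrative chunk (hand-written sample) showing how a text delta arrives:
+//
+//   {"id":"chatcmpl-1","object":"chat.completion.chunk","created":0,"model":"gpt-5",
+//    "choices":[{"index":0,"delta":{"content":"Hel"},"finish_reason":null}]}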
+
+// ---------------------------------------------------------------------------
+// OpenAI Responses API types
+// ---------------------------------------------------------------------------
+
+// ResponsesRequest is the request body for POST /v1/responses.
+type ResponsesRequest struct {
+ Model string `json:"model"`
+ Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
+ MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty"`
+ TopP *float64 `json:"top_p,omitempty"`
+ Stream bool `json:"stream,omitempty"`
+ Tools []ResponsesTool `json:"tools,omitempty"`
+ Include []string `json:"include,omitempty"`
+ Store *bool `json:"store,omitempty"`
+}
+
+// ResponsesInputItem is one item in the Responses API input array.
+// The Type field determines which other fields are populated.
+type ResponsesInputItem struct {
+ // Common
+ Type string `json:"type,omitempty"` // "" for role-based messages
+
+ // Role-based messages (system/user/assistant)
+ Role string `json:"role,omitempty"`
+ Content json.RawMessage `json:"content,omitempty"` // string or []ResponsesContentPart
+
+ // type=function_call
+ CallID string `json:"call_id,omitempty"`
+ Name string `json:"name,omitempty"`
+ Arguments string `json:"arguments,omitempty"`
+ ID string `json:"id,omitempty"`
+
+ // type=function_call_output
+ Output string `json:"output,omitempty"`
+}
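+
+// Illustrative items for a tool round-trip (call_id and values are made up):
+//
+//   {"type":"function_call","call_id":"call_1","name":"get_weather","arguments":"{\"city\":\"SF\"}"}
+//   {"type":"function_call_output","call_id":"call_1","output":"{\"temp_c\":18}"}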
+
+// ResponsesContentPart is a typed content part in a Responses message.
+type ResponsesContentPart struct {
+ Type string `json:"type"` // "input_text" | "output_text" | "input_image"
+ Text string `json:"text,omitempty"`
+}
+
+// ResponsesTool describes a tool in the Responses API.
+type ResponsesTool struct {
+ Type string `json:"type"` // "function" | "web_search" | "local_shell" etc.
+ Name string `json:"name,omitempty"`
+ Description string `json:"description,omitempty"`
+ Parameters json.RawMessage `json:"parameters,omitempty"`
+ Strict *bool `json:"strict,omitempty"`
+}
+
+// ResponsesResponse is the non-streaming response from POST /v1/responses.
+type ResponsesResponse struct {
+ ID string `json:"id"`
+ Object string `json:"object"` // "response"
+ Model string `json:"model"`
+ Status string `json:"status"` // "completed" | "incomplete" | "failed"
+ Output []ResponsesOutput `json:"output"`
+ Usage *ResponsesUsage `json:"usage,omitempty"`
+
+ // incomplete_details is present when status="incomplete"
+ IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
+}
+
+// ResponsesIncompleteDetails explains why a response is incomplete.
+type ResponsesIncompleteDetails struct {
+ Reason string `json:"reason"` // "max_output_tokens" | "content_filter"
+}
+
+// ResponsesOutput is one output item in a Responses API response.
+type ResponsesOutput struct {
+ Type string `json:"type"` // "message" | "reasoning" | "function_call"
+
+ // type=message
+ ID string `json:"id,omitempty"`
+ Role string `json:"role,omitempty"`
+ Content []ResponsesContentPart `json:"content,omitempty"`
+ Status string `json:"status,omitempty"`
+
+ // type=reasoning
+ EncryptedContent string `json:"encrypted_content,omitempty"`
+ Summary []ResponsesSummary `json:"summary,omitempty"`
+
+ // type=function_call
+ CallID string `json:"call_id,omitempty"`
+ Name string `json:"name,omitempty"`
+ Arguments string `json:"arguments,omitempty"`
+}
+
+// ResponsesSummary is a summary text block inside a reasoning output.
+type ResponsesSummary struct {
+ Type string `json:"type"` // "summary_text"
+ Text string `json:"text"`
+}
+
+// ResponsesUsage holds token counts in Responses API format.
+type ResponsesUsage struct {
+ InputTokens int `json:"input_tokens"`
+ OutputTokens int `json:"output_tokens"`
+ TotalTokens int `json:"total_tokens"`
+
+ // Optional detailed breakdown
+ InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
+ OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
+}
+
+// ---------------------------------------------------------------------------
+// Responses SSE event types
+// ---------------------------------------------------------------------------
+
+// ResponsesStreamEvent is a single SSE event in the Responses streaming protocol.
+// The Type field corresponds to the "type" in the JSON payload.
+type ResponsesStreamEvent struct {
+ Type string `json:"type"`
+
+ // response.created / response.completed / response.failed / response.incomplete
+ Response *ResponsesResponse `json:"response,omitempty"`
+
+ // response.output_item.added / response.output_item.done
+ Item *ResponsesOutput `json:"item,omitempty"`
+
+ // response.output_text.delta / response.output_text.done
+ OutputIndex int `json:"output_index,omitempty"`
+ ContentIndex int `json:"content_index,omitempty"`
+ Delta string `json:"delta,omitempty"`
+ Text string `json:"text,omitempty"`
+ ItemID string `json:"item_id,omitempty"`
+
+ // response.function_call_arguments.delta / done
+ CallID string `json:"call_id,omitempty"`
+ Name string `json:"name,omitempty"`
+ Arguments string `json:"arguments,omitempty"`
+
+ // response.reasoning_summary_text.delta / done
+ // Reuses the Text/Delta fields above; SummaryIndex identifies which summary part is updated
+ SummaryIndex int `json:"summary_index,omitempty"`
+
+ // error event fields
+ Code string `json:"code,omitempty"`
+ Param string `json:"param,omitempty"`
+
+ // Sequence number for ordering events
+ SequenceNumber int `json:"sequence_number,omitempty"`
+}
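+
+// Illustrative events (hand-written, abbreviated samples):
+//
+//   {"type":"response.output_text.delta","output_index":0,"content_index":0,"delta":"Hel","sequence_number":3}
+//   {"type":"response.completed","response":{"id":"resp_1","status":"completed","output":[]}}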
+
+// ResponsesOutputReasoning is a reasoning output item in the Responses API.
+// This type represents the "type":"reasoning" output item that contains
+// extended thinking from the model.
+type ResponsesOutputReasoning struct {
+ ID string `json:"id,omitempty"`
+ Type string `json:"type"` // "reasoning"
+ Status string `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
+ EncryptedContent string `json:"encrypted_content,omitempty"`
+ Summary []ResponsesReasoningSummary `json:"summary,omitempty"`
+}
+
+// ResponsesReasoningSummary is a summary text block inside a reasoning output.
+type ResponsesReasoningSummary struct {
+ Type string `json:"type"` // "summary_text"
+ Text string `json:"text"`
+}
+
+// ResponsesStreamState maintains the state for converting Responses streaming
+// events to Chat Completions format. It tracks content blocks, tool calls,
+// reasoning blocks, and other streaming artifacts.
+type ResponsesStreamState struct {
+ // Response metadata
+ ID string
+ Model string
+ Created int64
+
+ // Content tracking
+ ContentIndex int
+ CurrentText string
+ CurrentItemID string
+ PendingText []string // Text to accumulate before emitting
+
+ // Tool call tracking
+ ToolCalls []ResponsesToolCallState
+ CurrentToolCall *ResponsesToolCallState
+
+ // Reasoning tracking
+ ReasoningBlocks []ResponsesReasoningState
+ CurrentReasoning *ResponsesReasoningState
+
+ // Usage tracking
+ InputTokens int
+ OutputTokens int
+
+ // Status tracking
+ Status string
+ FinishReason string
+}
+
+// ResponsesToolCallState tracks a single tool call during streaming.
+type ResponsesToolCallState struct {
+ Index int
+ ItemID string
+ CallID string
+ Name string
+ Arguments string
+ Status string
+ IsComplete bool
+}
+
+// ResponsesReasoningState tracks a reasoning block during streaming.
+type ResponsesReasoningState struct {
+ ItemID string
+ SummaryIndex int
+ SummaryText string
+ Status string
+ IsComplete bool
+}
+
+// ResponsesUsageDetail provides additional token usage details in Responses format.
+type ResponsesUsageDetail struct {
+ InputTokens int `json:"input_tokens"`
+ OutputTokens int `json:"output_tokens"`
+ TotalTokens int `json:"total_tokens"`
+
+ // Optional detailed breakdown
+ InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
+ OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
+}
+
+// ResponsesInputTokensDetails breaks down input token usage.
+type ResponsesInputTokensDetails struct {
+ CachedTokens int `json:"cached_tokens,omitempty"`
+}
+
+// ResponsesOutputTokensDetails breaks down output token usage.
+type ResponsesOutputTokensDetails struct {
+ ReasoningTokens int `json:"reasoning_tokens,omitempty"`
+}
+
+// ---------------------------------------------------------------------------
+// Finish reason mapping helpers
+// ---------------------------------------------------------------------------
+
+// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason.
+func ChatFinishToAnthropic(reason string) string {
+ switch reason {
+ case "stop":
+ return "end_turn"
+ case "tool_calls":
+ return "tool_use"
+ case "length":
+ return "max_tokens"
+ default:
+ return "end_turn"
+ }
+}
+
+// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason.
+func AnthropicStopToChat(reason string) string {
+ switch reason {
+ case "end_turn":
+ return "stop"
+ case "tool_use":
+ return "tool_calls"
+ case "max_tokens":
+ return "length"
+ default:
+ return "stop"
+ }
+}
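+
+// These mappings are lossy by design: unknown reasons collapse to the defaults
+// ("end_turn" / "stop"), so for example
+//
+//   AnthropicStopToChat(ChatFinishToAnthropic("content_filter")) == "stop"
+//
+// rather than round-tripping the original value.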
+
+// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason.
+func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
+ switch status {
+ case "completed":
+ return "stop"
+ case "incomplete":
+ if details != nil && details.Reason == "max_output_tokens" {
+ return "length"
+ }
+ return "stop"
+ default:
+ return "stop"
+ }
+}
+
+// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status.
+func ChatFinishToResponsesStatus(reason string) string {
+ switch reason {
+ case "length":
+ return "incomplete"
+ default:
+ return "completed"
+ }
+}
+
+// ---------------------------------------------------------------------------
+// Shared constants
+// ---------------------------------------------------------------------------
+
+// minMaxOutputTokens is the floor for max_output_tokens in a Responses request.
+// Very small values may cause upstream API errors, so we enforce a minimum.
+const minMaxOutputTokens = 128
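+
+// Illustrative intended use of the floor (assumed call-site shape; the real
+// callers live in the request converters):
+//
+//   if req.MaxOutputTokens != nil && *req.MaxOutputTokens < minMaxOutputTokens {
+//       *req.MaxOutputTokens = minMaxOutputTokens
+//   }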
diff --git a/backend/internal/server/routes/gateway.go b/backend/internal/server/routes/gateway.go
index 13f13320..ade2ed83 100644
--- a/backend/internal/server/routes/gateway.go
+++ b/backend/internal/server/routes/gateway.go
@@ -43,12 +43,33 @@ func RegisterGatewayRoutes(
gateway.Use(gin.HandlerFunc(apiKeyAuth))
gateway.Use(requireGroupAnthropic)
{
- gateway.POST("/messages", h.Gateway.Messages)
- gateway.POST("/messages/count_tokens", h.Gateway.CountTokens)
+ // /v1/messages: auto-route based on group platform
+ gateway.POST("/messages", func(c *gin.Context) {
+ if getGroupPlatform(c) == service.PlatformOpenAI {
+ h.OpenAIGateway.Messages(c)
+ return
+ }
+ h.Gateway.Messages(c)
+ })
+ // /v1/messages/count_tokens: OpenAI groups get 404
+ gateway.POST("/messages/count_tokens", func(c *gin.Context) {
+ if getGroupPlatform(c) == service.PlatformOpenAI {
+ c.JSON(http.StatusNotFound, gin.H{
+ "type": "error",
+ "error": gin.H{
+ "type": "not_found_error",
+ "message": "Token counting is not supported for this platform",
+ },
+ })
+ return
+ }
+ h.Gateway.CountTokens(c)
+ })
gateway.GET("/models", h.Gateway.Models)
gateway.GET("/usage", h.Gateway.Usage)
// OpenAI Responses API
gateway.POST("/responses", h.OpenAIGateway.Responses)
+ gateway.POST("/responses/*subpath", h.OpenAIGateway.Responses)
gateway.GET("/responses", h.OpenAIGateway.ResponsesWebSocket)
// 明确阻止旧协议入口:OpenAI 仅支持 Responses API,避免客户端误解为会自动路由到其它平台。
gateway.POST("/chat/completions", func(c *gin.Context) {
@@ -77,6 +98,7 @@ func RegisterGatewayRoutes(
// OpenAI Responses API(不带v1前缀的别名)
r.POST("/responses", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
+ r.POST("/responses/*subpath", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.Responses)
r.GET("/responses", bodyLimit, clientRequestID, opsErrorLogger, gin.HandlerFunc(apiKeyAuth), requireGroupAnthropic, h.OpenAIGateway.ResponsesWebSocket)
// Antigravity 模型列表
@@ -132,3 +154,12 @@ func RegisterGatewayRoutes(
// Sora 媒体代理(签名 URL,无需 API Key)
r.GET("/sora/media-signed/*filepath", h.SoraGateway.MediaProxySigned)
}
+
+// getGroupPlatform extracts the group platform from the API Key stored in context.
+func getGroupPlatform(c *gin.Context) string {
+ apiKey, ok := middleware.GetAPIKeyFromContext(c)
+ if !ok || apiKey.Group == nil {
+ return ""
+ }
+ return apiKey.Group.Platform
+}
diff --git a/backend/internal/server/routes/gateway_test.go b/backend/internal/server/routes/gateway_test.go
new file mode 100644
index 00000000..00edd31b
--- /dev/null
+++ b/backend/internal/server/routes/gateway_test.go
@@ -0,0 +1,51 @@
+package routes
+
+import (
+ "net/http"
+ "net/http/httptest"
+ "strings"
+ "testing"
+
+ "github.com/Wei-Shaw/sub2api/internal/config"
+ "github.com/Wei-Shaw/sub2api/internal/handler"
+ servermiddleware "github.com/Wei-Shaw/sub2api/internal/server/middleware"
+ "github.com/gin-gonic/gin"
+ "github.com/stretchr/testify/require"
+)
+
+func newGatewayRoutesTestRouter() *gin.Engine {
+ gin.SetMode(gin.TestMode)
+ router := gin.New()
+
+ RegisterGatewayRoutes(
+ router,
+ &handler.Handlers{
+ Gateway: &handler.GatewayHandler{},
+ OpenAIGateway: &handler.OpenAIGatewayHandler{},
+ SoraGateway: &handler.SoraGatewayHandler{},
+ },
+ servermiddleware.APIKeyAuthMiddleware(func(c *gin.Context) {
+ c.Next()
+ }),
+ nil,
+ nil,
+ nil,
+ nil,
+ &config.Config{},
+ )
+
+ return router
+}
+
+func TestGatewayRoutesOpenAIResponsesCompactPathIsRegistered(t *testing.T) {
+ router := newGatewayRoutesTestRouter()
+
+ for _, path := range []string{"/v1/responses/compact", "/responses/compact"} {
+ req := httptest.NewRequest(http.MethodPost, path, strings.NewReader(`{"model":"gpt-5"}`))
+ req.Header.Set("Content-Type", "application/json")
+ w := httptest.NewRecorder()
+
+ router.ServeHTTP(w, req)
+ require.NotEqual(t, http.StatusNotFound, w.Code, "path=%s should hit OpenAI responses handler", path)
+ }
+}
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 9132a1fc..3a6003fc 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -5429,6 +5429,11 @@ func extractUpstreamErrorMessage(body []byte) string {
return m
}
+ // ChatGPT internal API style: {"detail":"..."}
+ if d := gjson.GetBytes(body, "detail").String(); strings.TrimSpace(d) != "" {
+ return d
+ }
+
// 兜底:尝试顶层 message
return gjson.GetBytes(body, "message").String()
}
diff --git a/backend/internal/service/openai_codex_transform.go b/backend/internal/service/openai_codex_transform.go
index 9bc48cf6..9a983b0d 100644
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -77,7 +77,7 @@ type codexTransformResult struct {
PromptCacheKey string
}
-func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool) codexTransformResult {
+func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact bool) codexTransformResult {
result := codexTransformResult{}
// 工具续链需求会影响存储策略与 input 过滤逻辑。
needsToolContinuation := NeedsToolContinuation(reqBody)
@@ -95,15 +95,26 @@ func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool) codexTran
result.NormalizedModel = normalizedModel
}
- // OAuth 走 ChatGPT internal API 时,store 必须为 false;显式 true 也会强制覆盖。
- // 避免上游返回 "Store must be set to false"。
- if v, ok := reqBody["store"].(bool); !ok || v {
- reqBody["store"] = false
- result.Modified = true
- }
- if v, ok := reqBody["stream"].(bool); !ok || !v {
- reqBody["stream"] = true
- result.Modified = true
+ if isCompact {
+ if _, ok := reqBody["store"]; ok {
+ delete(reqBody, "store")
+ result.Modified = true
+ }
+ if _, ok := reqBody["stream"]; ok {
+ delete(reqBody, "stream")
+ result.Modified = true
+ }
+ } else {
+ // When OAuth traffic goes through the ChatGPT internal API, store must be false; an explicit true is forcibly overridden as well.
+ // This avoids the upstream error "Store must be set to false".
+ if v, ok := reqBody["store"].(bool); !ok || v {
+ reqBody["store"] = false
+ result.Modified = true
+ }
+ if v, ok := reqBody["stream"].(bool); !ok || !v {
+ reqBody["stream"] = true
+ result.Modified = true
+ }
}
// Strip parameters unsupported by codex models via the Responses API.
diff --git a/backend/internal/service/openai_codex_transform_test.go b/backend/internal/service/openai_codex_transform_test.go
index 7ee4bbc8..dd8dd07c 100644
--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
@@ -18,7 +18,7 @@ func TestApplyCodexOAuthTransform_ToolContinuationPreservesInput(t *testing.T) {
"tool_choice": "auto",
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
// 未显式设置 store=true,默认为 false。
store, ok := reqBody["store"].(bool)
@@ -53,7 +53,7 @@ func TestApplyCodexOAuthTransform_ExplicitStoreFalsePreserved(t *testing.T) {
"tool_choice": "auto",
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
store, ok := reqBody["store"].(bool)
require.True(t, ok)
@@ -72,13 +72,29 @@ func TestApplyCodexOAuthTransform_ExplicitStoreTrueForcedFalse(t *testing.T) {
"tool_choice": "auto",
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
store, ok := reqBody["store"].(bool)
require.True(t, ok)
require.False(t, store)
}
+func TestApplyCodexOAuthTransform_CompactForcesNonStreaming(t *testing.T) {
+ reqBody := map[string]any{
+ "model": "gpt-5.1-codex",
+ "store": true,
+ "stream": true,
+ }
+
+ result := applyCodexOAuthTransform(reqBody, true, true)
+
+ _, hasStore := reqBody["store"]
+ require.False(t, hasStore)
+ _, hasStream := reqBody["stream"]
+ require.False(t, hasStream)
+ require.True(t, result.Modified)
+}
+
func TestApplyCodexOAuthTransform_NonContinuationDefaultsStoreFalseAndStripsIDs(t *testing.T) {
// 非续链场景:未设置 store 时默认 false,并移除 input 中的 id。
@@ -89,7 +105,7 @@ func TestApplyCodexOAuthTransform_NonContinuationDefaultsStoreFalseAndStripsIDs(
},
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
store, ok := reqBody["store"].(bool)
require.True(t, ok)
@@ -138,7 +154,7 @@ func TestApplyCodexOAuthTransform_NormalizeCodexTools_PreservesResponsesFunction
},
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
tools, ok := reqBody["tools"].([]any)
require.True(t, ok)
@@ -158,7 +174,7 @@ func TestApplyCodexOAuthTransform_EmptyInput(t *testing.T) {
"input": []any{},
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
input, ok := reqBody["input"].([]any)
require.True(t, ok)
@@ -193,7 +209,7 @@ func TestApplyCodexOAuthTransform_CodexCLI_PreservesExistingInstructions(t *test
"instructions": "existing instructions",
}
- result := applyCodexOAuthTransform(reqBody, true) // isCodexCLI=true
+ result := applyCodexOAuthTransform(reqBody, true, false) // isCodexCLI=true
instructions, ok := reqBody["instructions"].(string)
require.True(t, ok)
@@ -210,7 +226,7 @@ func TestApplyCodexOAuthTransform_CodexCLI_SuppliesDefaultWhenEmpty(t *testing.T
// 没有 instructions 字段
}
- result := applyCodexOAuthTransform(reqBody, true) // isCodexCLI=true
+ result := applyCodexOAuthTransform(reqBody, true, false) // isCodexCLI=true
instructions, ok := reqBody["instructions"].(string)
require.True(t, ok)
@@ -226,7 +242,7 @@ func TestApplyCodexOAuthTransform_NonCodexCLI_OverridesInstructions(t *testing.T
"instructions": "old instructions",
}
- result := applyCodexOAuthTransform(reqBody, false) // isCodexCLI=false
+ result := applyCodexOAuthTransform(reqBody, false, false) // isCodexCLI=false
instructions, ok := reqBody["instructions"].(string)
require.True(t, ok)
diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go
new file mode 100644
index 00000000..4fe89732
--- /dev/null
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -0,0 +1,378 @@
+package service
+
+import (
+ "bufio"
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "strings"
+ "time"
+
+ "github.com/Wei-Shaw/sub2api/internal/pkg/apicompat"
+ "github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+ "github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
+ "github.com/gin-gonic/gin"
+ "go.uber.org/zap"
+)
+
+// ForwardAsAnthropic accepts an Anthropic Messages request body, converts it
+// to OpenAI Responses API format, forwards to the OpenAI upstream, and converts
+// the response back to Anthropic Messages format. This enables Claude Code
+// clients to access OpenAI models through the standard /v1/messages endpoint.
+func (s *OpenAIGatewayService) ForwardAsAnthropic(
+ ctx context.Context,
+ c *gin.Context,
+ account *Account,
+ body []byte,
+ promptCacheKey string,
+) (*OpenAIForwardResult, error) {
+ startTime := time.Now()
+
+ // 1. Parse Anthropic request
+ var anthropicReq apicompat.AnthropicRequest
+ if err := json.Unmarshal(body, &anthropicReq); err != nil {
+ return nil, fmt.Errorf("parse anthropic request: %w", err)
+ }
+ originalModel := anthropicReq.Model
+ isStream := anthropicReq.Stream
+
+ // 2. Convert Anthropic → Responses
+ responsesReq, err := apicompat.AnthropicToResponses(&anthropicReq)
+ if err != nil {
+ return nil, fmt.Errorf("convert anthropic to responses: %w", err)
+ }
+
+ // 3. Model mapping
+ mappedModel := account.GetMappedModel(originalModel)
+ responsesReq.Model = mappedModel
+
+ logger.L().Info("openai messages: model mapping applied",
+ zap.Int64("account_id", account.ID),
+ zap.String("original_model", originalModel),
+ zap.String("mapped_model", mappedModel),
+ zap.Bool("stream", isStream),
+ )
+
+ // 4. Marshal Responses request body, then apply OAuth codex transform
+ responsesBody, err := json.Marshal(responsesReq)
+ if err != nil {
+ return nil, fmt.Errorf("marshal responses request: %w", err)
+ }
+
+ if account.Type == AccountTypeOAuth {
+ var reqBody map[string]any
+ if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
+ return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
+ }
+ applyCodexOAuthTransform(reqBody, false, false) // isCodexCLI=false, isCompact=false (Messages path is never compact)
+ // OAuth codex transform forces stream=true upstream, so always use
+ // the streaming response handler regardless of what the client asked.
+ isStream = true
+ responsesBody, err = json.Marshal(reqBody)
+ if err != nil {
+ return nil, fmt.Errorf("remarshal after codex transform: %w", err)
+ }
+ }
+
+ // 5. Get access token
+ token, _, err := s.GetAccessToken(ctx, account)
+ if err != nil {
+ return nil, fmt.Errorf("get access token: %w", err)
+ }
+
+ // 6. Build upstream request
+ upstreamReq, err := s.buildUpstreamRequest(ctx, c, account, responsesBody, token, isStream, promptCacheKey, false)
+ if err != nil {
+ return nil, fmt.Errorf("build upstream request: %w", err)
+ }
+
+ // 7. Send request
+ proxyURL := ""
+ if account.Proxy != nil {
+ proxyURL = account.Proxy.URL()
+ }
+ resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
+ if err != nil {
+ safeErr := sanitizeUpstreamErrorMessage(err.Error())
+ setOpsUpstreamError(c, 0, safeErr, "")
+ appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
+ Platform: account.Platform,
+ AccountID: account.ID,
+ AccountName: account.Name,
+ UpstreamStatusCode: 0,
+ Kind: "request_error",
+ Message: safeErr,
+ })
+ writeAnthropicError(c, http.StatusBadGateway, "api_error", "Upstream request failed")
+ return nil, fmt.Errorf("upstream request failed: %s", safeErr)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ // 8. Handle error response with failover
+ if resp.StatusCode >= 400 {
+ if s.shouldFailoverUpstreamError(resp.StatusCode) {
+ respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+ _ = resp.Body.Close()
+
+ upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
+ upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
+ upstreamDetail := ""
+ if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+ maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
+ if maxBytes <= 0 {
+ maxBytes = 2048
+ }
+ upstreamDetail = truncateString(string(respBody), maxBytes)
+ }
+ appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
+ Platform: account.Platform,
+ AccountID: account.ID,
+ AccountName: account.Name,
+ UpstreamStatusCode: resp.StatusCode,
+ UpstreamRequestID: resp.Header.Get("x-request-id"),
+ Kind: "failover",
+ Message: upstreamMsg,
+ Detail: upstreamDetail,
+ })
+ if s.rateLimitService != nil {
+ s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
+ }
+ return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody}
+ }
+ // Non-failover error: return Anthropic-formatted error to client
+ return s.handleAnthropicErrorResponse(resp, c)
+ }
+
+ // 9. Handle normal response
+ if isStream {
+ return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
+ }
+ return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
+}
+
+// handleAnthropicErrorResponse reads an upstream error and returns it in
+// Anthropic error format.
+func (s *OpenAIGatewayService) handleAnthropicErrorResponse(
+ resp *http.Response,
+ c *gin.Context,
+) (*OpenAIForwardResult, error) {
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+
+ upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(body))
+ if upstreamMsg == "" {
+ upstreamMsg = fmt.Sprintf("Upstream error: %d", resp.StatusCode)
+ }
+ upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
+
+ // Record upstream error details for ops logging
+ upstreamDetail := ""
+ if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+ maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
+ if maxBytes <= 0 {
+ maxBytes = 2048
+ }
+ upstreamDetail = truncateString(string(body), maxBytes)
+ }
+ setOpsUpstreamError(c, resp.StatusCode, upstreamMsg, upstreamDetail)
+
+ errType := "api_error"
+ switch {
+ case resp.StatusCode == 400:
+ errType = "invalid_request_error"
+ case resp.StatusCode == 404:
+ errType = "not_found_error"
+ case resp.StatusCode == 429:
+ errType = "rate_limit_error"
+ case resp.StatusCode >= 500:
+ errType = "api_error"
+ }
+
+ writeAnthropicError(c, resp.StatusCode, errType, upstreamMsg)
+ return nil, fmt.Errorf("upstream error: %d %s", resp.StatusCode, upstreamMsg)
+}
+
+// handleAnthropicNonStreamingResponse reads a Responses API JSON response,
+// converts it to Anthropic Messages format, and writes it to the client.
+func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
+ resp *http.Response,
+ c *gin.Context,
+ originalModel string,
+ startTime time.Time,
+) (*OpenAIForwardResult, error) {
+ requestID := resp.Header.Get("x-request-id")
+
+ respBody, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return nil, fmt.Errorf("read upstream response: %w", err)
+ }
+
+ var responsesResp apicompat.ResponsesResponse
+ if err := json.Unmarshal(respBody, &responsesResp); err != nil {
+ return nil, fmt.Errorf("parse responses response: %w", err)
+ }
+
+ anthropicResp := apicompat.ResponsesToAnthropic(&responsesResp, originalModel)
+
+ var usage OpenAIUsage
+ if responsesResp.Usage != nil {
+ usage = OpenAIUsage{
+ InputTokens: responsesResp.Usage.InputTokens,
+ OutputTokens: responsesResp.Usage.OutputTokens,
+ }
+ if responsesResp.Usage.InputTokensDetails != nil {
+ usage.CacheReadInputTokens = responsesResp.Usage.InputTokensDetails.CachedTokens
+ }
+ }
+
+ if s.responseHeaderFilter != nil {
+ responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
+ }
+ c.JSON(http.StatusOK, anthropicResp)
+
+ return &OpenAIForwardResult{
+ RequestID: requestID,
+ Usage: usage,
+ Model: originalModel,
+ Stream: false,
+ Duration: time.Since(startTime),
+ }, nil
+}
+
+// handleAnthropicStreamingResponse reads Responses SSE events from upstream,
+// converts each to Anthropic SSE events, and writes them to the client.
+func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
+ resp *http.Response,
+ c *gin.Context,
+ originalModel string,
+ startTime time.Time,
+) (*OpenAIForwardResult, error) {
+ requestID := resp.Header.Get("x-request-id")
+
+ if s.responseHeaderFilter != nil {
+ responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
+ }
+ c.Writer.Header().Set("Content-Type", "text/event-stream")
+ c.Writer.Header().Set("Cache-Control", "no-cache")
+ c.Writer.Header().Set("Connection", "keep-alive")
+ c.Writer.WriteHeader(http.StatusOK)
+
+ state := apicompat.NewResponsesEventToAnthropicState()
+ state.Model = originalModel
+ var usage OpenAIUsage
+ var firstTokenMs *int
+ firstChunk := true
+
+ scanner := bufio.NewScanner(resp.Body)
+ scanner.Buffer(make([]byte, 0, 64*1024), 1024*1024)
+
+ for scanner.Scan() {
+ line := scanner.Text()
+
+ if !strings.HasPrefix(line, "data: ") || line == "data: [DONE]" {
+ continue
+ }
+ payload := line[6:]
+
+ if firstChunk {
+ firstChunk = false
+ ms := int(time.Since(startTime).Milliseconds())
+ firstTokenMs = &ms
+ }
+
+ // Parse the Responses SSE event
+ var event apicompat.ResponsesStreamEvent
+ if err := json.Unmarshal([]byte(payload), &event); err != nil {
+ logger.L().Warn("openai messages stream: failed to parse event",
+ zap.Error(err),
+ zap.String("request_id", requestID),
+ )
+ continue
+ }
+
+ // Extract usage from completion events
+ if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
+ event.Response != nil && event.Response.Usage != nil {
+ usage = OpenAIUsage{
+ InputTokens: event.Response.Usage.InputTokens,
+ OutputTokens: event.Response.Usage.OutputTokens,
+ }
+ if event.Response.Usage.InputTokensDetails != nil {
+ usage.CacheReadInputTokens = event.Response.Usage.InputTokensDetails.CachedTokens
+ }
+ }
+
+ // Convert to Anthropic events
+ events := apicompat.ResponsesEventToAnthropicEvents(&event, state)
+ for _, evt := range events {
+ sse, err := apicompat.ResponsesAnthropicEventToSSE(evt)
+ if err != nil {
+ logger.L().Warn("openai messages stream: failed to marshal event",
+ zap.Error(err),
+ zap.String("request_id", requestID),
+ )
+ continue
+ }
+ if _, err := fmt.Fprint(c.Writer, sse); err != nil {
+ // Client disconnected — return collected usage
+ logger.L().Info("openai messages stream: client disconnected",
+ zap.String("request_id", requestID),
+ )
+ return &OpenAIForwardResult{
+ RequestID: requestID,
+ Usage: usage,
+ Model: originalModel,
+ Stream: true,
+ Duration: time.Since(startTime),
+ FirstTokenMs: firstTokenMs,
+ }, nil
+ }
+ }
+ if len(events) > 0 {
+ c.Writer.Flush()
+ }
+ }
+
+ if err := scanner.Err(); err != nil {
+ if !errors.Is(err, context.Canceled) && !errors.Is(err, context.DeadlineExceeded) {
+ logger.L().Warn("openai messages stream: read error",
+ zap.Error(err),
+ zap.String("request_id", requestID),
+ )
+ }
+ }
+
+ // Ensure the Anthropic stream is properly terminated
+ if finalEvents := apicompat.FinalizeResponsesAnthropicStream(state); len(finalEvents) > 0 {
+ for _, evt := range finalEvents {
+ sse, err := apicompat.ResponsesAnthropicEventToSSE(evt)
+ if err != nil {
+ continue
+ }
+ fmt.Fprint(c.Writer, sse) //nolint:errcheck
+ }
+ c.Writer.Flush()
+ }
+
+ return &OpenAIForwardResult{
+ RequestID: requestID,
+ Usage: usage,
+ Model: originalModel,
+ Stream: true,
+ Duration: time.Since(startTime),
+ FirstTokenMs: firstTokenMs,
+ }, nil
+}
+
+// writeAnthropicError writes an error response in Anthropic Messages API format.
+func writeAnthropicError(c *gin.Context, statusCode int, errType, message string) {
+ c.JSON(statusCode, gin.H{
+ "type": "error",
+ "error": gin.H{
+ "type": errType,
+ "message": message,
+ },
+ })
+}
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 1691e5a1..1bb46746 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -25,6 +25,7 @@ import (
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
"github.com/Wei-Shaw/sub2api/internal/util/urlvalidator"
"github.com/gin-gonic/gin"
+ "github.com/google/uuid"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
"go.uber.org/zap"
@@ -49,6 +50,8 @@ const (
openAIWSRetryBackoffInitialDefault = 120 * time.Millisecond
openAIWSRetryBackoffMaxDefault = 2 * time.Second
openAIWSRetryJitterRatioDefault = 0.2
+ openAICompactSessionSeedKey = "openai_compact_session_seed"
+ codexCLIVersion = "0.104.0"
)
// OpenAI allowed headers whitelist (for non-passthrough).
@@ -898,6 +901,22 @@ func isOpenAIInstructionsRequiredError(upstreamStatusCode int, upstreamMsg strin
return false
}
+// ExtractSessionID extracts the raw session ID from headers or body without hashing.
+// Used by ForwardAsAnthropic to pass as prompt_cache_key for upstream cache.
+func (s *OpenAIGatewayService) ExtractSessionID(c *gin.Context, body []byte) string {
+ if c == nil {
+ return ""
+ }
+ sessionID := strings.TrimSpace(c.GetHeader("session_id"))
+ if sessionID == "" {
+ sessionID = strings.TrimSpace(c.GetHeader("conversation_id"))
+ }
+ if sessionID == "" && len(body) > 0 {
+ sessionID = strings.TrimSpace(gjson.GetBytes(body, "prompt_cache_key").String())
+ }
+ return sessionID
+}
+
// GenerateSessionHash generates a sticky-session hash for OpenAI requests.
//
// Priority:
@@ -1615,7 +1634,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
}
if account.Type == AccountTypeOAuth {
- codexResult := applyCodexOAuthTransform(reqBody, isCodexCLI)
+ codexResult := applyCodexOAuthTransform(reqBody, isCodexCLI, isOpenAIResponsesCompactPath(c))
if codexResult.Modified {
bodyModified = true
disablePatch()
@@ -2047,14 +2066,14 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
return nil, fmt.Errorf("openai passthrough rejected before upstream: %s", rejectReason)
}
- normalizedBody, normalized, err := normalizeOpenAIPassthroughOAuthBody(body)
+ normalizedBody, normalized, err := normalizeOpenAIPassthroughOAuthBody(body, isOpenAIResponsesCompactPath(c))
if err != nil {
return nil, err
}
if normalized {
body = normalizedBody
- reqStream = true
}
+ reqStream = gjson.GetBytes(body, "stream").Bool()
}
logger.LegacyPrintf("service.openai_gateway",
@@ -2219,6 +2238,7 @@ func (s *OpenAIGatewayService) buildUpstreamRequestOpenAIPassthrough(
targetURL = buildOpenAIResponsesURL(validatedURL)
}
}
+ targetURL = appendOpenAIResponsesRequestPathSuffix(targetURL, openAIResponsesRequestPathSuffix(c))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(body))
if err != nil {
@@ -2252,7 +2272,15 @@ func (s *OpenAIGatewayService) buildUpstreamRequestOpenAIPassthrough(
if chatgptAccountID := account.GetChatGPTAccountID(); chatgptAccountID != "" {
req.Header.Set("chatgpt-account-id", chatgptAccountID)
}
- if req.Header.Get("accept") == "" {
+ if isOpenAIResponsesCompactPath(c) {
+ req.Header.Set("accept", "application/json")
+ if req.Header.Get("version") == "" {
+ req.Header.Set("version", codexCLIVersion)
+ }
+ if req.Header.Get("session_id") == "" {
+ req.Header.Set("session_id", resolveOpenAICompactSessionID(c))
+ }
+ } else if req.Header.Get("accept") == "" {
req.Header.Set("accept", "text/event-stream")
}
if req.Header.Get("OpenAI-Beta") == "" {
@@ -2599,6 +2627,7 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
default:
targetURL = openaiPlatformAPIURL
}
+ targetURL = appendOpenAIResponsesRequestPathSuffix(targetURL, openAIResponsesRequestPathSuffix(c))
req, err := http.NewRequestWithContext(ctx, "POST", targetURL, bytes.NewReader(body))
if err != nil {
@@ -2635,7 +2664,17 @@ func (s *OpenAIGatewayService) buildUpstreamRequest(ctx context.Context, c *gin.
} else {
req.Header.Set("originator", "opencode")
}
- req.Header.Set("accept", "text/event-stream")
+ if isOpenAIResponsesCompactPath(c) {
+ req.Header.Set("accept", "application/json")
+ if req.Header.Get("version") == "" {
+ req.Header.Set("version", codexCLIVersion)
+ }
+ if req.Header.Get("session_id") == "" {
+ req.Header.Set("session_id", resolveOpenAICompactSessionID(c))
+ }
+ } else {
+ req.Header.Set("accept", "text/event-stream")
+ }
if promptCacheKey != "" {
req.Header.Set("conversation_id", promptCacheKey)
req.Header.Set("session_id", promptCacheKey)
@@ -3426,6 +3465,95 @@ func buildOpenAIResponsesURL(base string) string {
return normalized + "/v1/responses"
}
+func IsOpenAIResponsesCompactPathForTest(c *gin.Context) bool {
+ return isOpenAIResponsesCompactPath(c)
+}
+
+func OpenAICompactSessionSeedKeyForTest() string {
+ return openAICompactSessionSeedKey
+}
+
+func NormalizeOpenAICompactRequestBodyForTest(body []byte) ([]byte, bool, error) {
+ return normalizeOpenAICompactRequestBody(body)
+}
+
+func isOpenAIResponsesCompactPath(c *gin.Context) bool {
+ suffix := strings.TrimSpace(openAIResponsesRequestPathSuffix(c))
+ return suffix == "/compact" || strings.HasPrefix(suffix, "/compact/")
+}
+
+func normalizeOpenAICompactRequestBody(body []byte) ([]byte, bool, error) {
+ if len(body) == 0 {
+ return body, false, nil
+ }
+
+ normalized := []byte(`{}`)
+ for _, field := range []string{"model", "input", "instructions", "previous_response_id"} {
+ value := gjson.GetBytes(body, field)
+ if !value.Exists() {
+ continue
+ }
+ next, err := sjson.SetRawBytes(normalized, field, []byte(value.Raw))
+ if err != nil {
+ return body, false, fmt.Errorf("normalize compact body %s: %w", field, err)
+ }
+ normalized = next
+ }
+
+ if bytes.Equal(bytes.TrimSpace(body), bytes.TrimSpace(normalized)) {
+ return body, false, nil
+ }
+ return normalized, true, nil
+}
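+
+// For illustration (hypothetical payload): the whitelist above rewrites
+//
+//   {"model":"gpt-5","input":[{"type":"text","text":"hi"}],"stream":true,"store":true}
+//
+// to
+//
+//   {"model":"gpt-5","input":[{"type":"text","text":"hi"}]}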
+
+func resolveOpenAICompactSessionID(c *gin.Context) string {
+ if c != nil {
+ if sessionID := strings.TrimSpace(c.GetHeader("session_id")); sessionID != "" {
+ return sessionID
+ }
+ if conversationID := strings.TrimSpace(c.GetHeader("conversation_id")); conversationID != "" {
+ return conversationID
+ }
+ if seed, ok := c.Get(openAICompactSessionSeedKey); ok {
+ if seedStr, ok := seed.(string); ok && strings.TrimSpace(seedStr) != "" {
+ return strings.TrimSpace(seedStr)
+ }
+ }
+ }
+ return uuid.NewString()
+}
+
+func openAIResponsesRequestPathSuffix(c *gin.Context) string {
+ if c == nil || c.Request == nil || c.Request.URL == nil {
+ return ""
+ }
+ normalizedPath := strings.TrimRight(strings.TrimSpace(c.Request.URL.Path), "/")
+ if normalizedPath == "" {
+ return ""
+ }
+ idx := strings.LastIndex(normalizedPath, "/responses")
+ if idx < 0 {
+ return ""
+ }
+ suffix := normalizedPath[idx+len("/responses"):]
+ if suffix == "" || suffix == "/" {
+ return ""
+ }
+ if !strings.HasPrefix(suffix, "/") {
+ return ""
+ }
+ return suffix
+}
+
+func appendOpenAIResponsesRequestPathSuffix(baseURL, suffix string) string {
+ trimmedBase := strings.TrimRight(strings.TrimSpace(baseURL), "/")
+ trimmedSuffix := strings.TrimSpace(suffix)
+ if trimmedBase == "" || trimmedSuffix == "" {
+ return trimmedBase
+ }
+ return trimmedBase + trimmedSuffix
+}
+
func (s *OpenAIGatewayService) replaceModelInResponseBody(body []byte, fromModel, toModel string) []byte {
// 使用 gjson/sjson 精确替换 model 字段,避免全量 JSON 反序列化
if m := gjson.GetBytes(body, "model"); m.Exists() && m.Str == fromModel {
@@ -3815,8 +3943,8 @@ func extractOpenAIRequestMetaFromBody(body []byte) (model string, stream bool, p
}
// normalizeOpenAIPassthroughOAuthBody 将透传 OAuth 请求体收敛为旧链路关键行为:
-// 1) store=false 2) stream=true
-func normalizeOpenAIPassthroughOAuthBody(body []byte) ([]byte, bool, error) {
+// 1) store=false 2) non-compact forces stream=true; compact strips store/stream and stays non-streaming
+func normalizeOpenAIPassthroughOAuthBody(body []byte, compact bool) ([]byte, bool, error) {
if len(body) == 0 {
return body, false, nil
}
@@ -3824,22 +3952,40 @@ func normalizeOpenAIPassthroughOAuthBody(body []byte) ([]byte, bool, error) {
normalized := body
changed := false
- if store := gjson.GetBytes(normalized, "store"); !store.Exists() || store.Type != gjson.False {
- next, err := sjson.SetBytes(normalized, "store", false)
- if err != nil {
- return body, false, fmt.Errorf("normalize passthrough body store=false: %w", err)
+ if compact {
+ if store := gjson.GetBytes(normalized, "store"); store.Exists() {
+ next, err := sjson.DeleteBytes(normalized, "store")
+ if err != nil {
+ return body, false, fmt.Errorf("normalize passthrough body delete store: %w", err)
+ }
+ normalized = next
+ changed = true
}
- normalized = next
- changed = true
- }
-
- if stream := gjson.GetBytes(normalized, "stream"); !stream.Exists() || stream.Type != gjson.True {
- next, err := sjson.SetBytes(normalized, "stream", true)
- if err != nil {
- return body, false, fmt.Errorf("normalize passthrough body stream=true: %w", err)
+ if stream := gjson.GetBytes(normalized, "stream"); stream.Exists() {
+ next, err := sjson.DeleteBytes(normalized, "stream")
+ if err != nil {
+ return body, false, fmt.Errorf("normalize passthrough body delete stream: %w", err)
+ }
+ normalized = next
+ changed = true
+ }
+ } else {
+ if store := gjson.GetBytes(normalized, "store"); !store.Exists() || store.Type != gjson.False {
+ next, err := sjson.SetBytes(normalized, "store", false)
+ if err != nil {
+ return body, false, fmt.Errorf("normalize passthrough body store=false: %w", err)
+ }
+ normalized = next
+ changed = true
+ }
+ if stream := gjson.GetBytes(normalized, "stream"); !stream.Exists() || stream.Type != gjson.True {
+ next, err := sjson.SetBytes(normalized, "stream", true)
+ if err != nil {
+ return body, false, fmt.Errorf("normalize passthrough body stream=true: %w", err)
+ }
+ normalized = next
+ changed = true
}
- normalized = next
- changed = true
}
return normalized, changed, nil
diff --git a/backend/internal/service/openai_gateway_service_test.go b/backend/internal/service/openai_gateway_service_test.go
index c8efe6b3..1be1eb50 100644
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -1288,6 +1288,92 @@ func TestOpenAIUpdateCodexUsageSnapshotFromHeaders(t *testing.T) {
}
}
+func TestOpenAIResponsesRequestPathSuffix(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+
+ tests := []struct {
+ name string
+ path string
+ want string
+ }{
+ {name: "exact v1 responses", path: "/v1/responses", want: ""},
+ {name: "compact v1 responses", path: "/v1/responses/compact", want: "/compact"},
+ {name: "compact alias responses", path: "/responses/compact/", want: "/compact"},
+ {name: "nested suffix", path: "/openai/v1/responses/compact/detail", want: "/compact/detail"},
+ {name: "unrelated path", path: "/v1/chat/completions", want: ""},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c.Request = httptest.NewRequest(http.MethodPost, tt.path, nil)
+ require.Equal(t, tt.want, openAIResponsesRequestPathSuffix(c))
+ })
+ }
+}
+
+func TestOpenAIBuildUpstreamRequestOpenAIPassthroughPreservesCompactPath(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{}
+ account := &Account{Type: AccountTypeOAuth}
+
+ req, err := svc.buildUpstreamRequestOpenAIPassthrough(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token")
+ require.NoError(t, err)
+ require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
+ require.Equal(t, "application/json", req.Header.Get("Accept"))
+ require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
+ require.NotEmpty(t, req.Header.Get("Session_Id"))
+}
+
+func TestOpenAIBuildUpstreamRequestCompactForcesJSONAcceptForOAuth(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{}
+ account := &Account{
+ Type: AccountTypeOAuth,
+ Credentials: map[string]any{"chatgpt_account_id": "chatgpt-acc"},
+ }
+
+ req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", true)
+ require.NoError(t, err)
+ require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
+ require.Equal(t, "application/json", req.Header.Get("Accept"))
+ require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
+ require.NotEmpty(t, req.Header.Get("Session_Id"))
+}
+
+func TestOpenAIBuildUpstreamRequestPreservesCompactPathForAPIKeyBaseURL(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{cfg: &config.Config{
+ Security: config.SecurityConfig{
+ URLAllowlist: config.URLAllowlistConfig{Enabled: false},
+ },
+ }}
+ account := &Account{
+ Type: AccountTypeAPIKey,
+ Platform: PlatformOpenAI,
+ Credentials: map[string]any{"base_url": "https://example.com/v1"},
+ }
+
+ req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", false)
+ require.NoError(t, err)
+ require.Equal(t, "https://example.com/v1/responses/compact", req.URL.String())
+}
+
+
// ==================== P1-08 修复:model 替换性能优化测试 =============
func TestReplaceModelInSSELine(t *testing.T) {
svc := &OpenAIGatewayService{}
diff --git a/backend/internal/service/openai_oauth_passthrough_test.go b/backend/internal/service/openai_oauth_passthrough_test.go
index 0840d3b1..2191d124 100644
--- a/backend/internal/service/openai_oauth_passthrough_test.go
+++ b/backend/internal/service/openai_oauth_passthrough_test.go
@@ -236,6 +236,60 @@ func TestOpenAIGatewayService_OAuthPassthrough_StreamKeepsToolNameAndBodyNormali
require.NotContains(t, body, "\"name\":\"edit\"")
}
+func TestOpenAIGatewayService_OAuthPassthrough_CompactUsesJSONAndKeepsNonStreaming(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader(nil))
+ c.Request.Header.Set("User-Agent", "codex_cli_rs/0.1.0")
+ c.Request.Header.Set("Content-Type", "application/json")
+
+ originalBody := []byte(`{"model":"gpt-5.1-codex","stream":true,"store":true,"instructions":"local-test-instructions","input":[{"type":"text","text":"compact me"}]}`)
+
+ resp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{"Content-Type": []string{"application/json"}, "x-request-id": []string{"rid-compact"}},
+ Body: io.NopCloser(strings.NewReader(`{"id":"cmp_123","usage":{"input_tokens":11,"output_tokens":22}}`)),
+ }
+ upstream := &httpUpstreamRecorder{resp: resp}
+
+ svc := &OpenAIGatewayService{
+ cfg: &config.Config{Gateway: config.GatewayConfig{ForceCodexCLI: false}},
+ httpUpstream: upstream,
+ }
+
+ account := &Account{
+ ID: 123,
+ Name: "acc",
+ Platform: PlatformOpenAI,
+ Type: AccountTypeOAuth,
+ Concurrency: 1,
+ Credentials: map[string]any{"access_token": "oauth-token", "chatgpt_account_id": "chatgpt-acc"},
+ Extra: map[string]any{"openai_passthrough": true},
+ Status: StatusActive,
+ Schedulable: true,
+ RateMultiplier: f64p(1),
+ }
+
+ result, err := svc.Forward(context.Background(), c, account, originalBody)
+ require.NoError(t, err)
+ require.NotNil(t, result)
+ require.False(t, result.Stream)
+
+ require.False(t, gjson.GetBytes(upstream.lastBody, "store").Exists())
+ require.False(t, gjson.GetBytes(upstream.lastBody, "stream").Exists())
+ require.Equal(t, "gpt-5.1-codex", gjson.GetBytes(upstream.lastBody, "model").String())
+ require.Equal(t, "compact me", gjson.GetBytes(upstream.lastBody, "input.0.text").String())
+ require.Equal(t, "local-test-instructions", strings.TrimSpace(gjson.GetBytes(upstream.lastBody, "instructions").String()))
+ require.Equal(t, "application/json", upstream.lastReq.Header.Get("Accept"))
+ require.Equal(t, codexCLIVersion, upstream.lastReq.Header.Get("Version"))
+ require.NotEmpty(t, upstream.lastReq.Header.Get("Session_Id"))
+ require.Equal(t, "chatgpt.com", upstream.lastReq.Host)
+ require.Equal(t, "chatgpt-acc", upstream.lastReq.Header.Get("chatgpt-account-id"))
+ require.Contains(t, rec.Body.String(), `"id":"cmp_123"`)
+}
+
func TestOpenAIGatewayService_OAuthPassthrough_CodexMissingInstructionsRejectedBeforeUpstream(t *testing.T) {
gin.SetMode(gin.TestMode)
logSink, restore := captureStructuredLog(t)
diff --git a/backend/internal/web/embed_on.go b/backend/internal/web/embed_on.go
index f7ba5c9e..41ce4d48 100644
--- a/backend/internal/web/embed_on.go
+++ b/backend/internal/web/embed_on.go
@@ -83,14 +83,7 @@ func (s *FrontendServer) Middleware() gin.HandlerFunc {
path := c.Request.URL.Path
// Skip API routes
- if strings.HasPrefix(path, "/api/") ||
- strings.HasPrefix(path, "/v1/") ||
- strings.HasPrefix(path, "/v1beta/") ||
- strings.HasPrefix(path, "/sora/") ||
- strings.HasPrefix(path, "/antigravity/") ||
- strings.HasPrefix(path, "/setup/") ||
- path == "/health" ||
- path == "/responses" {
+ if shouldBypassEmbeddedFrontend(path) {
c.Next()
return
}
@@ -207,14 +200,7 @@ func ServeEmbeddedFrontend() gin.HandlerFunc {
return func(c *gin.Context) {
path := c.Request.URL.Path
- if strings.HasPrefix(path, "/api/") ||
- strings.HasPrefix(path, "/v1/") ||
- strings.HasPrefix(path, "/v1beta/") ||
- strings.HasPrefix(path, "/sora/") ||
- strings.HasPrefix(path, "/antigravity/") ||
- strings.HasPrefix(path, "/setup/") ||
- path == "/health" ||
- path == "/responses" {
+ if shouldBypassEmbeddedFrontend(path) {
c.Next()
return
}
@@ -235,6 +221,19 @@ func ServeEmbeddedFrontend() gin.HandlerFunc {
}
}
+func shouldBypassEmbeddedFrontend(path string) bool {
+ trimmed := strings.TrimSpace(path)
+ return strings.HasPrefix(trimmed, "/api/") ||
+ strings.HasPrefix(trimmed, "/v1/") ||
+ strings.HasPrefix(trimmed, "/v1beta/") ||
+ strings.HasPrefix(trimmed, "/sora/") ||
+ strings.HasPrefix(trimmed, "/antigravity/") ||
+ strings.HasPrefix(trimmed, "/setup/") ||
+ trimmed == "/health" ||
+ trimmed == "/responses" ||
+ strings.HasPrefix(trimmed, "/responses/")
+}
+
func serveIndexHTML(c *gin.Context, fsys fs.FS) {
file, err := fsys.Open("index.html")
if err != nil {
diff --git a/backend/internal/web/embed_test.go b/backend/internal/web/embed_test.go
index e2cbcf15..f270b624 100644
--- a/backend/internal/web/embed_test.go
+++ b/backend/internal/web/embed_test.go
@@ -367,6 +367,7 @@ func TestFrontendServer_Middleware(t *testing.T) {
"/setup/init",
"/health",
"/responses",
+ "/responses/compact",
}
for _, path := range apiPaths {
@@ -388,6 +389,32 @@ func TestFrontendServer_Middleware(t *testing.T) {
}
})
+ t.Run("skips_responses_compact_post_routes", func(t *testing.T) {
+ provider := &mockSettingsProvider{
+ settings: map[string]string{"test": "value"},
+ }
+
+ server, err := NewFrontendServer(provider)
+ require.NoError(t, err)
+
+ router := gin.New()
+ router.Use(server.Middleware())
+ nextCalled := false
+ router.POST("/responses/compact", func(c *gin.Context) {
+ nextCalled = true
+ c.String(http.StatusOK, `{"ok":true}`)
+ })
+
+ w := httptest.NewRecorder()
+ req := httptest.NewRequest(http.MethodPost, "/responses/compact", strings.NewReader(`{"model":"gpt-5"}`))
+ req.Header.Set("Content-Type", "application/json")
+ router.ServeHTTP(w, req)
+
+ assert.True(t, nextCalled, "next handler should be called for compact API route")
+ assert.Equal(t, http.StatusOK, w.Code)
+ assert.JSONEq(t, `{"ok":true}`, w.Body.String())
+ })
+
t.Run("serves_index_for_spa_routes", func(t *testing.T) {
provider := &mockSettingsProvider{
settings: map[string]string{"test": "value"},
@@ -543,6 +570,7 @@ func TestServeEmbeddedFrontend(t *testing.T) {
"/setup/init",
"/health",
"/responses",
+ "/responses/compact",
}
for _, path := range apiPaths {
diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue
index 92725c07..14064078 100644
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -1230,6 +1230,142 @@
+ {{ t('admin.accounts.openai.modelRestrictionDisabledByPassthrough') }}
+ {{ t('admin.accounts.selectedModels', { count: allowedModels.length }) }}
+ {{ t('admin.accounts.supportsAllModels') }}
+ {{ t('admin.accounts.mapRequestModels') }}
+ {{ t('admin.accounts.openai.modelRestrictionDisabledByPassthrough') }}
+ {{ t('admin.accounts.selectedModels', { count: allowedModels.length }) }}
+ {{ t('admin.accounts.supportsAllModels') }}
+ {{ t('admin.accounts.mapRequestModels') }}