From 838ada8864a82143b7ddeb483bbbaa8acd89ee5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A5=9E=E4=B9=90?= <6682635@qq.com>
Date: Fri, 6 Mar 2026 20:46:10 +0800
Subject: [PATCH 1/6] fix(openai): restore ws usage window display
---
.../handler/openai_gateway_handler.go | 6 +
.../internal/service/account_usage_service.go | 236 ++++++++++++++++++
.../service/openai_gateway_service.go | 10 +
.../service/openai_gateway_service_test.go | 125 +++++++++-
.../internal/service/openai_ws_forwarder.go | 2 +
backend/internal/service/openai_ws_pool.go | 7 +
.../openai_ws_v2_passthrough_adapter.go | 22 +-
.../components/account/AccountUsageCell.vue | 38 +++
.../__tests__/AccountUsageCell.spec.ts | 55 ++++
9 files changed, 490 insertions(+), 11 deletions(-)
diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go
index 2da979b5..644b634d 100644
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -305,6 +305,9 @@ func (h *OpenAIGatewayHandler) Responses(c *gin.Context) {
return
}
if result != nil {
+ if account.Type == service.AccountTypeOAuth {
+ h.gatewayService.UpdateCodexUsageSnapshotFromHeaders(c.Request.Context(), account.ID, result.ResponseHeaders)
+ }
h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, true, result.FirstTokenMs)
} else {
h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, true, nil)
@@ -840,6 +843,9 @@ func (h *OpenAIGatewayHandler) ResponsesWebSocket(c *gin.Context) {
if turnErr != nil || result == nil {
return
}
+ if account.Type == service.AccountTypeOAuth {
+ h.gatewayService.UpdateCodexUsageSnapshotFromHeaders(ctx, account.ID, result.ResponseHeaders)
+ }
h.gatewayService.ReportOpenAIAccountScheduleResult(account.ID, true, result.FirstTokenMs)
h.submitUsageRecordTask(func(taskCtx context.Context) {
if err := h.gatewayService.RecordUsage(taskCtx, &service.OpenAIRecordUsageInput{
diff --git a/backend/internal/service/account_usage_service.go b/backend/internal/service/account_usage_service.go
index 6dee6c13..12cb87dd 100644
--- a/backend/internal/service/account_usage_service.go
+++ b/backend/internal/service/account_usage_service.go
@@ -1,13 +1,18 @@
package service
import (
+ "bytes"
"context"
+ "encoding/json"
"fmt"
"log"
+ "net/http"
"strings"
"sync"
"time"
+ httppool "github.com/Wei-Shaw/sub2api/internal/pkg/httpclient"
+ openaipkg "github.com/Wei-Shaw/sub2api/internal/pkg/openai"
"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
"github.com/Wei-Shaw/sub2api/internal/pkg/timezone"
"github.com/Wei-Shaw/sub2api/internal/pkg/usagestats"
@@ -90,6 +95,7 @@ type antigravityUsageCache struct {
const (
apiCacheTTL = 3 * time.Minute
windowStatsCacheTTL = 1 * time.Minute
+ openAIProbeCacheTTL = 10 * time.Minute
)
// UsageCache 封装账户使用量相关的缓存
@@ -97,6 +103,7 @@ type UsageCache struct {
apiCache sync.Map // accountID -> *apiUsageCache
windowStatsCache sync.Map // accountID -> *windowStatsCache
antigravityCache sync.Map // accountID -> *antigravityUsageCache
+ openAIProbeCache sync.Map // accountID -> time.Time
}
// NewUsageCache 创建 UsageCache 实例
@@ -224,6 +231,14 @@ func (s *AccountUsageService) GetUsage(ctx context.Context, accountID int64) (*U
return nil, fmt.Errorf("get account failed: %w", err)
}
+ if account.Platform == PlatformOpenAI && account.Type == AccountTypeOAuth {
+ usage, err := s.getOpenAIUsage(ctx, account)
+ if err == nil {
+ s.tryClearRecoverableAccountError(ctx, account)
+ }
+ return usage, err
+ }
+
if account.Platform == PlatformGemini {
usage, err := s.getGeminiUsage(ctx, account)
if err == nil {
@@ -288,6 +303,161 @@ func (s *AccountUsageService) GetUsage(ctx context.Context, accountID int64) (*U
return nil, fmt.Errorf("account type %s does not support usage query", account.Type)
}
+func (s *AccountUsageService) getOpenAIUsage(ctx context.Context, account *Account) (*UsageInfo, error) {
+ now := time.Now()
+ usage := &UsageInfo{UpdatedAt: &now}
+
+ if account == nil {
+ return usage, nil
+ }
+
+ if progress := buildCodexUsageProgressFromExtra(account.Extra, "5h", now); progress != nil {
+ usage.FiveHour = progress
+ }
+ if progress := buildCodexUsageProgressFromExtra(account.Extra, "7d", now); progress != nil {
+ usage.SevenDay = progress
+ }
+
+ if (usage.FiveHour == nil || usage.SevenDay == nil) && s.shouldProbeOpenAICodexSnapshot(account.ID, now) {
+ if updates, err := s.probeOpenAICodexSnapshot(ctx, account); err == nil && len(updates) > 0 {
+ mergeAccountExtra(account, updates)
+ if usage.UpdatedAt == nil {
+ usage.UpdatedAt = &now
+ }
+ if progress := buildCodexUsageProgressFromExtra(account.Extra, "5h", now); progress != nil {
+ usage.FiveHour = progress
+ }
+ if progress := buildCodexUsageProgressFromExtra(account.Extra, "7d", now); progress != nil {
+ usage.SevenDay = progress
+ }
+ }
+ }
+
+ if s.usageLogRepo == nil {
+ return usage, nil
+ }
+
+ if stats, err := s.usageLogRepo.GetAccountWindowStats(ctx, account.ID, now.Add(-5*time.Hour)); err == nil {
+ windowStats := windowStatsFromAccountStats(stats)
+ if hasMeaningfulWindowStats(windowStats) {
+ if usage.FiveHour == nil {
+ usage.FiveHour = &UsageProgress{Utilization: 0}
+ }
+ usage.FiveHour.WindowStats = windowStats
+ }
+ }
+
+ if stats, err := s.usageLogRepo.GetAccountWindowStats(ctx, account.ID, now.Add(-7*24*time.Hour)); err == nil {
+ windowStats := windowStatsFromAccountStats(stats)
+ if hasMeaningfulWindowStats(windowStats) {
+ if usage.SevenDay == nil {
+ usage.SevenDay = &UsageProgress{Utilization: 0}
+ }
+ usage.SevenDay.WindowStats = windowStats
+ }
+ }
+
+ return usage, nil
+}
+
+func (s *AccountUsageService) shouldProbeOpenAICodexSnapshot(accountID int64, now time.Time) bool {
+ if s == nil || s.cache == nil || accountID <= 0 {
+ return true
+ }
+ if cached, ok := s.cache.openAIProbeCache.Load(accountID); ok {
+ if ts, ok := cached.(time.Time); ok && now.Sub(ts) < openAIProbeCacheTTL {
+ return false
+ }
+ }
+ s.cache.openAIProbeCache.Store(accountID, now)
+ return true
+}
+
+func (s *AccountUsageService) probeOpenAICodexSnapshot(ctx context.Context, account *Account) (map[string]any, error) {
+ if account == nil || !account.IsOAuth() {
+ return nil, nil
+ }
+ accessToken := account.GetOpenAIAccessToken()
+ if accessToken == "" {
+ return nil, fmt.Errorf("no access token available")
+ }
+ modelID := openaipkg.DefaultTestModel
+ payload := createOpenAITestPayload(modelID, true)
+ payloadBytes, err := json.Marshal(payload)
+ if err != nil {
+ return nil, fmt.Errorf("marshal openai probe payload: %w", err)
+ }
+
+ reqCtx, cancel := context.WithTimeout(ctx, 15*time.Second)
+ defer cancel()
+ req, err := http.NewRequestWithContext(reqCtx, http.MethodPost, chatgptCodexURL, bytes.NewReader(payloadBytes))
+ if err != nil {
+ return nil, fmt.Errorf("create openai probe request: %w", err)
+ }
+ req.Host = "chatgpt.com"
+ req.Header.Set("Content-Type", "application/json")
+ req.Header.Set("Authorization", "Bearer "+accessToken)
+ req.Header.Set("Accept", "text/event-stream")
+ req.Header.Set("OpenAI-Beta", "responses=experimental")
+ req.Header.Set("Originator", "codex_cli_rs")
+ req.Header.Set("Version", codexCLIVersion)
+ req.Header.Set("User-Agent", codexCLIUserAgent)
+ if s.identityCache != nil {
+ if fp, fpErr := s.identityCache.GetFingerprint(reqCtx, account.ID); fpErr == nil && fp != nil && strings.TrimSpace(fp.UserAgent) != "" {
+ req.Header.Set("User-Agent", strings.TrimSpace(fp.UserAgent))
+ }
+ }
+ if chatgptAccountID := account.GetChatGPTAccountID(); chatgptAccountID != "" {
+ req.Header.Set("chatgpt-account-id", chatgptAccountID)
+ }
+
+ proxyURL := ""
+ if account.ProxyID != nil && account.Proxy != nil {
+ proxyURL = account.Proxy.URL()
+ }
+ client, err := httppool.GetClient(httppool.Options{
+ ProxyURL: proxyURL,
+ Timeout: 15 * time.Second,
+ ResponseHeaderTimeout: 10 * time.Second,
+ })
+ if err != nil {
+ return nil, fmt.Errorf("build openai probe client: %w", err)
+ }
+ resp, err := client.Do(req)
+ if err != nil {
+ return nil, fmt.Errorf("openai codex probe request failed: %w", err)
+ }
+ defer func() { _ = resp.Body.Close() }()
+
+ if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+ return nil, fmt.Errorf("openai codex probe returned status %d", resp.StatusCode)
+ }
+ if snapshot := ParseCodexRateLimitHeaders(resp.Header); snapshot != nil {
+ updates := buildCodexUsageExtraUpdates(snapshot, time.Now())
+ if len(updates) > 0 {
+ go func(accountID int64, updates map[string]any) {
+ updateCtx, updateCancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer updateCancel()
+ _ = s.accountRepo.UpdateExtra(updateCtx, accountID, updates)
+ }(account.ID, updates)
+ return updates, nil
+ }
+ }
+ return nil, nil
+}
+
+func mergeAccountExtra(account *Account, updates map[string]any) {
+ if account == nil || len(updates) == 0 {
+ return
+ }
+ if account.Extra == nil {
+ account.Extra = make(map[string]any, len(updates))
+ }
+ for k, v := range updates {
+ account.Extra[k] = v
+ }
+}
+
func (s *AccountUsageService) getGeminiUsage(ctx context.Context, account *Account) (*UsageInfo, error) {
now := time.Now()
usage := &UsageInfo{
@@ -519,6 +689,72 @@ func windowStatsFromAccountStats(stats *usagestats.AccountStats) *WindowStats {
}
}
+func hasMeaningfulWindowStats(stats *WindowStats) bool {
+ if stats == nil {
+ return false
+ }
+ return stats.Requests > 0 || stats.Tokens > 0 || stats.Cost > 0 || stats.StandardCost > 0 || stats.UserCost > 0
+}
+
+func buildCodexUsageProgressFromExtra(extra map[string]any, window string, now time.Time) *UsageProgress {
+ if len(extra) == 0 {
+ return nil
+ }
+
+ var (
+ usedPercentKey string
+ resetAfterKey string
+ resetAtKey string
+ )
+
+ switch window {
+ case "5h":
+ usedPercentKey = "codex_5h_used_percent"
+ resetAfterKey = "codex_5h_reset_after_seconds"
+ resetAtKey = "codex_5h_reset_at"
+ case "7d":
+ usedPercentKey = "codex_7d_used_percent"
+ resetAfterKey = "codex_7d_reset_after_seconds"
+ resetAtKey = "codex_7d_reset_at"
+ default:
+ return nil
+ }
+
+ usedRaw, ok := extra[usedPercentKey]
+ if !ok {
+ return nil
+ }
+
+ progress := &UsageProgress{Utilization: parseExtraFloat64(usedRaw)}
+ if resetAtRaw, ok := extra[resetAtKey]; ok {
+ if resetAt, err := parseTime(fmt.Sprint(resetAtRaw)); err == nil {
+ progress.ResetsAt = &resetAt
+ progress.RemainingSeconds = int(time.Until(resetAt).Seconds())
+ if progress.RemainingSeconds < 0 {
+ progress.RemainingSeconds = 0
+ }
+ }
+ }
+ if progress.ResetsAt == nil {
+ if resetAfterSeconds := parseExtraInt(extra[resetAfterKey]); resetAfterSeconds > 0 {
+ base := now
+ if updatedAtRaw, ok := extra["codex_usage_updated_at"]; ok {
+ if updatedAt, err := parseTime(fmt.Sprint(updatedAtRaw)); err == nil {
+ base = updatedAt
+ }
+ }
+ resetAt := base.Add(time.Duration(resetAfterSeconds) * time.Second)
+ progress.ResetsAt = &resetAt
+ progress.RemainingSeconds = int(time.Until(resetAt).Seconds())
+ if progress.RemainingSeconds < 0 {
+ progress.RemainingSeconds = 0
+ }
+ }
+ }
+
+ return progress
+}
+
func (s *AccountUsageService) GetAccountUsageStats(ctx context.Context, accountID int64, startTime, endTime time.Time) (*usagestats.AccountUsageStatsResponse, error) {
stats, err := s.usageLogRepo.GetAccountUsageStats(ctx, accountID, startTime, endTime)
if err != nil {
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 9970fc19..1691e5a1 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -210,6 +210,7 @@ type OpenAIForwardResult struct {
ReasoningEffort *string
Stream bool
OpenAIWSMode bool
+ ResponseHeaders http.Header
Duration time.Duration
FirstTokenMs *int
}
@@ -3747,6 +3748,15 @@ func (s *OpenAIGatewayService) updateCodexUsageSnapshot(ctx context.Context, acc
}()
}
+func (s *OpenAIGatewayService) UpdateCodexUsageSnapshotFromHeaders(ctx context.Context, accountID int64, headers http.Header) {
+ if accountID <= 0 || headers == nil {
+ return
+ }
+ if snapshot := ParseCodexRateLimitHeaders(headers); snapshot != nil {
+ s.updateCodexUsageSnapshot(ctx, accountID, snapshot)
+ }
+}
+
func getOpenAIReasoningEffortFromReqBody(reqBody map[string]any) (value string, present bool) {
if reqBody == nil {
return "", false
diff --git a/backend/internal/service/openai_gateway_service_test.go b/backend/internal/service/openai_gateway_service_test.go
index 8fc29e75..6c8f4f52 100644
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -28,6 +28,22 @@ type stubOpenAIAccountRepo struct {
accounts []Account
}
+type snapshotUpdateAccountRepo struct {
+ stubOpenAIAccountRepo
+ updateExtraCalls chan map[string]any
+}
+
+func (r *snapshotUpdateAccountRepo) UpdateExtra(ctx context.Context, id int64, updates map[string]any) error {
+ if r.updateExtraCalls != nil {
+ copied := make(map[string]any, len(updates))
+ for k, v := range updates {
+ copied[k] = v
+ }
+ r.updateExtraCalls <- copied
+ }
+ return nil
+}
+
func (r stubOpenAIAccountRepo) GetByID(ctx context.Context, id int64) (*Account, error) {
for i := range r.accounts {
if r.accounts[i].ID == id {
@@ -1248,8 +1264,115 @@ func TestOpenAIValidateUpstreamBaseURLEnabledEnforcesAllowlist(t *testing.T) {
}
}
-// ==================== P1-08 修复:model 替换性能优化测试 ====================
+func TestOpenAIUpdateCodexUsageSnapshotFromHeaders(t *testing.T) {
+ repo := &snapshotUpdateAccountRepo{updateExtraCalls: make(chan map[string]any, 1)}
+ svc := &OpenAIGatewayService{accountRepo: repo}
+ headers := http.Header{}
+ headers.Set("x-codex-primary-used-percent", "12")
+ headers.Set("x-codex-secondary-used-percent", "34")
+ headers.Set("x-codex-primary-window-minutes", "300")
+ headers.Set("x-codex-secondary-window-minutes", "10080")
+ headers.Set("x-codex-primary-reset-after-seconds", "600")
+ headers.Set("x-codex-secondary-reset-after-seconds", "86400")
+ svc.UpdateCodexUsageSnapshotFromHeaders(context.Background(), 123, headers)
+
+ select {
+ case updates := <-repo.updateExtraCalls:
+ require.Equal(t, 12.0, updates["codex_5h_used_percent"])
+ require.Equal(t, 34.0, updates["codex_7d_used_percent"])
+ require.Equal(t, 600, updates["codex_5h_reset_after_seconds"])
+ require.Equal(t, 86400, updates["codex_7d_reset_after_seconds"])
+ case <-time.After(2 * time.Second):
+ t.Fatal("expected UpdateExtra to be called")
+ }
+}
+
+func TestOpenAIResponsesRequestPathSuffix(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+
+ tests := []struct {
+ name string
+ path string
+ want string
+ }{
+ {name: "exact v1 responses", path: "/v1/responses", want: ""},
+ {name: "compact v1 responses", path: "/v1/responses/compact", want: "/compact"},
+ {name: "compact alias responses", path: "/responses/compact/", want: "/compact"},
+ {name: "nested suffix", path: "/openai/v1/responses/compact/detail", want: "/compact/detail"},
+ {name: "unrelated path", path: "/v1/chat/completions", want: ""},
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ c.Request = httptest.NewRequest(http.MethodPost, tt.path, nil)
+ require.Equal(t, tt.want, openAIResponsesRequestPathSuffix(c))
+ })
+ }
+}
+
+func TestOpenAIBuildUpstreamRequestOpenAIPassthroughPreservesCompactPath(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{}
+ account := &Account{Type: AccountTypeOAuth}
+
+ req, err := svc.buildUpstreamRequestOpenAIPassthrough(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token")
+ require.NoError(t, err)
+ require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
+ require.Equal(t, "application/json", req.Header.Get("Accept"))
+ require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
+ require.NotEmpty(t, req.Header.Get("Session_Id"))
+}
+
+func TestOpenAIBuildUpstreamRequestCompactForcesJSONAcceptForOAuth(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{}
+ account := &Account{
+ Type: AccountTypeOAuth,
+ Credentials: map[string]any{"chatgpt_account_id": "chatgpt-acc"},
+ }
+
+ req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", true)
+ require.NoError(t, err)
+ require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
+ require.Equal(t, "application/json", req.Header.Get("Accept"))
+ require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
+ require.NotEmpty(t, req.Header.Get("Session_Id"))
+}
+
+func TestOpenAIBuildUpstreamRequestPreservesCompactPathForAPIKeyBaseURL(t *testing.T) {
+ gin.SetMode(gin.TestMode)
+ rec := httptest.NewRecorder()
+ c, _ := gin.CreateTestContext(rec)
+ c.Request = httptest.NewRequest(http.MethodPost, "/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
+
+ svc := &OpenAIGatewayService{cfg: &config.Config{
+ Security: config.SecurityConfig{
+ URLAllowlist: config.URLAllowlistConfig{Enabled: false},
+ },
+ }}
+ account := &Account{
+ Type: AccountTypeAPIKey,
+ Platform: PlatformOpenAI,
+ Credentials: map[string]any{"base_url": "https://example.com/v1"},
+ }
+
+ req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", false)
+ require.NoError(t, err)
+ require.Equal(t, "https://example.com/v1/responses/compact", req.URL.String())
+}
+
+// ==================== P1-08 修复:model 替换性能优化测试 =============
func TestReplaceModelInSSELine(t *testing.T) {
svc := &OpenAIGatewayService{}
diff --git a/backend/internal/service/openai_ws_forwarder.go b/backend/internal/service/openai_ws_forwarder.go
index 7b6591fa..91218cd2 100644
--- a/backend/internal/service/openai_ws_forwarder.go
+++ b/backend/internal/service/openai_ws_forwarder.go
@@ -2309,6 +2309,7 @@ func (s *OpenAIGatewayService) forwardOpenAIWSV2(
ReasoningEffort: extractOpenAIReasoningEffort(reqBody, originalModel),
Stream: reqStream,
OpenAIWSMode: true,
+ ResponseHeaders: lease.HandshakeHeaders(),
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
}, nil
@@ -2919,6 +2920,7 @@ func (s *OpenAIGatewayService) ProxyResponsesWebSocketFromClient(
ReasoningEffort: extractOpenAIReasoningEffortFromBody(payload, originalModel),
Stream: reqStream,
OpenAIWSMode: true,
+ ResponseHeaders: lease.HandshakeHeaders(),
Duration: time.Since(turnStart),
FirstTokenMs: firstTokenMs,
}, nil
diff --git a/backend/internal/service/openai_ws_pool.go b/backend/internal/service/openai_ws_pool.go
index db6a96a7..5950e028 100644
--- a/backend/internal/service/openai_ws_pool.go
+++ b/backend/internal/service/openai_ws_pool.go
@@ -126,6 +126,13 @@ func (l *openAIWSConnLease) HandshakeHeader(name string) string {
return l.conn.handshakeHeader(name)
}
+func (l *openAIWSConnLease) HandshakeHeaders() http.Header {
+ if l == nil || l.conn == nil {
+ return nil
+ }
+ return cloneHeader(l.conn.handshakeHeaders)
+}
+
func (l *openAIWSConnLease) IsPrewarmed() bool {
if l == nil || l.conn == nil {
return false
diff --git a/backend/internal/service/openai_ws_v2_passthrough_adapter.go b/backend/internal/service/openai_ws_v2_passthrough_adapter.go
index 3b429f4d..29a2640d 100644
--- a/backend/internal/service/openai_ws_v2_passthrough_adapter.go
+++ b/backend/internal/service/openai_ws_v2_passthrough_adapter.go
@@ -177,11 +177,12 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
CacheCreationInputTokens: turn.Usage.CacheCreationInputTokens,
CacheReadInputTokens: turn.Usage.CacheReadInputTokens,
},
- Model: turn.RequestModel,
- Stream: true,
- OpenAIWSMode: true,
- Duration: turn.Duration,
- FirstTokenMs: turn.FirstTokenMs,
+ Model: turn.RequestModel,
+ Stream: true,
+ OpenAIWSMode: true,
+ ResponseHeaders: cloneHeader(handshakeHeaders),
+ Duration: turn.Duration,
+ FirstTokenMs: turn.FirstTokenMs,
}
logOpenAIWSV2Passthrough(
"relay_turn_completed account_id=%d turn=%d request_id=%s terminal_event=%s duration_ms=%d first_token_ms=%d input_tokens=%d output_tokens=%d cache_read_tokens=%d",
@@ -223,11 +224,12 @@ func (s *OpenAIGatewayService) proxyResponsesWebSocketV2Passthrough(
CacheCreationInputTokens: relayResult.Usage.CacheCreationInputTokens,
CacheReadInputTokens: relayResult.Usage.CacheReadInputTokens,
},
- Model: relayResult.RequestModel,
- Stream: true,
- OpenAIWSMode: true,
- Duration: relayResult.Duration,
- FirstTokenMs: relayResult.FirstTokenMs,
+ Model: relayResult.RequestModel,
+ Stream: true,
+ OpenAIWSMode: true,
+ ResponseHeaders: cloneHeader(handshakeHeaders),
+ Duration: relayResult.Duration,
+ FirstTokenMs: relayResult.FirstTokenMs,
}
turnCount := int(completedTurns.Load())
diff --git a/frontend/src/components/account/AccountUsageCell.vue b/frontend/src/components/account/AccountUsageCell.vue
index 859bd7c9..20b4b629 100644
--- a/frontend/src/components/account/AccountUsageCell.vue
+++ b/frontend/src/components/account/AccountUsageCell.vue
@@ -90,6 +90,36 @@
color="emerald"
/>
+
+
+
+
+
-
@@ -313,6 +343,9 @@ const shouldFetchUsage = computed(() => {
if (props.account.platform === 'antigravity') {
return props.account.type === 'oauth'
}
+ if (props.account.platform === 'openai') {
+ return props.account.type === 'oauth'
+ }
return false
})
@@ -335,6 +368,11 @@ const hasCodexUsage = computed(() => {
return codex5hWindow.value.usedPercent !== null || codex7dWindow.value.usedPercent !== null
})
+const hasOpenAIUsageFallback = computed(() => {
+ if (props.account.platform !== 'openai' || props.account.type !== 'oauth') return false
+ return !!usageInfo.value?.five_hour || !!usageInfo.value?.seven_day
+})
+
const codex5hUsedPercent = computed(() => codex5hWindow.value.usedPercent)
const codex5hResetAt = computed(() => codex5hWindow.value.resetAt)
const codex7dUsedPercent = computed(() => codex7dWindow.value.usedPercent)
diff --git a/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts b/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
index 0b61b3bd..12c512e3 100644
--- a/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
+++ b/frontend/src/components/account/__tests__/AccountUsageCell.spec.ts
@@ -67,4 +67,59 @@ describe('AccountUsageCell', () => {
expect(wrapper.text()).toContain('admin.accounts.usageWindow.gemini3Image|70|2026-03-01T09:00:00Z')
})
+
+ it('OpenAI OAuth 在无 codex 快照时会回退显示 usage 接口窗口', async () => {
+ getUsage.mockResolvedValue({
+ five_hour: {
+ utilization: 0,
+ resets_at: null,
+ remaining_seconds: 0,
+ window_stats: {
+ requests: 2,
+ tokens: 27700,
+ cost: 0.06,
+ standard_cost: 0.06,
+ user_cost: 0.06
+ }
+ },
+ seven_day: {
+ utilization: 0,
+ resets_at: null,
+ remaining_seconds: 0,
+ window_stats: {
+ requests: 2,
+ tokens: 27700,
+ cost: 0.06,
+ standard_cost: 0.06,
+ user_cost: 0.06
+ }
+ }
+ })
+
+ const wrapper = mount(AccountUsageCell, {
+ props: {
+ account: {
+ id: 2002,
+ platform: 'openai',
+ type: 'oauth',
+ extra: {}
+ } as any
+ },
+ global: {
+ stubs: {
+ UsageProgressBar: {
+ props: ['label', 'utilization', 'resetsAt', 'windowStats', 'color'],
+ template: '{{ label }}|{{ utilization }}|{{ windowStats?.tokens }}
'
+ },
+ AccountQuotaInfo: true
+ }
+ }
+ })
+
+ await flushPromises()
+
+ expect(getUsage).toHaveBeenCalledWith(2002)
+ expect(wrapper.text()).toContain('5h|0|27700')
+ expect(wrapper.text()).toContain('7d|0|27700')
+ })
})
From d90e56eb450cb8aceb2bdba779ef4362d1b4fc6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A5=9E=E4=B9=90?= <6682635@qq.com>
Date: Fri, 6 Mar 2026 21:04:24 +0800
Subject: [PATCH 2/6] chore(openai): clean up ws usage window branch
---
.../internal/service/account_usage_service.go | 3 +-
.../service/openai_gateway_service_test.go | 84 -------------------
2 files changed, 2 insertions(+), 85 deletions(-)
diff --git a/backend/internal/service/account_usage_service.go b/backend/internal/service/account_usage_service.go
index 12cb87dd..2fa5f707 100644
--- a/backend/internal/service/account_usage_service.go
+++ b/backend/internal/service/account_usage_service.go
@@ -96,6 +96,7 @@ const (
apiCacheTTL = 3 * time.Minute
windowStatsCacheTTL = 1 * time.Minute
openAIProbeCacheTTL = 10 * time.Minute
+ openAICodexProbeVersion = "0.104.0"
)
// UsageCache 封装账户使用量相关的缓存
@@ -400,7 +401,7 @@ func (s *AccountUsageService) probeOpenAICodexSnapshot(ctx context.Context, acco
req.Header.Set("Accept", "text/event-stream")
req.Header.Set("OpenAI-Beta", "responses=experimental")
req.Header.Set("Originator", "codex_cli_rs")
- req.Header.Set("Version", codexCLIVersion)
+ req.Header.Set("Version", openAICodexProbeVersion)
req.Header.Set("User-Agent", codexCLIUserAgent)
if s.identityCache != nil {
if fp, fpErr := s.identityCache.GetFingerprint(reqCtx, account.ID); fpErr == nil && fp != nil && strings.TrimSpace(fp.UserAgent) != "" {
diff --git a/backend/internal/service/openai_gateway_service_test.go b/backend/internal/service/openai_gateway_service_test.go
index 6c8f4f52..c8efe6b3 100644
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -1288,90 +1288,6 @@ func TestOpenAIUpdateCodexUsageSnapshotFromHeaders(t *testing.T) {
}
}
-func TestOpenAIResponsesRequestPathSuffix(t *testing.T) {
- gin.SetMode(gin.TestMode)
- rec := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(rec)
-
- tests := []struct {
- name string
- path string
- want string
- }{
- {name: "exact v1 responses", path: "/v1/responses", want: ""},
- {name: "compact v1 responses", path: "/v1/responses/compact", want: "/compact"},
- {name: "compact alias responses", path: "/responses/compact/", want: "/compact"},
- {name: "nested suffix", path: "/openai/v1/responses/compact/detail", want: "/compact/detail"},
- {name: "unrelated path", path: "/v1/chat/completions", want: ""},
- }
-
- for _, tt := range tests {
- t.Run(tt.name, func(t *testing.T) {
- c.Request = httptest.NewRequest(http.MethodPost, tt.path, nil)
- require.Equal(t, tt.want, openAIResponsesRequestPathSuffix(c))
- })
- }
-}
-
-func TestOpenAIBuildUpstreamRequestOpenAIPassthroughPreservesCompactPath(t *testing.T) {
- gin.SetMode(gin.TestMode)
- rec := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(rec)
- c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
-
- svc := &OpenAIGatewayService{}
- account := &Account{Type: AccountTypeOAuth}
-
- req, err := svc.buildUpstreamRequestOpenAIPassthrough(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token")
- require.NoError(t, err)
- require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
- require.Equal(t, "application/json", req.Header.Get("Accept"))
- require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
- require.NotEmpty(t, req.Header.Get("Session_Id"))
-}
-
-func TestOpenAIBuildUpstreamRequestCompactForcesJSONAcceptForOAuth(t *testing.T) {
- gin.SetMode(gin.TestMode)
- rec := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(rec)
- c.Request = httptest.NewRequest(http.MethodPost, "/v1/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
-
- svc := &OpenAIGatewayService{}
- account := &Account{
- Type: AccountTypeOAuth,
- Credentials: map[string]any{"chatgpt_account_id": "chatgpt-acc"},
- }
-
- req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", true)
- require.NoError(t, err)
- require.Equal(t, chatgptCodexURL+"/compact", req.URL.String())
- require.Equal(t, "application/json", req.Header.Get("Accept"))
- require.Equal(t, codexCLIVersion, req.Header.Get("Version"))
- require.NotEmpty(t, req.Header.Get("Session_Id"))
-}
-
-func TestOpenAIBuildUpstreamRequestPreservesCompactPathForAPIKeyBaseURL(t *testing.T) {
- gin.SetMode(gin.TestMode)
- rec := httptest.NewRecorder()
- c, _ := gin.CreateTestContext(rec)
- c.Request = httptest.NewRequest(http.MethodPost, "/responses/compact", bytes.NewReader([]byte(`{"model":"gpt-5"}`)))
-
- svc := &OpenAIGatewayService{cfg: &config.Config{
- Security: config.SecurityConfig{
- URLAllowlist: config.URLAllowlistConfig{Enabled: false},
- },
- }}
- account := &Account{
- Type: AccountTypeAPIKey,
- Platform: PlatformOpenAI,
- Credentials: map[string]any{"base_url": "https://example.com/v1"},
- }
-
- req, err := svc.buildUpstreamRequest(c.Request.Context(), c, account, []byte(`{"model":"gpt-5"}`), "token", false, "", false)
- require.NoError(t, err)
- require.Equal(t, "https://example.com/v1/responses/compact", req.URL.String())
-}
-
// ==================== P1-08 修复:model 替换性能优化测试 =============
func TestReplaceModelInSSELine(t *testing.T) {
svc := &OpenAIGatewayService{}
From 49d0301ddee34484a64e48489e273d55cc49db1a Mon Sep 17 00:00:00 2001
From: FizzlyCode <224935570+FizzlyCode@users.noreply.github.com>
Date: Fri, 6 Mar 2026 17:53:04 +0800
Subject: [PATCH 3/6] =?UTF-8?q?fix:=20Setup=20Token=20=E8=B4=A6=E5=8F=B7?=
=?UTF-8?q?=E4=BD=BF=E7=94=A8=E7=9C=9F=E5=AE=9E=20utilization=20=E5=80=BC?=
=?UTF-8?q?=E6=9B=BF=E4=BB=A3=E7=8A=B6=E6=80=81=E4=BC=B0=E7=AE=97?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
从响应头 anthropic-ratelimit-unified-5h-utilization 获取并存储真实
utilization 值,解决进度条始终显示 0% 的问题。窗口重置时清除旧值,
避免残留上个窗口的数据。
Co-Authored-By: Claude Opus 4.6
---
.../internal/service/account_usage_service.go | 32 ++++++++++++++-----
backend/internal/service/ratelimit_service.go | 15 +++++++++
2 files changed, 39 insertions(+), 8 deletions(-)
diff --git a/backend/internal/service/account_usage_service.go b/backend/internal/service/account_usage_service.go
index 6dee6c13..8f7a7f41 100644
--- a/backend/internal/service/account_usage_service.go
+++ b/backend/internal/service/account_usage_service.go
@@ -2,6 +2,7 @@ package service
import (
"context"
+ "encoding/json"
"fmt"
"log"
"strings"
@@ -666,15 +667,30 @@ func (s *AccountUsageService) estimateSetupTokenUsage(account *Account) *UsageIn
remaining = 0
}
- // 根据状态估算使用率 (百分比形式,100 = 100%)
+ // 优先使用响应头中存储的真实 utilization 值(0-1 小数,转为 0-100 百分比)
var utilization float64
- switch account.SessionWindowStatus {
- case "rejected":
- utilization = 100.0
- case "allowed_warning":
- utilization = 80.0
- default:
- utilization = 0.0
+ var found bool
+ if stored, ok := account.Extra["session_window_utilization"]; ok {
+ switch v := stored.(type) {
+ case float64:
+ utilization = v * 100
+ found = true
+ case json.Number:
+ if f, err := v.Float64(); err == nil {
+ utilization = f * 100
+ found = true
+ }
+ }
+ }
+
+ // 如果没有存储的 utilization,回退到状态估算
+ if !found {
+ switch account.SessionWindowStatus {
+ case "rejected":
+ utilization = 100.0
+ case "allowed_warning":
+ utilization = 80.0
+ }
}
info.FiveHour = &UsageProgress{
diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go
index 9f16fb2b..f8f3154b 100644
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -970,12 +970,27 @@ func (s *RateLimitService) UpdateSessionWindow(ctx context.Context, account *Acc
windowStart = &start
windowEnd = &end
slog.Info("account_session_window_initialized", "account_id", account.ID, "window_start", start, "window_end", end, "status", status)
+ // 窗口重置时清除旧的 utilization,避免残留上个窗口的数据
+ _ = s.accountRepo.UpdateExtra(ctx, account.ID, map[string]any{
+ "session_window_utilization": nil,
+ })
}
if err := s.accountRepo.UpdateSessionWindow(ctx, account.ID, windowStart, windowEnd, status); err != nil {
slog.Warn("session_window_update_failed", "account_id", account.ID, "error", err)
}
+ // 存储真实的 utilization 值(0-1 小数),供 estimateSetupTokenUsage 使用
+ if utilStr := headers.Get("anthropic-ratelimit-unified-5h-utilization"); utilStr != "" {
+ if util, err := strconv.ParseFloat(utilStr, 64); err == nil {
+ if err := s.accountRepo.UpdateExtra(ctx, account.ID, map[string]any{
+ "session_window_utilization": util,
+ }); err != nil {
+ slog.Warn("session_window_utilization_update_failed", "account_id", account.ID, "error", err)
+ }
+ }
+ }
+
// 如果状态为allowed且之前有限流,说明窗口已重置,清除限流状态
if status == "allowed" && account.IsRateLimited() {
if err := s.ClearRateLimit(ctx, account.ID); err != nil {
From 1a2143c1fee35864ee6ad627fd341739d2f0310a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A5=9E=E4=B9=90?= <6682635@qq.com>
Date: Fri, 6 Mar 2026 21:17:27 +0800
Subject: [PATCH 4/6] fix(openai): adapt messages path to codex transform
signature
---
backend/internal/service/openai_gateway_messages.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go
index 4fe89732..ad8cff0b 100644
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
// OAuth codex transform forces stream=true upstream, so always use
// the streaming response handler regardless of what the client asked.
isStream = true
From 5df3cafa99c45a59b3df6ac423191e61bc73cdd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=A5=9E=E4=B9=90?= <6682635@qq.com>
Date: Fri, 6 Mar 2026 21:31:36 +0800
Subject: [PATCH 5/6] style(go): format account usage service
---
backend/internal/service/account_usage_service.go | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/backend/internal/service/account_usage_service.go b/backend/internal/service/account_usage_service.go
index 2fa5f707..a84892e4 100644
--- a/backend/internal/service/account_usage_service.go
+++ b/backend/internal/service/account_usage_service.go
@@ -93,9 +93,9 @@ type antigravityUsageCache struct {
}
const (
- apiCacheTTL = 3 * time.Minute
- windowStatsCacheTTL = 1 * time.Minute
- openAIProbeCacheTTL = 10 * time.Minute
+ apiCacheTTL = 3 * time.Minute
+ windowStatsCacheTTL = 1 * time.Minute
+ openAIProbeCacheTTL = 10 * time.Minute
openAICodexProbeVersion = "0.104.0"
)
From 921599948b627323d25f2c08877019ab91ce7878 Mon Sep 17 00:00:00 2001
From: shaw
Date: Fri, 6 Mar 2026 22:39:33 +0800
Subject: [PATCH 6/6] =?UTF-8?q?feat:=20/v1/messages=E7=AB=AF=E7=82=B9?=
=?UTF-8?q?=E9=80=82=E9=85=8Dcodex=E8=B4=A6=E5=8F=B7=E6=B1=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../handler/openai_gateway_handler.go | 10 +-
.../pkg/apicompat/anthropic_responses_test.go | 201 +++++++++++
.../pkg/apicompat/anthropic_to_responses.go | 57 +++
.../pkg/apicompat/responses_to_anthropic.go | 2 +-
backend/internal/pkg/apicompat/types.go | 328 +++---------------
.../service/openai_gateway_messages.go | 25 +-
.../service/openai_gateway_service.go | 15 +-
7 files changed, 341 insertions(+), 297 deletions(-)
diff --git a/backend/internal/handler/openai_gateway_handler.go b/backend/internal/handler/openai_gateway_handler.go
index ccb61974..483b3032 100644
--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -670,8 +670,14 @@ func (h *OpenAIGatewayHandler) anthropicStreamingAwareError(c *gin.Context, stat
if streamStarted {
flusher, ok := c.Writer.(http.Flusher)
if ok {
- errorEvent := "event: error\ndata: " + `{"type":"error","error":{"type":` + strconv.Quote(errType) + `,"message":` + strconv.Quote(message) + `}}` + "\n\n"
- fmt.Fprint(c.Writer, errorEvent) //nolint:errcheck
+ errPayload, _ := json.Marshal(gin.H{
+ "type": "error",
+ "error": gin.H{
+ "type": errType,
+ "message": message,
+ },
+ })
+ fmt.Fprintf(c.Writer, "event: error\ndata: %s\n\n", errPayload) //nolint:errcheck
flusher.Flush()
}
return
diff --git a/backend/internal/pkg/apicompat/anthropic_responses_test.go b/backend/internal/pkg/apicompat/anthropic_responses_test.go
index 20eeb969..60c54539 100644
--- a/backend/internal/pkg/apicompat/anthropic_responses_test.go
+++ b/backend/internal/pkg/apicompat/anthropic_responses_test.go
@@ -532,3 +532,204 @@ func TestResponsesAnthropicEventToSSE(t *testing.T) {
assert.Contains(t, sse, "data: ")
assert.Contains(t, sse, `"resp_1"`)
}
+
+// ---------------------------------------------------------------------------
+// response.failed tests
+// ---------------------------------------------------------------------------
+
+func TestStreamingFailed(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ // 1. response.created
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_fail_1", Model: "gpt-5.2"},
+ }, state)
+
+ // 2. Some text output before failure
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.output_text.delta",
+ Delta: "Partial output before failure",
+ }, state)
+
+ // 3. response.failed
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.failed",
+ Response: &ResponsesResponse{
+ Status: "failed",
+ Error: &ResponsesError{Code: "server_error", Message: "Internal error"},
+ Usage: &ResponsesUsage{InputTokens: 50, OutputTokens: 10},
+ },
+ }, state)
+
+ // Should close text block + message_delta + message_stop
+ require.Len(t, events, 3)
+ assert.Equal(t, "content_block_stop", events[0].Type)
+ assert.Equal(t, "message_delta", events[1].Type)
+ assert.Equal(t, "end_turn", events[1].Delta.StopReason)
+ assert.Equal(t, 50, events[1].Usage.InputTokens)
+ assert.Equal(t, 10, events[1].Usage.OutputTokens)
+ assert.Equal(t, "message_stop", events[2].Type)
+}
+
+func TestStreamingFailedNoOutput(t *testing.T) {
+ state := NewResponsesEventToAnthropicState()
+
+ // 1. response.created
+ ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.created",
+ Response: &ResponsesResponse{ID: "resp_fail_2", Model: "gpt-5.2"},
+ }, state)
+
+ // 2. response.failed with no prior output
+ events := ResponsesEventToAnthropicEvents(&ResponsesStreamEvent{
+ Type: "response.failed",
+ Response: &ResponsesResponse{
+ Status: "failed",
+ Error: &ResponsesError{Code: "rate_limit_error", Message: "Too many requests"},
+ Usage: &ResponsesUsage{InputTokens: 20, OutputTokens: 0},
+ },
+ }, state)
+
+ // Should emit message_delta + message_stop (no block to close)
+ require.Len(t, events, 2)
+ assert.Equal(t, "message_delta", events[0].Type)
+ assert.Equal(t, "end_turn", events[0].Delta.StopReason)
+ assert.Equal(t, "message_stop", events[1].Type)
+}
+
+func TestResponsesToAnthropic_Failed(t *testing.T) {
+ resp := &ResponsesResponse{
+ ID: "resp_fail_3",
+ Model: "gpt-5.2",
+ Status: "failed",
+ Error: &ResponsesError{Code: "server_error", Message: "Something went wrong"},
+ Output: []ResponsesOutput{},
+ Usage: &ResponsesUsage{InputTokens: 30, OutputTokens: 0},
+ }
+
+ anth := ResponsesToAnthropic(resp, "claude-opus-4-6")
+ // Failed status defaults to "end_turn" stop reason
+ assert.Equal(t, "end_turn", anth.StopReason)
+	// Should contain exactly one (possibly empty) text block
+ require.Len(t, anth.Content, 1)
+ assert.Equal(t, "text", anth.Content[0].Type)
+}
+
+// ---------------------------------------------------------------------------
+// thinking → reasoning conversion tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_ThinkingEnabled(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ Thinking: &AnthropicThinking{Type: "enabled", BudgetTokens: 10000},
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ require.NotNil(t, resp.Reasoning)
+ assert.Equal(t, "high", resp.Reasoning.Effort)
+ assert.Equal(t, "auto", resp.Reasoning.Summary)
+ assert.Contains(t, resp.Include, "reasoning.encrypted_content")
+ assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_ThinkingAdaptive(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ Thinking: &AnthropicThinking{Type: "adaptive", BudgetTokens: 5000},
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ require.NotNil(t, resp.Reasoning)
+ assert.Equal(t, "medium", resp.Reasoning.Effort)
+ assert.Equal(t, "auto", resp.Reasoning.Summary)
+ assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_ThinkingDisabled(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ Thinking: &AnthropicThinking{Type: "disabled"},
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ assert.Nil(t, resp.Reasoning)
+ assert.NotContains(t, resp.Include, "reasoning.summary")
+}
+
+func TestAnthropicToResponses_NoThinking(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+ assert.Nil(t, resp.Reasoning)
+}
+
+// ---------------------------------------------------------------------------
+// tool_choice conversion tests
+// ---------------------------------------------------------------------------
+
+func TestAnthropicToResponses_ToolChoiceAuto(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ ToolChoice: json.RawMessage(`{"type":"auto"}`),
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var tc string
+ require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+ assert.Equal(t, "auto", tc)
+}
+
+func TestAnthropicToResponses_ToolChoiceAny(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ ToolChoice: json.RawMessage(`{"type":"any"}`),
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var tc string
+ require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+ assert.Equal(t, "required", tc)
+}
+
+func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
+ req := &AnthropicRequest{
+ Model: "gpt-5.2",
+ MaxTokens: 1024,
+ Messages: []AnthropicMessage{{Role: "user", Content: json.RawMessage(`"Hello"`)}},
+ ToolChoice: json.RawMessage(`{"type":"tool","name":"get_weather"}`),
+ }
+
+ resp, err := AnthropicToResponses(req)
+ require.NoError(t, err)
+
+ var tc map[string]any
+ require.NoError(t, json.Unmarshal(resp.ToolChoice, &tc))
+ assert.Equal(t, "function", tc["type"])
+ fn, ok := tc["function"].(map[string]any)
+ require.True(t, ok)
+ assert.Equal(t, "get_weather", fn["name"])
+}
diff --git a/backend/internal/pkg/apicompat/anthropic_to_responses.go b/backend/internal/pkg/apicompat/anthropic_to_responses.go
index 1a13658d..cc0c9e6c 100644
--- a/backend/internal/pkg/apicompat/anthropic_to_responses.go
+++ b/backend/internal/pkg/apicompat/anthropic_to_responses.go
@@ -2,6 +2,7 @@ package apicompat
import (
"encoding/json"
+ "fmt"
"strings"
)
@@ -44,9 +45,65 @@ func AnthropicToResponses(req *AnthropicRequest) (*ResponsesRequest, error) {
out.Tools = convertAnthropicToolsToResponses(req.Tools)
}
+ // Convert thinking → reasoning.
+	// summary="auto" (ResponsesReasoning.Summary) causes the upstream to emit reasoning_summary_text
+ // streaming events; the include array only needs reasoning.encrypted_content
+ // (already set above) for content continuity.
+ if req.Thinking != nil {
+ switch req.Thinking.Type {
+ case "enabled":
+ out.Reasoning = &ResponsesReasoning{Effort: "high", Summary: "auto"}
+ case "adaptive":
+ out.Reasoning = &ResponsesReasoning{Effort: "medium", Summary: "auto"}
+ }
+ // "disabled" or unknown → omit reasoning
+ }
+
+ // Convert tool_choice
+ if len(req.ToolChoice) > 0 {
+ tc, err := convertAnthropicToolChoiceToResponses(req.ToolChoice)
+ if err != nil {
+ return nil, fmt.Errorf("convert tool_choice: %w", err)
+ }
+ out.ToolChoice = tc
+ }
+
return out, nil
}
+// convertAnthropicToolChoiceToResponses maps Anthropic tool_choice to Responses format.
+//
+// {"type":"auto"} → "auto"
+// {"type":"any"} → "required"
+// {"type":"none"} → "none"
+// {"type":"tool","name":"X"} → {"type":"function","function":{"name":"X"}}
+func convertAnthropicToolChoiceToResponses(raw json.RawMessage) (json.RawMessage, error) {
+ var tc struct {
+ Type string `json:"type"`
+ Name string `json:"name"`
+ }
+ if err := json.Unmarshal(raw, &tc); err != nil {
+ return nil, err
+ }
+
+ switch tc.Type {
+ case "auto":
+ return json.Marshal("auto")
+ case "any":
+ return json.Marshal("required")
+ case "none":
+ return json.Marshal("none")
+ case "tool":
+ return json.Marshal(map[string]any{
+ "type": "function",
+ "function": map[string]string{"name": tc.Name},
+ })
+ default:
+ // Pass through unknown types as-is
+ return raw, nil
+ }
+}
+
// convertAnthropicToResponsesInput builds the Responses API input items array
// from the Anthropic system field and message list.
func convertAnthropicToResponsesInput(system json.RawMessage, msgs []AnthropicMessage) ([]ResponsesInputItem, error) {
diff --git a/backend/internal/pkg/apicompat/responses_to_anthropic.go b/backend/internal/pkg/apicompat/responses_to_anthropic.go
index 16770650..39d36cf4 100644
--- a/backend/internal/pkg/apicompat/responses_to_anthropic.go
+++ b/backend/internal/pkg/apicompat/responses_to_anthropic.go
@@ -153,7 +153,7 @@ func ResponsesEventToAnthropicEvents(
return resToAnthHandleReasoningDelta(evt, state)
case "response.reasoning_summary_text.done":
return resToAnthHandleBlockDone(state)
- case "response.completed", "response.incomplete":
+ case "response.completed", "response.incomplete", "response.failed":
return resToAnthHandleCompleted(evt, state)
default:
return nil
diff --git a/backend/internal/pkg/apicompat/types.go b/backend/internal/pkg/apicompat/types.go
index 92e85318..435f5032 100644
--- a/backend/internal/pkg/apicompat/types.go
+++ b/backend/internal/pkg/apicompat/types.go
@@ -1,7 +1,7 @@
// Package apicompat provides type definitions and conversion utilities for
-// translating between Anthropic Messages, OpenAI Chat Completions, and OpenAI
-// Responses API formats. It enables multi-protocol support so that clients
-// using different API formats can be served through a unified gateway.
+// translating between Anthropic Messages and OpenAI Responses API formats.
+// It enables multi-protocol support so that clients using different API
+// formats can be served through a unified gateway.
package apicompat
import "encoding/json"
@@ -21,6 +21,14 @@ type AnthropicRequest struct {
Temperature *float64 `json:"temperature,omitempty"`
TopP *float64 `json:"top_p,omitempty"`
StopSeqs []string `json:"stop_sequences,omitempty"`
+ Thinking *AnthropicThinking `json:"thinking,omitempty"`
+ ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
+}
+
+// AnthropicThinking configures extended thinking in the Anthropic API.
+type AnthropicThinking struct {
+ Type string `json:"type"` // "enabled" | "adaptive" | "disabled"
+ BudgetTokens int `json:"budget_tokens,omitempty"` // max thinking tokens
}
// AnthropicMessage is a single message in the Anthropic conversation.
@@ -120,143 +128,29 @@ type AnthropicDelta struct {
StopSequence *string `json:"stop_sequence,omitempty"`
}
-// ---------------------------------------------------------------------------
-// OpenAI Chat Completions API types
-// ---------------------------------------------------------------------------
-
-// ChatRequest is the request body for POST /v1/chat/completions.
-type ChatRequest struct {
- Model string `json:"model"`
- Messages []ChatMessage `json:"messages"`
- MaxTokens *int `json:"max_tokens,omitempty"`
- Temperature *float64 `json:"temperature,omitempty"`
- TopP *float64 `json:"top_p,omitempty"`
- Stream bool `json:"stream,omitempty"`
- Tools []ChatTool `json:"tools,omitempty"`
- Stop json.RawMessage `json:"stop,omitempty"` // string or []string
-}
-
-// ChatMessage is a single message in the Chat Completions conversation.
-type ChatMessage struct {
- Role string `json:"role"` // "system" | "user" | "assistant" | "tool"
- Content json.RawMessage `json:"content,omitempty"` // string or []ChatContentPart
-
- // assistant fields
- ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
-
- // tool fields
- ToolCallID string `json:"tool_call_id,omitempty"`
-
- // Copilot-specific reasoning passthrough
- ReasoningText string `json:"reasoning_text,omitempty"`
- ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
-}
-
-// ChatContentPart is a typed content part in a multi-part message.
-type ChatContentPart struct {
- Type string `json:"type"` // "text" | "image_url"
- Text string `json:"text,omitempty"`
-}
-
-// ChatToolCall represents a tool invocation in an assistant message.
-// In streaming deltas, Index identifies which tool call is being updated.
-type ChatToolCall struct {
- Index int `json:"index"`
- ID string `json:"id,omitempty"`
- Type string `json:"type,omitempty"` // "function"
- Function ChatFunctionCall `json:"function"`
-}
-
-// ChatFunctionCall holds the function name and arguments.
-type ChatFunctionCall struct {
- Name string `json:"name"`
- Arguments string `json:"arguments"`
-}
-
-// ChatTool describes a tool available to the model.
-type ChatTool struct {
- Type string `json:"type"` // "function"
- Function ChatFunction `json:"function"`
-}
-
-// ChatFunction is the function definition inside a ChatTool.
-type ChatFunction struct {
- Name string `json:"name"`
- Description string `json:"description,omitempty"`
- Parameters json.RawMessage `json:"parameters,omitempty"` // JSON Schema
-}
-
-// ChatResponse is the non-streaming response from POST /v1/chat/completions.
-type ChatResponse struct {
- ID string `json:"id"`
- Object string `json:"object"` // "chat.completion"
- Created int64 `json:"created"`
- Model string `json:"model"`
- Choices []ChatChoice `json:"choices"`
- Usage *ChatUsage `json:"usage,omitempty"`
-}
-
-// ChatChoice is one completion choice.
-type ChatChoice struct {
- Index int `json:"index"`
- Message ChatMessage `json:"message"`
- FinishReason string `json:"finish_reason"`
-}
-
-// ChatUsage holds token counts in Chat Completions format.
-type ChatUsage struct {
- PromptTokens int `json:"prompt_tokens"`
- CompletionTokens int `json:"completion_tokens"`
- TotalTokens int `json:"total_tokens"`
-}
-
-// ---------------------------------------------------------------------------
-// Chat Completions SSE types
-// ---------------------------------------------------------------------------
-
-// ChatStreamChunk is a single SSE chunk in the Chat Completions streaming protocol.
-type ChatStreamChunk struct {
- ID string `json:"id"`
- Object string `json:"object"` // "chat.completion.chunk"
- Created int64 `json:"created"`
- Model string `json:"model"`
- Choices []ChatStreamChoice `json:"choices"`
- Usage *ChatUsage `json:"usage,omitempty"`
-}
-
-// ChatStreamChoice is one choice inside a streaming chunk.
-type ChatStreamChoice struct {
- Index int `json:"index"`
- Delta ChatStreamDelta `json:"delta"`
- FinishReason *string `json:"finish_reason"`
-}
-
-// ChatStreamDelta carries incremental content in a streaming chunk.
-type ChatStreamDelta struct {
- Role string `json:"role,omitempty"`
- Content string `json:"content,omitempty"`
- ToolCalls []ChatToolCall `json:"tool_calls,omitempty"`
-
- // Copilot-specific reasoning passthrough (streaming)
- ReasoningText string `json:"reasoning_text,omitempty"`
- ReasoningOpaque string `json:"reasoning_opaque,omitempty"`
-}
-
// ---------------------------------------------------------------------------
// OpenAI Responses API types
// ---------------------------------------------------------------------------
// ResponsesRequest is the request body for POST /v1/responses.
type ResponsesRequest struct {
- Model string `json:"model"`
- Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
- MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
- Temperature *float64 `json:"temperature,omitempty"`
- TopP *float64 `json:"top_p,omitempty"`
- Stream bool `json:"stream,omitempty"`
- Tools []ResponsesTool `json:"tools,omitempty"`
- Include []string `json:"include,omitempty"`
- Store *bool `json:"store,omitempty"`
+ Model string `json:"model"`
+ Input json.RawMessage `json:"input"` // string or []ResponsesInputItem
+ MaxOutputTokens *int `json:"max_output_tokens,omitempty"`
+ Temperature *float64 `json:"temperature,omitempty"`
+ TopP *float64 `json:"top_p,omitempty"`
+ Stream bool `json:"stream,omitempty"`
+ Tools []ResponsesTool `json:"tools,omitempty"`
+ Include []string `json:"include,omitempty"`
+ Store *bool `json:"store,omitempty"`
+ Reasoning *ResponsesReasoning `json:"reasoning,omitempty"`
+ ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
+}
+
+// ResponsesReasoning configures reasoning effort in the Responses API.
+type ResponsesReasoning struct {
+ Effort string `json:"effort"` // "low" | "medium" | "high"
+ Summary string `json:"summary,omitempty"` // "auto" | "concise" | "detailed"
}
// ResponsesInputItem is one item in the Responses API input array.
@@ -305,6 +199,15 @@ type ResponsesResponse struct {
// incomplete_details is present when status="incomplete"
IncompleteDetails *ResponsesIncompleteDetails `json:"incomplete_details,omitempty"`
+
+ // Error is present when status="failed"
+ Error *ResponsesError `json:"error,omitempty"`
+}
+
+// ResponsesError describes an error in a failed response.
+type ResponsesError struct {
+ Code string `json:"code"`
+ Message string `json:"message"`
}
// ResponsesIncompleteDetails explains why a response is incomplete.
@@ -349,6 +252,16 @@ type ResponsesUsage struct {
OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
}
+// ResponsesInputTokensDetails breaks down input token usage.
+type ResponsesInputTokensDetails struct {
+ CachedTokens int `json:"cached_tokens,omitempty"`
+}
+
+// ResponsesOutputTokensDetails breaks down output token usage.
+type ResponsesOutputTokensDetails struct {
+ ReasoningTokens int `json:"reasoning_tokens,omitempty"`
+}
+
// ---------------------------------------------------------------------------
// Responses SSE event types
// ---------------------------------------------------------------------------
@@ -388,153 +301,6 @@ type ResponsesStreamEvent struct {
SequenceNumber int `json:"sequence_number,omitempty"`
}
-// ResponsesOutputReasoning is a reasoning output item in the Responses API.
-// This type represents the "type":"reasoning" output item that contains
-// extended thinking from the model.
-type ResponsesOutputReasoning struct {
- ID string `json:"id,omitempty"`
- Type string `json:"type"` // "reasoning"
- Status string `json:"status,omitempty"` // "in_progress" | "completed" | "incomplete"
- EncryptedContent string `json:"encrypted_content,omitempty"`
- Summary []ResponsesReasoningSummary `json:"summary,omitempty"`
-}
-
-// ResponsesReasoningSummary is a summary text block inside a reasoning output.
-type ResponsesReasoningSummary struct {
- Type string `json:"type"` // "summary_text"
- Text string `json:"text"`
-}
-
-// ResponsesStreamState maintains the state for converting Responses streaming
-// events to Chat Completions format. It tracks content blocks, tool calls,
-// reasoning blocks, and other streaming artifacts.
-type ResponsesStreamState struct {
- // Response metadata
- ID string
- Model string
- Created int64
-
- // Content tracking
- ContentIndex int
- CurrentText string
- CurrentItemID string
- PendingText []string // Text to accumulate before emitting
-
- // Tool call tracking
- ToolCalls []ResponsesToolCallState
- CurrentToolCall *ResponsesToolCallState
-
- // Reasoning tracking
- ReasoningBlocks []ResponsesReasoningState
- CurrentReasoning *ResponsesReasoningState
-
- // Usage tracking
- InputTokens int
- OutputTokens int
-
- // Status tracking
- Status string
- FinishReason string
-}
-
-// ResponsesToolCallState tracks a single tool call during streaming.
-type ResponsesToolCallState struct {
- Index int
- ItemID string
- CallID string
- Name string
- Arguments string
- Status string
- IsComplete bool
-}
-
-// ResponsesReasoningState tracks a reasoning block during streaming.
-type ResponsesReasoningState struct {
- ItemID string
- SummaryIndex int
- SummaryText string
- Status string
- IsComplete bool
-}
-
-// ResponsesUsageDetail provides additional token usage details in Responses format.
-type ResponsesUsageDetail struct {
- InputTokens int `json:"input_tokens"`
- OutputTokens int `json:"output_tokens"`
- TotalTokens int `json:"total_tokens"`
-
- // Optional detailed breakdown
- InputTokensDetails *ResponsesInputTokensDetails `json:"input_tokens_details,omitempty"`
- OutputTokensDetails *ResponsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
-}
-
-// ResponsesInputTokensDetails breaks down input token usage.
-type ResponsesInputTokensDetails struct {
- CachedTokens int `json:"cached_tokens,omitempty"`
-}
-
-// ResponsesOutputTokensDetails breaks down output token usage.
-type ResponsesOutputTokensDetails struct {
- ReasoningTokens int `json:"reasoning_tokens,omitempty"`
-}
-
-// ---------------------------------------------------------------------------
-// Finish reason mapping helpers
-// ---------------------------------------------------------------------------
-
-// ChatFinishToAnthropic maps a Chat Completions finish_reason to an Anthropic stop_reason.
-func ChatFinishToAnthropic(reason string) string {
- switch reason {
- case "stop":
- return "end_turn"
- case "tool_calls":
- return "tool_use"
- case "length":
- return "max_tokens"
- default:
- return "end_turn"
- }
-}
-
-// AnthropicStopToChat maps an Anthropic stop_reason to a Chat Completions finish_reason.
-func AnthropicStopToChat(reason string) string {
- switch reason {
- case "end_turn":
- return "stop"
- case "tool_use":
- return "tool_calls"
- case "max_tokens":
- return "length"
- default:
- return "stop"
- }
-}
-
-// ResponsesStatusToChat maps a Responses API status to a Chat Completions finish_reason.
-func ResponsesStatusToChat(status string, details *ResponsesIncompleteDetails) string {
- switch status {
- case "completed":
- return "stop"
- case "incomplete":
- if details != nil && details.Reason == "max_output_tokens" {
- return "length"
- }
- return "stop"
- default:
- return "stop"
- }
-}
-
-// ChatFinishToResponsesStatus maps a Chat Completions finish_reason to a Responses status.
-func ChatFinishToResponsesStatus(reason string) string {
- switch reason {
- case "length":
- return "incomplete"
- default:
- return "completed"
- }
-}
-
// ---------------------------------------------------------------------------
// Shared constants
// ---------------------------------------------------------------------------
diff --git a/backend/internal/service/openai_gateway_messages.go b/backend/internal/service/openai_gateway_messages.go
index 4fe89732..b728bb07 100644
--- a/backend/internal/service/openai_gateway_messages.go
+++ b/backend/internal/service/openai_gateway_messages.go
@@ -49,7 +49,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
mappedModel := account.GetMappedModel(originalModel)
responsesReq.Model = mappedModel
- logger.L().Info("openai messages: model mapping applied",
+ logger.L().Debug("openai messages: model mapping applied",
zap.Int64("account_id", account.ID),
zap.String("original_model", originalModel),
zap.String("mapped_model", mappedModel),
@@ -67,7 +67,7 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
if err := json.Unmarshal(responsesBody, &reqBody); err != nil {
return nil, fmt.Errorf("unmarshal for codex transform: %w", err)
}
- applyCodexOAuthTransform(reqBody, false)
+ applyCodexOAuthTransform(reqBody, false, false)
// OAuth codex transform forces stream=true upstream, so always use
// the streaming response handler regardless of what the client asked.
isStream = true
@@ -148,9 +148,9 @@ func (s *OpenAIGatewayService) ForwardAsAnthropic(
// 9. Handle normal response
if isStream {
- return s.handleAnthropicStreamingResponse(resp, c, originalModel, startTime)
+ return s.handleAnthropicStreamingResponse(resp, c, originalModel, mappedModel, startTime)
}
- return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, startTime)
+ return s.handleAnthropicNonStreamingResponse(resp, c, originalModel, mappedModel, startTime)
}
// handleAnthropicErrorResponse reads an upstream error and returns it in
@@ -200,6 +200,7 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
resp *http.Response,
c *gin.Context,
originalModel string,
+ mappedModel string,
startTime time.Time,
) (*OpenAIForwardResult, error) {
requestID := resp.Header.Get("x-request-id")
@@ -233,11 +234,12 @@ func (s *OpenAIGatewayService) handleAnthropicNonStreamingResponse(
c.JSON(http.StatusOK, anthropicResp)
return &OpenAIForwardResult{
- RequestID: requestID,
- Usage: usage,
- Model: originalModel,
- Stream: false,
- Duration: time.Since(startTime),
+ RequestID: requestID,
+ Usage: usage,
+ Model: originalModel,
+ BillingModel: mappedModel,
+ Stream: false,
+ Duration: time.Since(startTime),
}, nil
}
@@ -247,6 +249,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
resp *http.Response,
c *gin.Context,
originalModel string,
+ mappedModel string,
startTime time.Time,
) (*OpenAIForwardResult, error) {
requestID := resp.Header.Get("x-request-id")
@@ -293,7 +296,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
}
// Extract usage from completion events
- if (event.Type == "response.completed" || event.Type == "response.incomplete") &&
+ if (event.Type == "response.completed" || event.Type == "response.incomplete" || event.Type == "response.failed") &&
event.Response != nil && event.Response.Usage != nil {
usage = OpenAIUsage{
InputTokens: event.Response.Usage.InputTokens,
@@ -324,6 +327,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
RequestID: requestID,
Usage: usage,
Model: originalModel,
+ BillingModel: mappedModel,
Stream: true,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
@@ -360,6 +364,7 @@ func (s *OpenAIGatewayService) handleAnthropicStreamingResponse(
RequestID: requestID,
Usage: usage,
Model: originalModel,
+ BillingModel: mappedModel,
Stream: true,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
diff --git a/backend/internal/service/openai_gateway_service.go b/backend/internal/service/openai_gateway_service.go
index 2110f032..73a24cad 100644
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -207,7 +207,12 @@ type OpenAIUsage struct {
type OpenAIForwardResult struct {
RequestID string
Usage OpenAIUsage
- Model string
+ Model string // 原始模型(用于响应和日志显示)
+ // BillingModel is the model used for cost calculation.
+ // When non-empty, CalculateCost uses this instead of Model.
+ // This is set by the Anthropic Messages conversion path where
+ // the mapped upstream model differs from the client-facing model.
+ BillingModel string
// ReasoningEffort is extracted from request body (reasoning.effort) or derived from model suffix.
// Stored for usage records display; nil means not provided / not applicable.
ReasoningEffort *string
@@ -3610,7 +3615,11 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
multiplier = resolver.Resolve(ctx, user.ID, *apiKey.GroupID, apiKey.Group.RateMultiplier)
}
- cost, err := s.billingService.CalculateCost(result.Model, tokens, multiplier)
+ billingModel := result.Model
+ if result.BillingModel != "" {
+ billingModel = result.BillingModel
+ }
+ cost, err := s.billingService.CalculateCost(billingModel, tokens, multiplier)
if err != nil {
cost = &CostBreakdown{ActualCost: 0}
}
@@ -3630,7 +3639,7 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
APIKeyID: apiKey.ID,
AccountID: account.ID,
RequestID: result.RequestID,
- Model: result.Model,
+ Model: billingModel,
ReasoningEffort: result.ReasoningEffort,
InputTokens: actualInputTokens,
OutputTokens: result.Usage.OutputTokens,