feat(openai): port /responses/compact account support flow (PR #1555)

将 vansour/sub2api#1555 的 OpenAI compact 能力建模手工移植到当前 main:账号
级 compact 状态/auto-force_on-force_off 模式、compact-only 模型映射、调度器
tier 分层(已支持 > 未知 > 已知不支持)、管理后台 compact 主动探测,以及对应
i18n/状态徽章。普通 /responses 流量行为不变,无数据库迁移。
This commit is contained in:
shaw
2026-04-25 14:40:03 +08:00
parent b95ffce244
commit 095f457c57
32 changed files with 2534 additions and 189 deletions

View File

@@ -306,6 +306,10 @@ func (t *accountWriteThrottle) Allow(id int64, now time.Time) bool {
var defaultOpenAICodexSnapshotPersistThrottle = newAccountWriteThrottle(openAICodexSnapshotPersistMinInterval)
// ErrNoAvailableCompactAccounts indicates the request needs /responses/compact
// support but no compatible account is available.
var ErrNoAvailableCompactAccounts = errors.New("no available OpenAI accounts support /responses/compact")
// OpenAIGatewayService handles OpenAI API gateway operations
type OpenAIGatewayService struct {
accountRepo AccountRepository
@@ -442,11 +446,11 @@ func (s *OpenAIGatewayService) checkChannelPricingRestriction(ctx context.Contex
return s.channelService.IsModelRestricted(ctx, *groupID, billingModel)
}
func (s *OpenAIGatewayService) isUpstreamModelRestrictedByChannel(ctx context.Context, groupID int64, account *Account, requestedModel string) bool {
func (s *OpenAIGatewayService) isUpstreamModelRestrictedByChannel(ctx context.Context, groupID int64, account *Account, requestedModel string, requireCompact bool) bool {
if s.channelService == nil {
return false
}
upstreamModel := resolveOpenAIForwardModel(account, requestedModel, "")
upstreamModel := resolveOpenAIAccountUpstreamModelForRequest(account, requestedModel, requireCompact)
if upstreamModel == "" {
return false
}
@@ -1208,10 +1212,94 @@ func (s *OpenAIGatewayService) SelectAccountForModel(ctx context.Context, groupI
// SelectAccountForModelWithExclusions selects an account supporting the requested model while excluding specified accounts.
// SelectAccountForModelWithExclusions 选择支持指定模型的账号,同时排除指定的账号。
func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*Account, error) {
return s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, 0)
return s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, false, 0)
}
func (s *OpenAIGatewayService) selectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, stickyAccountID int64) (*Account, error) {
// noAvailableOpenAISelectionError builds the standard "no account available"
// error for OpenAI account selection. When compactBlocked is set, the
// compact-specific sentinel error takes precedence so callers can distinguish
// "no compact-capable account" from a plain exhaustion of candidates.
func noAvailableOpenAISelectionError(requestedModel string, compactBlocked bool) error {
	switch {
	case compactBlocked:
		return ErrNoAvailableCompactAccounts
	case requestedModel != "":
		return fmt.Errorf("no available OpenAI accounts supporting model: %s", requestedModel)
	default:
		return errors.New("no available OpenAI accounts")
	}
}
// openAICompactSupportTier classifies an OpenAI account by compact capability:
// 0 = explicitly unsupported (or nil / non-OpenAI account),
// 1 = unknown / not yet probed,
// 2 = explicitly supported.
func openAICompactSupportTier(account *Account) int {
	if account == nil || !account.IsOpenAI() {
		return 0
	}
	switch supported, known := account.OpenAICompactSupportKnown(); {
	case !known:
		return 1
	case supported:
		return 2
	default:
		return 0
	}
}
// isOpenAIAccountEligibleForRequest centralises the schedulable / OpenAI /
// model / compact-support checks used during account selection. It reports
// whether the account may serve a request for requestedModel; requireCompact
// additionally rejects accounts that are explicitly known to lack
// /responses/compact support (tier 0).
func isOpenAIAccountEligibleForRequest(account *Account, requestedModel string, requireCompact bool) bool {
	if account == nil {
		return false
	}
	if !account.IsSchedulable() || !account.IsOpenAI() {
		return false
	}
	if requestedModel != "" && !account.IsModelSupported(requestedModel) {
		return false
	}
	// Tier 0 means compact support is explicitly known to be absent.
	return !requireCompact || openAICompactSupportTier(account) != 0
}
// prioritizeOpenAICompactAccounts re-orders a slice so that accounts with
// known compact support (tier 2) come first, then unknown (tier 1), then
// explicitly unsupported (tier 0). The relative order within each tier is
// preserved. An empty input yields nil; the input slice is never mutated.
func prioritizeOpenAICompactAccounts(accounts []*Account) []*Account {
	if len(accounts) == 0 {
		return nil
	}
	ordered := make([]*Account, 0, len(accounts))
	// One stable pass per tier keeps the original order inside each bucket;
	// openAICompactSupportTier only ever returns 0, 1, or 2, so the three
	// passes cover every account exactly once.
	for _, tier := range [...]int{2, 1, 0} {
		for _, account := range accounts {
			if openAICompactSupportTier(account) == tier {
				ordered = append(ordered, account)
			}
		}
	}
	return ordered
}
// resolveOpenAIAccountUpstreamModelForRequest resolves the upstream model that
// would be sent for a given request. When requireCompact is set (caller is on
// the /responses/compact path) the compact-only model mapping is applied on
// top of the normal forward-model resolution. Returns "" when no upstream
// model can be resolved.
func resolveOpenAIAccountUpstreamModelForRequest(account *Account, requestedModel string, requireCompact bool) string {
	base := resolveOpenAIForwardModel(account, requestedModel, "")
	switch {
	case base == "":
		return ""
	case requireCompact:
		return resolveOpenAICompactForwardModel(account, base)
	default:
		return base
	}
}
func (s *OpenAIGatewayService) selectAccountForModelWithExclusions(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, stickyAccountID int64) (*Account, error) {
if s.checkChannelPricingRestriction(ctx, groupID, requestedModel) {
slog.Warn("channel pricing restriction blocked request",
"group_id", derefGroupID(groupID),
@@ -1221,7 +1309,7 @@ func (s *OpenAIGatewayService) selectAccountForModelWithExclusions(ctx context.C
// 1. 尝试粘性会话命中
// Try sticky session hit
if account := s.tryStickySessionHit(ctx, groupID, sessionHash, requestedModel, excludedIDs, stickyAccountID); account != nil {
if account := s.tryStickySessionHit(ctx, groupID, sessionHash, requestedModel, excludedIDs, requireCompact, stickyAccountID); account != nil {
return account, nil
}
@@ -1234,13 +1322,10 @@ func (s *OpenAIGatewayService) selectAccountForModelWithExclusions(ctx context.C
// 3. 按优先级 + LRU 选择最佳账号
// Select by priority + LRU
selected := s.selectBestAccount(ctx, groupID, accounts, requestedModel, excludedIDs)
selected, compactBlocked := s.selectBestAccount(ctx, groupID, accounts, requestedModel, excludedIDs, requireCompact)
if selected == nil {
if requestedModel != "" {
return nil, fmt.Errorf("no available OpenAI accounts supporting model: %s", requestedModel)
}
return nil, errors.New("no available OpenAI accounts")
return nil, noAvailableOpenAISelectionError(requestedModel, compactBlocked)
}
// 4. 设置粘性会话绑定
@@ -1257,7 +1342,7 @@ func (s *OpenAIGatewayService) selectAccountForModelWithExclusions(ctx context.C
//
// tryStickySessionHit attempts to get account from sticky session.
// Returns account if hit and usable; clears session and returns nil if account is unavailable.
func (s *OpenAIGatewayService) tryStickySessionHit(ctx context.Context, groupID *int64, sessionHash, requestedModel string, excludedIDs map[int64]struct{}, stickyAccountID int64) *Account {
func (s *OpenAIGatewayService) tryStickySessionHit(ctx context.Context, groupID *int64, sessionHash, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool, stickyAccountID int64) *Account {
if sessionHash == "" {
return nil
}
@@ -1289,19 +1374,16 @@ func (s *OpenAIGatewayService) tryStickySessionHit(ctx context.Context, groupID
// 验证账号是否可用于当前请求
// Verify account is usable for current request
if !account.IsSchedulable() || !account.IsOpenAI() {
if !isOpenAIAccountEligibleForRequest(account, requestedModel, false) {
return nil
}
if requestedModel != "" && !account.IsModelSupported(requestedModel) {
return nil
}
account = s.recheckSelectedOpenAIAccountFromDB(ctx, account, requestedModel)
account = s.recheckSelectedOpenAIAccountFromDB(ctx, account, requestedModel, requireCompact)
if account == nil {
_ = s.deleteStickySessionAccountID(ctx, groupID, sessionHash)
return nil
}
if groupID != nil && s.needsUpstreamChannelRestrictionCheck(ctx, groupID) &&
s.isUpstreamModelRestrictedByChannel(ctx, *groupID, account, requestedModel) {
s.isUpstreamModelRestrictedByChannel(ctx, *groupID, account, requestedModel, requireCompact) {
_ = s.deleteStickySessionAccountID(ctx, groupID, sessionHash)
return nil
}
@@ -1316,9 +1398,13 @@ func (s *OpenAIGatewayService) tryStickySessionHit(ctx context.Context, groupID
// 返回 nil 表示无可用账号。
//
// selectBestAccount selects the best account from candidates (priority + LRU).
// Returns nil if no available account.
func (s *OpenAIGatewayService) selectBestAccount(ctx context.Context, groupID *int64, accounts []Account, requestedModel string, excludedIDs map[int64]struct{}) *Account {
// Returns nil if no available account. The second return reports whether at
// least one candidate was filtered out solely because it lacks compact support
// (only meaningful when requireCompact=true).
func (s *OpenAIGatewayService) selectBestAccount(ctx context.Context, groupID *int64, accounts []Account, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool) (*Account, bool) {
var selected *Account
selectedCompactTier := -1
compactBlocked := false
needsUpstreamCheck := s.needsUpstreamChannelRestrictionCheck(ctx, groupID)
for i := range accounts {
@@ -1330,31 +1416,50 @@ func (s *OpenAIGatewayService) selectBestAccount(ctx context.Context, groupID *i
continue
}
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel)
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel, false)
if fresh == nil {
continue
}
fresh = s.recheckSelectedOpenAIAccountFromDB(ctx, fresh, requestedModel)
fresh = s.recheckSelectedOpenAIAccountFromDB(ctx, fresh, requestedModel, false)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel) {
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel, requireCompact) {
continue
}
compactTier := 0
if requireCompact {
compactTier = openAICompactSupportTier(fresh)
if compactTier == 0 {
compactBlocked = true
continue
}
}
// 选择优先级最高且最久未使用的账号
// Select highest priority and least recently used
if selected == nil {
selected = fresh
selectedCompactTier = compactTier
continue
}
// compact 模式下高 tier 优先;同 tier 内才比较 priority/LRU。
if requireCompact && compactTier != selectedCompactTier {
if compactTier > selectedCompactTier {
selected = fresh
selectedCompactTier = compactTier
}
continue
}
if s.isBetterAccount(fresh, selected) {
selected = fresh
selectedCompactTier = compactTier
}
}
return selected
return selected, compactBlocked
}
// isBetterAccount 判断 candidate 是否比 current 更优。
@@ -1392,6 +1497,10 @@ func (s *OpenAIGatewayService) isBetterAccount(candidate, current *Account) bool
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
return s.selectAccountWithLoadAwareness(ctx, groupID, sessionHash, requestedModel, excludedIDs, false)
}
func (s *OpenAIGatewayService) selectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, requireCompact bool) (*AccountSelectionResult, error) {
if s.checkChannelPricingRestriction(ctx, groupID, requestedModel) {
slog.Warn("channel pricing restriction blocked request",
"group_id", derefGroupID(groupID),
@@ -1408,7 +1517,7 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
}
}
if s.concurrencyService == nil || !cfg.LoadBatchEnabled {
account, err := s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, stickyAccountID)
account, err := s.selectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs, requireCompact, stickyAccountID)
if err != nil {
return nil, err
}
@@ -1461,12 +1570,11 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
if clearSticky {
_ = s.deleteStickySessionAccountID(ctx, groupID, sessionHash)
}
if !clearSticky && account.IsSchedulable() && account.IsOpenAI() &&
(requestedModel == "" || account.IsModelSupported(requestedModel)) {
account = s.recheckSelectedOpenAIAccountFromDB(ctx, account, requestedModel)
if !clearSticky && isOpenAIAccountEligibleForRequest(account, requestedModel, false) {
account = s.recheckSelectedOpenAIAccountFromDB(ctx, account, requestedModel, requireCompact)
if account == nil {
_ = s.deleteStickySessionAccountID(ctx, groupID, sessionHash)
} else if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, account, requestedModel) {
} else if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, account, requestedModel, requireCompact) {
_ = s.deleteStickySessionAccountID(ctx, groupID, sessionHash)
} else {
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
@@ -1491,6 +1599,7 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
}
// ============ Layer 2: Load-aware selection ============
baseCandidateCount := 0
candidates := make([]*Account, 0, len(accounts))
for i := range accounts {
acc := &accounts[i]
@@ -1506,9 +1615,10 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
if requestedModel != "" && !acc.IsModelSupported(requestedModel) {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, acc, requestedModel) {
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, acc, requestedModel, requireCompact) {
continue
}
baseCandidateCount++
candidates = append(candidates, acc)
}
@@ -1528,12 +1638,19 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
if err != nil {
ordered := append([]*Account(nil), candidates...)
sortAccountsByPriorityAndLastUsed(ordered, false)
if requireCompact {
ordered = prioritizeOpenAICompactAccounts(ordered)
}
for _, acc := range ordered {
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel)
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel, false)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel) {
fresh = s.recheckSelectedOpenAIAccountFromDB(ctx, fresh, requestedModel, requireCompact)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel, requireCompact) {
continue
}
result, err := s.tryAcquireAccountSlot(ctx, fresh.ID, fresh.Concurrency)
@@ -1581,12 +1698,35 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
})
shuffleWithinSortGroups(available)
for _, item := range available {
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, item.account, requestedModel)
selectionOrder := make([]accountWithLoad, 0, len(available))
if requireCompact {
appendTier := func(out []accountWithLoad, tier int) []accountWithLoad {
for _, item := range available {
if openAICompactSupportTier(item.account) == tier {
out = append(out, item)
}
}
return out
}
selectionOrder = appendTier(selectionOrder, 2)
selectionOrder = appendTier(selectionOrder, 1)
// tier 0 候选作为兜底追加DB recheck 时若发现 cache tier 0 实际
// 已升级为 1/2探测刚跑完cache 尚未刷新),仍可正常命中。
selectionOrder = appendTier(selectionOrder, 0)
} else {
selectionOrder = append(selectionOrder, available...)
}
for _, item := range selectionOrder {
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, item.account, requestedModel, false)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel) {
fresh = s.recheckSelectedOpenAIAccountFromDB(ctx, fresh, requestedModel, requireCompact)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel, requireCompact) {
continue
}
result, err := s.tryAcquireAccountSlot(ctx, fresh.ID, fresh.Concurrency)
@@ -1602,12 +1742,19 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
// ============ Layer 3: Fallback wait ============
sortAccountsByPriorityAndLastUsed(candidates, false)
if requireCompact {
candidates = prioritizeOpenAICompactAccounts(candidates)
}
for _, acc := range candidates {
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel)
fresh := s.resolveFreshSchedulableOpenAIAccount(ctx, acc, requestedModel, false)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel) {
fresh = s.recheckSelectedOpenAIAccountFromDB(ctx, fresh, requestedModel, requireCompact)
if fresh == nil {
continue
}
if needsUpstreamCheck && s.isUpstreamModelRestrictedByChannel(ctx, *groupID, fresh, requestedModel, requireCompact) {
continue
}
return s.newSelectionResult(ctx, fresh, false, nil, &AccountWaitPlan{
@@ -1618,6 +1765,9 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
})
}
if requireCompact && baseCandidateCount > 0 {
return nil, ErrNoAvailableCompactAccounts
}
return nil, ErrNoAvailableAccounts
}
@@ -1648,7 +1798,7 @@ func (s *OpenAIGatewayService) tryAcquireAccountSlot(ctx context.Context, accoun
return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
}
func (s *OpenAIGatewayService) resolveFreshSchedulableOpenAIAccount(ctx context.Context, account *Account, requestedModel string) *Account {
func (s *OpenAIGatewayService) resolveFreshSchedulableOpenAIAccount(ctx context.Context, account *Account, requestedModel string, requireCompact bool) *Account {
if account == nil {
return nil
}
@@ -1662,20 +1812,20 @@ func (s *OpenAIGatewayService) resolveFreshSchedulableOpenAIAccount(ctx context.
fresh = current
}
if !fresh.IsSchedulable() || !fresh.IsOpenAI() {
return nil
}
if requestedModel != "" && !fresh.IsModelSupported(requestedModel) {
if !isOpenAIAccountEligibleForRequest(fresh, requestedModel, requireCompact) {
return nil
}
return fresh
}
func (s *OpenAIGatewayService) recheckSelectedOpenAIAccountFromDB(ctx context.Context, account *Account, requestedModel string) *Account {
func (s *OpenAIGatewayService) recheckSelectedOpenAIAccountFromDB(ctx context.Context, account *Account, requestedModel string, requireCompact bool) *Account {
if account == nil {
return nil
}
if s.schedulerSnapshot == nil || s.accountRepo == nil {
if !isOpenAIAccountEligibleForRequest(account, requestedModel, requireCompact) {
return nil
}
return account
}
@@ -1683,10 +1833,7 @@ func (s *OpenAIGatewayService) recheckSelectedOpenAIAccountFromDB(ctx context.Co
if err != nil || latest == nil {
return nil
}
if !latest.IsSchedulable() || !latest.IsOpenAI() {
return nil
}
if requestedModel != "" && !latest.IsModelSupported(requestedModel) {
if !isOpenAIAccountEligibleForRequest(latest, requestedModel, requireCompact) {
return nil
}
return latest
@@ -2007,17 +2154,35 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
return nil, err
}
// Compact-only model 映射:仅在 /responses/compact 路径生效,且优先级高于
// OAuth 模型规范化(避免 OAuth 规范化覆盖 compact-only 自定义模型)。
isCompactRequest := isOpenAIResponsesCompactPath(c)
compactMapped := false
if isCompactRequest {
compactMappedModel := resolveOpenAICompactForwardModel(account, billingModel)
if compactMappedModel != "" && compactMappedModel != billingModel {
compactMapped = true
upstreamModel = compactMappedModel
reqBody["model"] = compactMappedModel
bodyModified = true
markPatchSet("model", compactMappedModel)
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Compact model mapping applied: %s -> %s (account: %s, isCodexCLI: %v)", billingModel, compactMappedModel, account.Name, isCodexCLI)
}
}
// OpenAI OAuth 账号走 ChatGPT internal Codex endpoint需要将模型名规范化为
// 上游可识别的 Codex/GPT 系列。API Key 账号则应保留原始/映射后的模型名,
// 以兼容自定义 base_url 的 OpenAI-compatible 上游。
if model, ok := reqBody["model"].(string); ok {
upstreamModel = normalizeOpenAIModelForUpstream(account, model)
if upstreamModel != "" && upstreamModel != model {
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Upstream model resolved: %s -> %s (account: %s, type: %s, isCodexCLI: %v)",
model, upstreamModel, account.Name, account.Type, isCodexCLI)
reqBody["model"] = upstreamModel
bodyModified = true
markPatchSet("model", upstreamModel)
if !compactMapped {
upstreamModel = normalizeOpenAIModelForUpstream(account, model)
if upstreamModel != "" && upstreamModel != model {
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Upstream model resolved: %s -> %s (account: %s, type: %s, isCodexCLI: %v)",
model, upstreamModel, account.Name, account.Type, isCodexCLI)
reqBody["model"] = upstreamModel
bodyModified = true
markPatchSet("model", upstreamModel)
}
}
// 移除 gpt-5.2-codex 以下的版本 verbosity 参数
@@ -2040,7 +2205,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
}
if account.Type == AccountTypeOAuth {
codexResult := applyCodexOAuthTransform(reqBody, isCodexCLI, isOpenAIResponsesCompactPath(c))
codexResult := applyCodexOAuthTransform(reqBody, isCodexCLI, isCompactRequest)
if codexResult.Modified {
bodyModified = true
disablePatch()
@@ -2515,6 +2680,19 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
reqStream bool,
startTime time.Time,
) (*OpenAIForwardResult, error) {
upstreamPassthroughModel := ""
if isOpenAIResponsesCompactPath(c) {
compactMappedModel := resolveOpenAICompactForwardModel(account, reqModel)
if compactMappedModel != "" && compactMappedModel != reqModel {
nextBody, setErr := sjson.SetBytes(body, "model", compactMappedModel)
if setErr != nil {
return nil, fmt.Errorf("set compact passthrough model: %w", setErr)
}
body = nextBody
upstreamPassthroughModel = compactMappedModel
}
}
if account != nil && account.Type == AccountTypeOAuth {
if rejectReason := detectOpenAIPassthroughInstructionsRejectReason(reqModel, body); rejectReason != "" {
rejectMsg := "OpenAI codex passthrough requires a non-empty instructions field"
@@ -2640,14 +2818,14 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
var usage *OpenAIUsage
var firstTokenMs *int
if reqStream {
result, err := s.handleStreamingResponsePassthrough(ctx, resp, c, account, startTime)
result, err := s.handleStreamingResponsePassthrough(ctx, resp, c, account, startTime, reqModel, upstreamPassthroughModel)
if err != nil {
return nil, err
}
usage = result.usage
firstTokenMs = result.firstTokenMs
} else {
usage, err = s.handleNonStreamingResponsePassthrough(ctx, resp, c)
usage, err = s.handleNonStreamingResponsePassthrough(ctx, resp, c, reqModel, upstreamPassthroughModel)
if err != nil {
return nil, err
}
@@ -2665,6 +2843,7 @@ func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
RequestID: resp.Header.Get("x-request-id"),
Usage: *usage,
Model: reqModel,
UpstreamModel: upstreamPassthroughModel,
ServiceTier: extractOpenAIServiceTierFromBody(body),
ReasoningEffort: reasoningEffort,
Stream: reqStream,
@@ -2974,6 +3153,8 @@ func (s *OpenAIGatewayService) handleStreamingResponsePassthrough(
c *gin.Context,
account *Account,
startTime time.Time,
originalModel string,
mappedModel string,
) (*openaiStreamingResultPassthrough, error) {
writeOpenAIPassthroughResponseHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
@@ -3008,11 +3189,20 @@ func (s *OpenAIGatewayService) handleStreamingResponsePassthrough(
scanner.Buffer(scanBuf[:0], maxLineSize)
defer putSSEScannerBuf64K(scanBuf)
needModelReplace := strings.TrimSpace(originalModel) != "" && strings.TrimSpace(mappedModel) != "" && strings.TrimSpace(originalModel) != strings.TrimSpace(mappedModel)
for scanner.Scan() {
line := scanner.Text()
if data, ok := extractOpenAISSEDataLine(line); ok {
dataBytes := []byte(data)
trimmedData := strings.TrimSpace(data)
if needModelReplace && strings.Contains(data, mappedModel) {
line = s.replaceModelInSSELine(line, mappedModel, originalModel)
if replacedData, replaced := extractOpenAISSEDataLine(line); replaced {
dataBytes = []byte(replacedData)
trimmedData = strings.TrimSpace(replacedData)
}
}
if trimmedData == "[DONE]" {
sawDone = true
}
@@ -3073,6 +3263,8 @@ func (s *OpenAIGatewayService) handleNonStreamingResponsePassthrough(
ctx context.Context,
resp *http.Response,
c *gin.Context,
originalModel string,
mappedModel string,
) (*OpenAIUsage, error) {
body, err := ReadUpstreamResponseBody(resp.Body, s.cfg, c, openAITooLargeError)
if err != nil {
@@ -3084,7 +3276,7 @@ func (s *OpenAIGatewayService) handleNonStreamingResponsePassthrough(
// stream=false was requested. Without this conversion the client would
// receive raw SSE text or a terminal event with empty output.
if isEventStreamResponse(resp.Header) {
return s.handlePassthroughSSEToJSON(resp, c, body)
return s.handlePassthroughSSEToJSON(resp, c, body, originalModel, mappedModel)
}
usage := &OpenAIUsage{}
@@ -3106,14 +3298,18 @@ func (s *OpenAIGatewayService) handleNonStreamingResponsePassthrough(
if contentType == "" {
contentType = "application/json"
}
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
body = s.replaceModelInResponseBody(body, mappedModel, originalModel)
}
c.Data(resp.StatusCode, contentType, body)
return usage, nil
}
// handlePassthroughSSEToJSON converts an SSE response body into a JSON
// response for the passthrough path. It mirrors handleSSEToJSON but skips
// model replacement (passthrough does not remap models).
func (s *OpenAIGatewayService) handlePassthroughSSEToJSON(resp *http.Response, c *gin.Context, body []byte) (*OpenAIUsage, error) {
// response for the passthrough path. It mirrors handleSSEToJSON while
// preserving passthrough payloads, except compact-only model remapping may
// rewrite model fields back to the original requested model.
func (s *OpenAIGatewayService) handlePassthroughSSEToJSON(resp *http.Response, c *gin.Context, body []byte, originalModel string, mappedModel string) (*OpenAIUsage, error) {
bodyText := string(body)
finalResponse, ok := extractCodexFinalResponse(bodyText)
@@ -3132,6 +3328,9 @@ func (s *OpenAIGatewayService) handlePassthroughSSEToJSON(resp *http.Response, c
}
}
body = finalResponse
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
body = s.replaceModelInResponseBody(body, mappedModel, originalModel)
}
// Correct tool calls in final response
body = s.correctToolCallsInResponseBody(body)
} else {
@@ -3144,6 +3343,10 @@ func (s *OpenAIGatewayService) handlePassthroughSSEToJSON(resp *http.Response, c
return nil, s.writeOpenAINonStreamingProtocolError(resp, c, msg)
}
usage = s.parseSSEUsageFromBody(bodyText)
if originalModel != "" && mappedModel != "" && originalModel != mappedModel {
bodyText = s.replaceModelInSSEBody(bodyText, mappedModel, originalModel)
}
body = []byte(bodyText)
}
writeOpenAIPassthroughResponseHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)