Merge pull request #1853 from gaoren002/fix/codex-image-generation-bridge
fix(openai): 完善 Codex 在 Responses 链路下的图片生成兼容性
This commit is contained in:
@@ -45,6 +45,11 @@ type codexTransformResult struct {
|
||||
PromptCacheKey string
|
||||
}
|
||||
|
||||
// Bridge text that is appended to the request "instructions" for Codex
// clients when the request carries an image_generation tool.
const (
	// codexImageGenerationBridgeMarker is the opening tag of the bridge text.
	// Its presence in the existing instructions is used to detect that the
	// bridge has already been applied, so it is never appended twice.
	codexImageGenerationBridgeMarker = "<sub2api-codex-image-generation>"
	// codexImageGenerationBridgeText is the full bridge block (opening marker,
	// guidance for the model, closing tag). It tells the model to use the
	// Responses-native image_generation tool even when the local Codex client
	// does not expose an image_gen namespace.
	codexImageGenerationBridgeText = codexImageGenerationBridgeMarker + "\nWhen the user asks for raster image generation or editing, use the OpenAI Responses native `image_generation` tool attached to this request. The local Codex client may not expose an `image_gen` namespace, but that does not mean image generation is unavailable. Do not ask the user to switch to CLI fallback solely because `image_gen` is absent.\n</sub2api-codex-image-generation>"
)
|
||||
|
||||
func applyCodexOAuthTransform(reqBody map[string]any, isCodexCLI bool, isCompact bool) codexTransformResult {
|
||||
result := codexTransformResult{}
|
||||
// 工具续链需求会影响存储策略与 input 过滤逻辑。
|
||||
@@ -300,6 +305,61 @@ func normalizeOpenAIResponsesImageGenerationTools(reqBody map[string]any) bool {
|
||||
return modified
|
||||
}
|
||||
|
||||
func ensureOpenAIResponsesImageGenerationTool(reqBody map[string]any) bool {
|
||||
if len(reqBody) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
tool := map[string]any{
|
||||
"type": "image_generation",
|
||||
"output_format": "png",
|
||||
}
|
||||
|
||||
rawTools, ok := reqBody["tools"]
|
||||
if !ok || rawTools == nil {
|
||||
reqBody["tools"] = []any{tool}
|
||||
return true
|
||||
}
|
||||
|
||||
tools, ok := rawTools.([]any)
|
||||
if !ok {
|
||||
reqBody["tools"] = []any{tool}
|
||||
return true
|
||||
}
|
||||
for _, rawTool := range tools {
|
||||
toolMap, ok := rawTool.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(firstNonEmptyString(toolMap["type"])) == "image_generation" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
reqBody["tools"] = append(tools, tool)
|
||||
return true
|
||||
}
|
||||
|
||||
func applyCodexImageGenerationBridgeInstructions(reqBody map[string]any) bool {
|
||||
if len(reqBody) == 0 || !hasOpenAIImageGenerationTool(reqBody) {
|
||||
return false
|
||||
}
|
||||
|
||||
existing, _ := reqBody["instructions"].(string)
|
||||
if strings.Contains(existing, codexImageGenerationBridgeMarker) {
|
||||
return false
|
||||
}
|
||||
|
||||
existing = strings.TrimRight(existing, " \t\r\n")
|
||||
if strings.TrimSpace(existing) == "" {
|
||||
reqBody["instructions"] = codexImageGenerationBridgeText
|
||||
return true
|
||||
}
|
||||
|
||||
reqBody["instructions"] = existing + "\n\n" + codexImageGenerationBridgeText
|
||||
return true
|
||||
}
|
||||
|
||||
func validateOpenAIResponsesImageModel(reqBody map[string]any, model string) error {
|
||||
if !hasOpenAIImageGenerationTool(reqBody) {
|
||||
return nil
|
||||
@@ -311,6 +371,82 @@ func validateOpenAIResponsesImageModel(reqBody map[string]any, model string) err
|
||||
return fmt.Errorf("/v1/responses image_generation requests require a Responses-capable text model; image-only model %q is not allowed", model)
|
||||
}
|
||||
|
||||
func normalizeOpenAIResponsesImageOnlyModel(reqBody map[string]any) bool {
|
||||
if len(reqBody) == 0 {
|
||||
return false
|
||||
}
|
||||
imageModel := strings.TrimSpace(firstNonEmptyString(reqBody["model"]))
|
||||
if !isOpenAIImageGenerationModel(imageModel) {
|
||||
return false
|
||||
}
|
||||
|
||||
modified := false
|
||||
tools, _ := reqBody["tools"].([]any)
|
||||
imageToolIndex := -1
|
||||
for i, rawTool := range tools {
|
||||
toolMap, ok := rawTool.(map[string]any)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(firstNonEmptyString(toolMap["type"])) == "image_generation" {
|
||||
imageToolIndex = i
|
||||
break
|
||||
}
|
||||
}
|
||||
if imageToolIndex < 0 {
|
||||
tools = append(tools, map[string]any{
|
||||
"type": "image_generation",
|
||||
"model": imageModel,
|
||||
})
|
||||
imageToolIndex = len(tools) - 1
|
||||
reqBody["tools"] = tools
|
||||
modified = true
|
||||
}
|
||||
|
||||
if toolMap, ok := tools[imageToolIndex].(map[string]any); ok {
|
||||
if strings.TrimSpace(firstNonEmptyString(toolMap["model"])) == "" {
|
||||
toolMap["model"] = imageModel
|
||||
modified = true
|
||||
}
|
||||
for _, key := range []string{
|
||||
"size",
|
||||
"quality",
|
||||
"background",
|
||||
"output_format",
|
||||
"output_compression",
|
||||
"moderation",
|
||||
"style",
|
||||
"partial_images",
|
||||
} {
|
||||
if value, exists := reqBody[key]; exists && value != nil {
|
||||
if _, toolHas := toolMap[key]; !toolHas {
|
||||
toolMap[key] = value
|
||||
}
|
||||
delete(reqBody, key)
|
||||
modified = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if prompt := strings.TrimSpace(firstNonEmptyString(reqBody["prompt"])); prompt != "" {
|
||||
if _, hasInput := reqBody["input"]; !hasInput {
|
||||
reqBody["input"] = prompt
|
||||
}
|
||||
delete(reqBody, "prompt")
|
||||
modified = true
|
||||
}
|
||||
|
||||
if _, ok := reqBody["tool_choice"]; !ok {
|
||||
reqBody["tool_choice"] = map[string]any{"type": "image_generation"}
|
||||
modified = true
|
||||
}
|
||||
if imageModel != openAIImagesResponsesMainModel {
|
||||
modified = true
|
||||
}
|
||||
reqBody["model"] = openAIImagesResponsesMainModel
|
||||
return modified
|
||||
}
|
||||
|
||||
func normalizeOpenAIModelForUpstream(account *Account, model string) string {
|
||||
if account == nil || account.Type == AccountTypeOAuth {
|
||||
return normalizeCodexModel(model)
|
||||
|
||||
@@ -243,6 +243,159 @@ func TestNormalizeOpenAIResponsesImageGenerationTools_RewritesLegacyFields(t *te
|
||||
require.False(t, hasCompression)
|
||||
}
|
||||
|
||||
func TestEnsureOpenAIResponsesImageGenerationTool_NoTools(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"model": "gpt-5.4",
|
||||
"input": "draw a cat",
|
||||
}
|
||||
|
||||
modified := ensureOpenAIResponsesImageGenerationTool(reqBody)
|
||||
require.True(t, modified)
|
||||
|
||||
tools, ok := reqBody["tools"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, tools, 1)
|
||||
tool, ok := tools[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "image_generation", tool["type"])
|
||||
require.Equal(t, "png", tool["output_format"])
|
||||
}
|
||||
|
||||
func TestEnsureOpenAIResponsesImageGenerationTool_AppendsToExistingTools(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"model": "gpt-5.4",
|
||||
"tools": []any{
|
||||
map[string]any{"type": "web_search"},
|
||||
},
|
||||
}
|
||||
|
||||
modified := ensureOpenAIResponsesImageGenerationTool(reqBody)
|
||||
require.True(t, modified)
|
||||
|
||||
tools, ok := reqBody["tools"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, tools, 2)
|
||||
first, ok := tools[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "web_search", first["type"])
|
||||
second, ok := tools[1].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "image_generation", second["type"])
|
||||
require.Equal(t, "png", second["output_format"])
|
||||
}
|
||||
|
||||
func TestEnsureOpenAIResponsesImageGenerationTool_PreservesExistingImageTool(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"model": "gpt-5.4",
|
||||
"tools": []any{
|
||||
map[string]any{"type": "image_generation", "output_format": "webp"},
|
||||
map[string]any{"type": "web_search"},
|
||||
},
|
||||
}
|
||||
|
||||
modified := ensureOpenAIResponsesImageGenerationTool(reqBody)
|
||||
require.False(t, modified)
|
||||
|
||||
tools, ok := reqBody["tools"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, tools, 2)
|
||||
tool, ok := tools[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "webp", tool["output_format"])
|
||||
}
|
||||
|
||||
func TestApplyCodexImageGenerationBridgeInstructions_AppendsBridgeOnce(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"instructions": "existing instructions",
|
||||
"tools": []any{
|
||||
map[string]any{"type": "image_generation", "output_format": "png"},
|
||||
},
|
||||
}
|
||||
|
||||
modified := applyCodexImageGenerationBridgeInstructions(reqBody)
|
||||
require.True(t, modified)
|
||||
|
||||
instructions, ok := reqBody["instructions"].(string)
|
||||
require.True(t, ok)
|
||||
require.Contains(t, instructions, "existing instructions")
|
||||
require.Contains(t, instructions, codexImageGenerationBridgeMarker)
|
||||
require.Contains(t, instructions, "Responses native `image_generation` tool")
|
||||
|
||||
modified = applyCodexImageGenerationBridgeInstructions(reqBody)
|
||||
require.False(t, modified)
|
||||
}
|
||||
|
||||
func TestApplyCodexImageGenerationBridgeInstructions_SkipsWithoutImageTool(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"instructions": "existing instructions",
|
||||
"tools": []any{
|
||||
map[string]any{"type": "web_search"},
|
||||
},
|
||||
}
|
||||
|
||||
modified := applyCodexImageGenerationBridgeInstructions(reqBody)
|
||||
require.False(t, modified)
|
||||
require.Equal(t, "existing instructions", reqBody["instructions"])
|
||||
}
|
||||
|
||||
func TestNormalizeOpenAIResponsesImageOnlyModel_BuildsImageToolRequest(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"model": "gpt-image-2",
|
||||
"prompt": "draw a cat",
|
||||
"size": "1024x1024",
|
||||
"output_format": "png",
|
||||
}
|
||||
|
||||
modified := normalizeOpenAIResponsesImageOnlyModel(reqBody)
|
||||
require.True(t, modified)
|
||||
require.Equal(t, openAIImagesResponsesMainModel, reqBody["model"])
|
||||
require.Equal(t, "draw a cat", reqBody["input"])
|
||||
_, hasPrompt := reqBody["prompt"]
|
||||
require.False(t, hasPrompt)
|
||||
_, hasTopLevelSize := reqBody["size"]
|
||||
require.False(t, hasTopLevelSize)
|
||||
|
||||
tools, ok := reqBody["tools"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, tools, 1)
|
||||
tool, ok := tools[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "image_generation", tool["type"])
|
||||
require.Equal(t, "gpt-image-2", tool["model"])
|
||||
require.Equal(t, "1024x1024", tool["size"])
|
||||
require.Equal(t, "png", tool["output_format"])
|
||||
|
||||
choice, ok := reqBody["tool_choice"].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "image_generation", choice["type"])
|
||||
}
|
||||
|
||||
func TestNormalizeOpenAIResponsesImageOnlyModel_PreservesExistingImageTool(t *testing.T) {
|
||||
reqBody := map[string]any{
|
||||
"model": "gpt-image-2",
|
||||
"input": "draw a cat",
|
||||
"tools": []any{
|
||||
map[string]any{
|
||||
"type": "image_generation",
|
||||
"model": "gpt-image-1.5",
|
||||
},
|
||||
},
|
||||
"tool_choice": "auto",
|
||||
}
|
||||
|
||||
modified := normalizeOpenAIResponsesImageOnlyModel(reqBody)
|
||||
require.True(t, modified)
|
||||
require.Equal(t, openAIImagesResponsesMainModel, reqBody["model"])
|
||||
require.Equal(t, "auto", reqBody["tool_choice"])
|
||||
|
||||
tools, ok := reqBody["tools"].([]any)
|
||||
require.True(t, ok)
|
||||
require.Len(t, tools, 1)
|
||||
tool, ok := tools[0].(map[string]any)
|
||||
require.True(t, ok)
|
||||
require.Equal(t, "gpt-image-1.5", tool["model"])
|
||||
}
|
||||
|
||||
func TestValidateOpenAIResponsesImageModel_RejectsImageOnlyModel(t *testing.T) {
|
||||
err := validateOpenAIResponsesImageModel(map[string]any{
|
||||
"tools": []any{
|
||||
|
||||
@@ -1935,11 +1935,22 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
|
||||
markPatchSet("instructions", "You are a helpful coding assistant.")
|
||||
}
|
||||
|
||||
if isCodexCLI && ensureOpenAIResponsesImageGenerationTool(reqBody) {
|
||||
bodyModified = true
|
||||
disablePatch()
|
||||
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Injected /responses image_generation tool for Codex client")
|
||||
}
|
||||
|
||||
if normalizeOpenAIResponsesImageGenerationTools(reqBody) {
|
||||
bodyModified = true
|
||||
disablePatch()
|
||||
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Normalized /responses image_generation tool payload")
|
||||
}
|
||||
if isCodexCLI && applyCodexImageGenerationBridgeInstructions(reqBody) {
|
||||
bodyModified = true
|
||||
disablePatch()
|
||||
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Added Codex image_generation bridge instructions")
|
||||
}
|
||||
|
||||
// 对所有请求执行模型映射(包含 Codex CLI)。
|
||||
billingModel := account.GetMappedModel(reqModel)
|
||||
@@ -1950,6 +1961,20 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
|
||||
markPatchSet("model", billingModel)
|
||||
}
|
||||
upstreamModel := billingModel
|
||||
if normalizeOpenAIResponsesImageOnlyModel(reqBody) {
|
||||
bodyModified = true
|
||||
disablePatch()
|
||||
if model, ok := reqBody["model"].(string); ok {
|
||||
upstreamModel = strings.TrimSpace(model)
|
||||
}
|
||||
logger.LegacyPrintf(
|
||||
"service.openai_gateway",
|
||||
"[OpenAI] Normalized /responses image-only model request inbound_model=%s image_model=%s upstream_model=%s",
|
||||
reqModel,
|
||||
billingModel,
|
||||
upstreamModel,
|
||||
)
|
||||
}
|
||||
if err := validateOpenAIResponsesImageModel(reqBody, upstreamModel); err != nil {
|
||||
setOpsUpstreamError(c, http.StatusBadRequest, err.Error(), "")
|
||||
c.JSON(http.StatusBadRequest, gin.H{
|
||||
@@ -4118,22 +4143,39 @@ func extractCodexFinalResponse(body string) ([]byte, bool) {
|
||||
// Returns (nil, false) if no content was found in deltas.
|
||||
func reconstructResponseOutputFromSSE(bodyText string) ([]byte, bool) {
|
||||
acc := apicompat.NewBufferedResponseAccumulator()
|
||||
imageOutputs := make([]json.RawMessage, 0, 1)
|
||||
seenImages := make(map[string]struct{})
|
||||
lines := strings.Split(bodyText, "\n")
|
||||
for _, line := range lines {
|
||||
data, ok := extractOpenAISSEDataLine(line)
|
||||
if !ok || data == "" || data == "[DONE]" {
|
||||
continue
|
||||
}
|
||||
if imageOutput, ok := extractImageGenerationOutputFromSSEData([]byte(data), seenImages); ok {
|
||||
imageOutputs = append(imageOutputs, imageOutput)
|
||||
}
|
||||
var event apicompat.ResponsesStreamEvent
|
||||
if err := json.Unmarshal([]byte(data), &event); err != nil {
|
||||
continue
|
||||
}
|
||||
acc.ProcessEvent(&event)
|
||||
}
|
||||
if !acc.HasContent() {
|
||||
if !acc.HasContent() && len(imageOutputs) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
output := acc.BuildOutput()
|
||||
|
||||
var output []json.RawMessage
|
||||
if acc.HasContent() {
|
||||
outputJSON, err := json.Marshal(acc.BuildOutput())
|
||||
if err != nil {
|
||||
return nil, false
|
||||
}
|
||||
if err := json.Unmarshal(outputJSON, &output); err != nil {
|
||||
return nil, false
|
||||
}
|
||||
}
|
||||
output = append(output, imageOutputs...)
|
||||
|
||||
outputJSON, err := json.Marshal(output)
|
||||
if err != nil {
|
||||
return nil, false
|
||||
@@ -4141,6 +4183,33 @@ func reconstructResponseOutputFromSSE(bodyText string) ([]byte, bool) {
|
||||
return outputJSON, true
|
||||
}
|
||||
|
||||
func extractImageGenerationOutputFromSSEData(data []byte, seen map[string]struct{}) (json.RawMessage, bool) {
|
||||
if len(data) == 0 || !gjson.ValidBytes(data) {
|
||||
return nil, false
|
||||
}
|
||||
if gjson.GetBytes(data, "type").String() != "response.output_item.done" {
|
||||
return nil, false
|
||||
}
|
||||
item := gjson.GetBytes(data, "item")
|
||||
if !item.Exists() || !item.IsObject() || item.Get("type").String() != "image_generation_call" {
|
||||
return nil, false
|
||||
}
|
||||
if strings.TrimSpace(item.Get("result").String()) == "" {
|
||||
return nil, false
|
||||
}
|
||||
key := strings.TrimSpace(item.Get("id").String())
|
||||
if key == "" {
|
||||
key = strings.TrimSpace(item.Get("output_format").String()) + "|" + strings.TrimSpace(item.Get("result").String())
|
||||
}
|
||||
if key != "" && seen != nil {
|
||||
if _, exists := seen[key]; exists {
|
||||
return nil, false
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
}
|
||||
return json.RawMessage(item.Raw), true
|
||||
}
|
||||
|
||||
func (s *OpenAIGatewayService) parseSSEUsageFromBody(body string) *OpenAIUsage {
|
||||
usage := &OpenAIUsage{}
|
||||
lines := strings.Split(body, "\n")
|
||||
|
||||
@@ -18,6 +18,7 @@ import (
|
||||
"github.com/cespare/xxhash/v2"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// 编译期接口断言
|
||||
@@ -1880,6 +1881,33 @@ func TestHandleSSEToJSON_CompletedEventReturnsJSON(t *testing.T) {
|
||||
require.NotContains(t, rec.Body.String(), "data:")
|
||||
}
|
||||
|
||||
func TestHandleSSEToJSON_ReconstructsImageGenerationOutputItemDone(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
c, _ := gin.CreateTestContext(rec)
|
||||
c.Request = httptest.NewRequest(http.MethodPost, "/", nil)
|
||||
|
||||
svc := &OpenAIGatewayService{cfg: &config.Config{}}
|
||||
resp := &http.Response{
|
||||
StatusCode: http.StatusOK,
|
||||
Header: http.Header{"Content-Type": []string{"text/event-stream"}},
|
||||
}
|
||||
body := []byte(strings.Join([]string{
|
||||
`data: {"type":"response.output_item.done","item":{"id":"ig_123","type":"image_generation_call","result":"aGVsbG8=","revised_prompt":"draw a cat","output_format":"png"}}`,
|
||||
`data: {"type":"response.completed","response":{"id":"resp_img","model":"gpt-5.4","output":[],"usage":{"input_tokens":7,"output_tokens":9,"output_tokens_details":{"image_tokens":4}}}}`,
|
||||
`data: [DONE]`,
|
||||
}, "\n"))
|
||||
|
||||
usage, err := svc.handleSSEToJSON(resp, c, body, "gpt-5.4", "gpt-5.4")
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, usage)
|
||||
require.Equal(t, 4, usage.ImageOutputTokens)
|
||||
require.NotContains(t, rec.Body.String(), "data:")
|
||||
require.Equal(t, "image_generation_call", gjson.Get(rec.Body.String(), "output.0.type").String())
|
||||
require.Equal(t, "aGVsbG8=", gjson.Get(rec.Body.String(), "output.0.result").String())
|
||||
require.Equal(t, "draw a cat", gjson.Get(rec.Body.String(), "output.0.revised_prompt").String())
|
||||
}
|
||||
|
||||
func TestHandleSSEToJSON_NoFinalResponseKeepsSSEBody(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
rec := httptest.NewRecorder()
|
||||
|
||||
Reference in New Issue
Block a user