Files
sub2api/backend/internal/service/openai_images.go
shaw 0b85a8da88 fix: add io.LimitReader bounds to prevent OOM in image handling
Limit image download and multipart upload reads to 20MB to prevent
unbounded memory allocation from abnormal upstream responses.
2026-04-23 10:27:42 +08:00

2262 lines
65 KiB
Go

package service
import (
"bufio"
"bytes"
"context"
"crypto/sha256"
"crypto/sha3"
"encoding/base64"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"mime"
"mime/multipart"
"net/http"
"net/textproto"
"sort"
"strconv"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/Wei-Shaw/sub2api/internal/pkg/proxyurl"
"github.com/Wei-Shaw/sub2api/internal/util/responseheaders"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/imroc/req/v3"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
const (
openAIImagesGenerationsEndpoint = "/v1/images/generations"
openAIImagesEditsEndpoint = "/v1/images/edits"
openAIImagesGenerationsURL = "https://api.openai.com/v1/images/generations"
openAIImagesEditsURL = "https://api.openai.com/v1/images/edits"
openAIChatGPTStartURL = "https://chatgpt.com/"
openAIChatGPTFilesURL = "https://chatgpt.com/backend-api/files"
openAIChatGPTConversationInitURL = "https://chatgpt.com/backend-api/conversation/init"
openAIChatGPTConversationURL = "https://chatgpt.com/backend-api/f/conversation"
openAIChatGPTConversationPrepareURL = "https://chatgpt.com/backend-api/f/conversation/prepare"
openAIChatGPTChatRequirementsURL = "https://chatgpt.com/backend-api/sentinel/chat-requirements"
openAIImageBackendUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
openAIImageRequirementsDiff = "0fffff"
openAIImageLifecycleTimeout = 2 * time.Minute
openAIImageMaxDownloadBytes = 20 << 20 // 20MB per image download
openAIImageMaxUploadPartSize = 20 << 20 // 20MB per multipart upload part
)
type OpenAIImagesCapability string
const (
OpenAIImagesCapabilityBasic OpenAIImagesCapability = "images-basic"
OpenAIImagesCapabilityNative OpenAIImagesCapability = "images-native"
)
type OpenAIImagesUpload struct {
FieldName string
FileName string
ContentType string
Data []byte
Width int
Height int
}
type OpenAIImagesRequest struct {
Endpoint string
ContentType string
Multipart bool
Model string
ExplicitModel bool
Prompt string
Stream bool
N int
Size string
ExplicitSize bool
SizeTier string
ResponseFormat string
HasMask bool
HasNativeOptions bool
RequiredCapability OpenAIImagesCapability
Uploads []OpenAIImagesUpload
Body []byte
bodyHash string
}
func (r *OpenAIImagesRequest) IsEdits() bool {
return r != nil && r.Endpoint == openAIImagesEditsEndpoint
}
func (r *OpenAIImagesRequest) StickySessionSeed() string {
if r == nil {
return ""
}
parts := []string{
"openai-images",
strings.TrimSpace(r.Endpoint),
strings.TrimSpace(r.Model),
strings.TrimSpace(r.Size),
strings.TrimSpace(r.Prompt),
}
seed := strings.Join(parts, "|")
if strings.TrimSpace(r.Prompt) == "" && r.bodyHash != "" {
seed += "|body=" + r.bodyHash
}
return seed
}
func (s *OpenAIGatewayService) ParseOpenAIImagesRequest(c *gin.Context, body []byte) (*OpenAIImagesRequest, error) {
if c == nil || c.Request == nil {
return nil, fmt.Errorf("missing request context")
}
endpoint := normalizeOpenAIImagesEndpointPath(c.Request.URL.Path)
if endpoint == "" {
return nil, fmt.Errorf("unsupported images endpoint")
}
contentType := strings.TrimSpace(c.GetHeader("Content-Type"))
req := &OpenAIImagesRequest{
Endpoint: endpoint,
ContentType: contentType,
N: 1,
Body: body,
}
if len(body) > 0 {
sum := sha256.Sum256(body)
req.bodyHash = hex.EncodeToString(sum[:8])
}
mediaType, _, err := mime.ParseMediaType(contentType)
if err == nil && strings.EqualFold(mediaType, "multipart/form-data") {
req.Multipart = true
if parseErr := parseOpenAIImagesMultipartRequest(body, contentType, req); parseErr != nil {
return nil, parseErr
}
} else {
if len(body) == 0 {
return nil, fmt.Errorf("request body is empty")
}
if !gjson.ValidBytes(body) {
return nil, fmt.Errorf("failed to parse request body")
}
if parseErr := parseOpenAIImagesJSONRequest(body, req); parseErr != nil {
return nil, parseErr
}
}
applyOpenAIImagesDefaults(req)
if err := validateOpenAIImagesModel(req.Model); err != nil {
return nil, err
}
req.SizeTier = normalizeOpenAIImageSizeTier(req.Size)
req.RequiredCapability = classifyOpenAIImagesCapability(req)
return req, nil
}
func parseOpenAIImagesJSONRequest(body []byte, req *OpenAIImagesRequest) error {
if modelResult := gjson.GetBytes(body, "model"); modelResult.Exists() {
req.Model = strings.TrimSpace(modelResult.String())
req.ExplicitModel = req.Model != ""
}
req.Prompt = strings.TrimSpace(gjson.GetBytes(body, "prompt").String())
if streamResult := gjson.GetBytes(body, "stream"); streamResult.Exists() {
if streamResult.Type != gjson.True && streamResult.Type != gjson.False {
return fmt.Errorf("invalid stream field type")
}
req.Stream = streamResult.Bool()
}
if nResult := gjson.GetBytes(body, "n"); nResult.Exists() {
if nResult.Type != gjson.Number {
return fmt.Errorf("invalid n field type")
}
req.N = int(nResult.Int())
if req.N <= 0 {
return fmt.Errorf("n must be greater than 0")
}
}
if sizeResult := gjson.GetBytes(body, "size"); sizeResult.Exists() {
req.Size = strings.TrimSpace(sizeResult.String())
req.ExplicitSize = req.Size != ""
}
req.ResponseFormat = strings.ToLower(strings.TrimSpace(gjson.GetBytes(body, "response_format").String()))
req.HasMask = gjson.GetBytes(body, "mask").Exists()
req.HasNativeOptions = hasOpenAINativeImageOptions(func(path string) bool {
return gjson.GetBytes(body, path).Exists()
})
return nil
}
func parseOpenAIImagesMultipartRequest(body []byte, contentType string, req *OpenAIImagesRequest) error {
_, params, err := mime.ParseMediaType(contentType)
if err != nil {
return fmt.Errorf("invalid multipart content-type: %w", err)
}
boundary := strings.TrimSpace(params["boundary"])
if boundary == "" {
return fmt.Errorf("multipart boundary is required")
}
reader := multipart.NewReader(bytes.NewReader(body), boundary)
for {
part, err := reader.NextPart()
if err == io.EOF {
break
}
if err != nil {
return fmt.Errorf("read multipart body: %w", err)
}
name := strings.TrimSpace(part.FormName())
if name == "" {
_ = part.Close()
continue
}
data, err := io.ReadAll(io.LimitReader(part, openAIImageMaxUploadPartSize))
_ = part.Close()
if err != nil {
return fmt.Errorf("read multipart field %s: %w", name, err)
}
fileName := strings.TrimSpace(part.FileName())
if fileName != "" {
partContentType := strings.TrimSpace(part.Header.Get("Content-Type"))
if name == "mask" && len(data) > 0 {
req.HasMask = true
}
if name == "image" || strings.HasPrefix(name, "image[") {
width, height := parseOpenAIImageDimensions(part.Header)
req.Uploads = append(req.Uploads, OpenAIImagesUpload{
FieldName: name,
FileName: fileName,
ContentType: partContentType,
Data: data,
Width: width,
Height: height,
})
}
continue
}
value := strings.TrimSpace(string(data))
switch name {
case "model":
req.Model = value
req.ExplicitModel = value != ""
case "prompt":
req.Prompt = value
case "size":
req.Size = value
req.ExplicitSize = value != ""
case "response_format":
req.ResponseFormat = strings.ToLower(value)
case "stream":
parsed, err := strconv.ParseBool(value)
if err != nil {
return fmt.Errorf("invalid stream field value")
}
req.Stream = parsed
case "n":
n, err := strconv.Atoi(value)
if err != nil || n <= 0 {
return fmt.Errorf("n must be a positive integer")
}
req.N = n
default:
if isOpenAINativeImageOption(name) && value != "" {
req.HasNativeOptions = true
}
}
}
if len(req.Uploads) == 0 && req.IsEdits() {
return fmt.Errorf("image file is required")
}
return nil
}
func parseOpenAIImageDimensions(_ textproto.MIMEHeader) (int, int) {
return 0, 0
}
func applyOpenAIImagesDefaults(req *OpenAIImagesRequest) {
if req == nil {
return
}
if req.N <= 0 {
req.N = 1
}
if strings.TrimSpace(req.Model) != "" {
req.Model = strings.TrimSpace(req.Model)
return
}
req.Model = "gpt-image-2"
}
func isOpenAIImageGenerationModel(model string) bool {
return strings.HasPrefix(strings.ToLower(strings.TrimSpace(model)), "gpt-image-")
}
func validateOpenAIImagesModel(model string) error {
model = strings.TrimSpace(model)
if isOpenAIImageGenerationModel(model) {
return nil
}
if model == "" {
return fmt.Errorf("images endpoint requires an image model")
}
return fmt.Errorf("images endpoint requires an image model, got %q", model)
}
func normalizeOpenAIImagesEndpointPath(path string) string {
trimmed := strings.TrimSpace(path)
switch {
case strings.Contains(trimmed, "/images/generations"):
return openAIImagesGenerationsEndpoint
case strings.Contains(trimmed, "/images/edits"):
return openAIImagesEditsEndpoint
default:
return ""
}
}
func classifyOpenAIImagesCapability(req *OpenAIImagesRequest) OpenAIImagesCapability {
if req == nil {
return OpenAIImagesCapabilityNative
}
if req.ExplicitModel || req.ExplicitSize {
return OpenAIImagesCapabilityNative
}
model := strings.ToLower(strings.TrimSpace(req.Model))
if !strings.HasPrefix(model, "gpt-image-") {
return OpenAIImagesCapabilityNative
}
if req.Stream || req.N != 1 || req.HasMask || req.HasNativeOptions {
return OpenAIImagesCapabilityNative
}
if req.IsEdits() && !req.Multipart {
return OpenAIImagesCapabilityNative
}
if req.ResponseFormat != "" && req.ResponseFormat != "b64_json" {
return OpenAIImagesCapabilityNative
}
return OpenAIImagesCapabilityBasic
}
func hasOpenAINativeImageOptions(exists func(path string) bool) bool {
for _, path := range []string{
"background",
"quality",
"style",
"output_format",
"output_compression",
"moderation",
} {
if exists(path) {
return true
}
}
return false
}
func isOpenAINativeImageOption(name string) bool {
switch strings.TrimSpace(strings.ToLower(name)) {
case "background", "quality", "style", "output_format", "output_compression", "moderation":
return true
default:
return false
}
}
func normalizeOpenAIImageSizeTier(size string) string {
switch strings.ToLower(strings.TrimSpace(size)) {
case "1024x1024":
return "1K"
case "1536x1024", "1024x1536", "1792x1024", "1024x1792", "", "auto":
return "2K"
default:
return "2K"
}
}
func (s *OpenAIGatewayService) ForwardImages(
ctx context.Context,
c *gin.Context,
account *Account,
body []byte,
parsed *OpenAIImagesRequest,
channelMappedModel string,
) (*OpenAIForwardResult, error) {
if parsed == nil {
return nil, fmt.Errorf("parsed images request is required")
}
switch account.Type {
case AccountTypeAPIKey:
return s.forwardOpenAIImagesAPIKey(ctx, c, account, body, parsed, channelMappedModel)
case AccountTypeOAuth:
return s.forwardOpenAIImagesOAuth(ctx, c, account, parsed, channelMappedModel)
default:
return nil, fmt.Errorf("unsupported account type: %s", account.Type)
}
}
func (s *OpenAIGatewayService) forwardOpenAIImagesAPIKey(
ctx context.Context,
c *gin.Context,
account *Account,
body []byte,
parsed *OpenAIImagesRequest,
channelMappedModel string,
) (*OpenAIForwardResult, error) {
startTime := time.Now()
requestModel := strings.TrimSpace(parsed.Model)
if mapped := strings.TrimSpace(channelMappedModel); mapped != "" {
requestModel = mapped
}
if err := validateOpenAIImagesModel(requestModel); err != nil {
return nil, err
}
upstreamModel := account.GetMappedModel(requestModel)
if err := validateOpenAIImagesModel(upstreamModel); err != nil {
return nil, err
}
logger.LegacyPrintf(
"service.openai_gateway",
"[OpenAI] Images request routing request_model=%s upstream_model=%s endpoint=%s account_type=%s",
strings.TrimSpace(parsed.Model),
upstreamModel,
parsed.Endpoint,
account.Type,
)
forwardBody, forwardContentType, err := rewriteOpenAIImagesModel(body, parsed.ContentType, upstreamModel)
if err != nil {
return nil, err
}
if !parsed.Multipart {
setOpsUpstreamRequestBody(c, forwardBody)
}
token, _, err := s.GetAccessToken(ctx, account)
if err != nil {
return nil, err
}
upstreamReq, err := s.buildOpenAIImagesRequest(ctx, c, account, forwardBody, forwardContentType, token, parsed.Endpoint)
if err != nil {
return nil, err
}
proxyURL := ""
if account.ProxyID != nil && account.Proxy != nil {
proxyURL = account.Proxy.URL()
}
upstreamStart := time.Now()
resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
SetOpsLatencyMs(c, OpsUpstreamLatencyMsKey, time.Since(upstreamStart).Milliseconds())
if err != nil {
safeErr := sanitizeUpstreamErrorMessage(err.Error())
setOpsUpstreamError(c, 0, safeErr, "")
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: 0,
UpstreamURL: safeUpstreamURL(upstreamReq.URL.String()),
Kind: "request_error",
Message: safeErr,
})
return nil, fmt.Errorf("upstream request failed: %s", safeErr)
}
if resp.StatusCode >= 400 {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
resp.Body = io.NopCloser(bytes.NewReader(respBody))
upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
UpstreamURL: safeUpstreamURL(upstreamReq.URL.String()),
Kind: "failover",
Message: upstreamMsg,
})
s.handleFailoverSideEffects(ctx, resp, account)
return nil, &UpstreamFailoverError{
StatusCode: resp.StatusCode,
ResponseBody: respBody,
RetryableOnSameAccount: account.IsPoolMode() && isPoolModeRetryableStatus(resp.StatusCode),
}
}
return s.handleErrorResponse(ctx, resp, c, account, forwardBody)
}
defer func() { _ = resp.Body.Close() }()
var usage OpenAIUsage
imageCount := parsed.N
var firstTokenMs *int
if parsed.Stream {
streamUsage, streamCount, ttft, err := s.handleOpenAIImagesStreamingResponse(resp, c, startTime)
if err != nil {
return nil, err
}
usage = streamUsage
imageCount = streamCount
firstTokenMs = ttft
} else {
nonStreamUsage, nonStreamCount, err := s.handleOpenAIImagesNonStreamingResponse(resp, c)
if err != nil {
return nil, err
}
usage = nonStreamUsage
if nonStreamCount > 0 {
imageCount = nonStreamCount
}
}
return &OpenAIForwardResult{
RequestID: resp.Header.Get("x-request-id"),
Usage: usage,
Model: requestModel,
UpstreamModel: upstreamModel,
Stream: parsed.Stream,
ResponseHeaders: resp.Header.Clone(),
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
ImageCount: imageCount,
ImageSize: parsed.SizeTier,
}, nil
}
func (s *OpenAIGatewayService) buildOpenAIImagesRequest(
ctx context.Context,
c *gin.Context,
account *Account,
body []byte,
contentType string,
token string,
endpoint string,
) (*http.Request, error) {
targetURL := openAIImagesGenerationsURL
if endpoint == openAIImagesEditsEndpoint {
targetURL = openAIImagesEditsURL
}
baseURL := account.GetOpenAIBaseURL()
if baseURL != "" {
validatedURL, err := s.validateUpstreamBaseURL(baseURL)
if err != nil {
return nil, err
}
targetURL = buildOpenAIImagesURL(validatedURL, endpoint)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, targetURL, bytes.NewReader(body))
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+token)
for key, values := range c.Request.Header {
if !openaiPassthroughAllowedHeaders[strings.ToLower(key)] {
continue
}
for _, value := range values {
req.Header.Add(key, value)
}
}
customUA := account.GetOpenAIUserAgent()
if customUA != "" {
req.Header.Set("User-Agent", customUA)
}
if strings.TrimSpace(contentType) != "" {
req.Header.Set("Content-Type", contentType)
}
return req, nil
}
func buildOpenAIImagesURL(base string, endpoint string) string {
normalized := strings.TrimRight(strings.TrimSpace(base), "/")
relative := strings.TrimPrefix(strings.TrimSpace(endpoint), "/v1")
if strings.HasSuffix(normalized, endpoint) || strings.HasSuffix(normalized, relative) {
return normalized
}
if strings.HasSuffix(normalized, "/v1") {
return normalized + relative
}
return normalized + endpoint
}
func rewriteOpenAIImagesModel(body []byte, contentType string, model string) ([]byte, string, error) {
model = strings.TrimSpace(model)
if model == "" {
return body, contentType, nil
}
mediaType, _, err := mime.ParseMediaType(contentType)
if err == nil && strings.EqualFold(mediaType, "multipart/form-data") {
rewrittenBody, rewrittenType, rewriteErr := rewriteOpenAIImagesMultipartModel(body, contentType, model)
return rewrittenBody, rewrittenType, rewriteErr
}
rewritten, err := sjson.SetBytes(body, "model", model)
if err != nil {
return nil, "", fmt.Errorf("rewrite image request model: %w", err)
}
return rewritten, contentType, nil
}
func rewriteOpenAIImagesMultipartModel(body []byte, contentType string, model string) ([]byte, string, error) {
_, params, err := mime.ParseMediaType(contentType)
if err != nil {
return nil, "", fmt.Errorf("parse multipart content-type: %w", err)
}
boundary := strings.TrimSpace(params["boundary"])
if boundary == "" {
return nil, "", fmt.Errorf("multipart boundary is required")
}
reader := multipart.NewReader(bytes.NewReader(body), boundary)
var buffer bytes.Buffer
writer := multipart.NewWriter(&buffer)
modelWritten := false
for {
part, err := reader.NextPart()
if err == io.EOF {
break
}
if err != nil {
return nil, "", fmt.Errorf("read multipart body: %w", err)
}
formName := strings.TrimSpace(part.FormName())
partHeader := cloneMultipartHeader(part.Header)
target, err := writer.CreatePart(partHeader)
if err != nil {
_ = part.Close()
return nil, "", fmt.Errorf("create multipart part: %w", err)
}
if formName == "model" && part.FileName() == "" {
if _, err := target.Write([]byte(model)); err != nil {
_ = part.Close()
return nil, "", fmt.Errorf("rewrite multipart model: %w", err)
}
modelWritten = true
_ = part.Close()
continue
}
if _, err := io.Copy(target, part); err != nil {
_ = part.Close()
return nil, "", fmt.Errorf("copy multipart part: %w", err)
}
_ = part.Close()
}
if !modelWritten {
if err := writer.WriteField("model", model); err != nil {
return nil, "", fmt.Errorf("append multipart model field: %w", err)
}
}
if err := writer.Close(); err != nil {
return nil, "", fmt.Errorf("finalize multipart body: %w", err)
}
return buffer.Bytes(), writer.FormDataContentType(), nil
}
func cloneMultipartHeader(src textproto.MIMEHeader) textproto.MIMEHeader {
dst := make(textproto.MIMEHeader, len(src))
for key, values := range src {
copied := make([]string, len(values))
copy(copied, values)
dst[key] = copied
}
return dst
}
func (s *OpenAIGatewayService) handleOpenAIImagesNonStreamingResponse(resp *http.Response, c *gin.Context) (OpenAIUsage, int, error) {
body, err := ReadUpstreamResponseBody(resp.Body, s.cfg, c, openAITooLargeError)
if err != nil {
return OpenAIUsage{}, 0, err
}
responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
contentType := "application/json"
if s.cfg != nil && !s.cfg.Security.ResponseHeaders.Enabled {
if upstreamType := resp.Header.Get("Content-Type"); upstreamType != "" {
contentType = upstreamType
}
}
c.Data(resp.StatusCode, contentType, body)
usage, _ := extractOpenAIUsageFromJSONBytes(body)
return usage, extractOpenAIImageCountFromJSONBytes(body), nil
}
func (s *OpenAIGatewayService) handleOpenAIImagesStreamingResponse(
resp *http.Response,
c *gin.Context,
startTime time.Time,
) (OpenAIUsage, int, *int, error) {
responseheaders.WriteFilteredHeaders(c.Writer.Header(), resp.Header, s.responseHeaderFilter)
contentType := strings.TrimSpace(resp.Header.Get("Content-Type"))
if contentType == "" {
contentType = "text/event-stream"
}
c.Status(resp.StatusCode)
c.Header("Content-Type", contentType)
flusher, ok := c.Writer.(http.Flusher)
if !ok {
return OpenAIUsage{}, 0, nil, fmt.Errorf("streaming is not supported by response writer")
}
reader := bufio.NewReader(resp.Body)
usage := OpenAIUsage{}
imageCount := 0
var firstTokenMs *int
for {
line, err := reader.ReadBytes('\n')
if len(line) > 0 {
if firstTokenMs == nil {
ms := int(time.Since(startTime).Milliseconds())
firstTokenMs = &ms
}
if _, writeErr := c.Writer.Write(line); writeErr != nil {
return OpenAIUsage{}, 0, firstTokenMs, writeErr
}
flusher.Flush()
if data, ok := extractOpenAISSEDataLine(strings.TrimRight(string(line), "\r\n")); ok && data != "" && data != "[DONE]" {
dataBytes := []byte(data)
mergeOpenAIUsage(&usage, dataBytes)
if count := extractOpenAIImageCountFromJSONBytes(dataBytes); count > imageCount {
imageCount = count
}
}
}
if err == io.EOF {
break
}
if err != nil {
return OpenAIUsage{}, 0, firstTokenMs, err
}
}
return usage, imageCount, firstTokenMs, nil
}
func mergeOpenAIUsage(dst *OpenAIUsage, body []byte) {
if dst == nil {
return
}
if parsed, ok := extractOpenAIUsageFromJSONBytes(body); ok {
if parsed.InputTokens > 0 {
dst.InputTokens = parsed.InputTokens
}
if parsed.OutputTokens > 0 {
dst.OutputTokens = parsed.OutputTokens
}
if parsed.CacheReadInputTokens > 0 {
dst.CacheReadInputTokens = parsed.CacheReadInputTokens
}
if parsed.ImageOutputTokens > 0 {
dst.ImageOutputTokens = parsed.ImageOutputTokens
}
}
}
func extractOpenAIImageCountFromJSONBytes(body []byte) int {
if len(body) == 0 || !gjson.ValidBytes(body) {
return 0
}
data := gjson.GetBytes(body, "data")
if data.Exists() && data.IsArray() {
return len(data.Array())
}
return 0
}
func (s *OpenAIGatewayService) forwardOpenAIImagesOAuth(
ctx context.Context,
c *gin.Context,
account *Account,
parsed *OpenAIImagesRequest,
channelMappedModel string,
) (*OpenAIForwardResult, error) {
startTime := time.Now()
requestModel := strings.TrimSpace(parsed.Model)
if mapped := strings.TrimSpace(channelMappedModel); mapped != "" {
requestModel = mapped
}
if err := validateOpenAIImagesModel(requestModel); err != nil {
return nil, err
}
logger.LegacyPrintf(
"service.openai_gateway",
"[OpenAI] Images request routing request_model=%s endpoint=%s account_type=%s uploads=%d",
requestModel,
parsed.Endpoint,
account.Type,
len(parsed.Uploads),
)
token, _, err := s.GetAccessToken(ctx, account)
if err != nil {
return nil, err
}
client, err := newOpenAIBackendAPIClient(resolveOpenAIProxyURL(account))
if err != nil {
return nil, err
}
headers, err := s.buildOpenAIBackendAPIHeaders(account, token)
if err != nil {
return nil, err
}
if bootstrapErr := bootstrapOpenAIBackendAPI(ctx, client, headers); bootstrapErr != nil {
logger.LegacyPrintf("service.openai_gateway", "OpenAI image bootstrap failed: %v", bootstrapErr)
}
chatReqs, err := fetchOpenAIChatRequirements(ctx, client, headers)
if err != nil {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, err)
}
if chatReqs.Arkose.Required {
return nil, s.wrapOpenAIImageBackendError(
ctx,
c,
account,
newOpenAIImageSyntheticStatusError(
http.StatusForbidden,
"chat-requirements requires unsupported challenge (arkose)",
openAIChatGPTChatRequirementsURL,
),
)
}
parentMessageID := uuid.NewString()
proofToken := generateOpenAIProofToken(chatReqs.ProofOfWork.Required, chatReqs.ProofOfWork.Seed, chatReqs.ProofOfWork.Difficulty, headers.Get("User-Agent"))
_ = initializeOpenAIImageConversation(ctx, client, headers)
conduitToken, err := prepareOpenAIImageConversation(ctx, client, headers, parsed.Prompt, parentMessageID, chatReqs.Token, proofToken)
if err != nil {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, err)
}
uploads, err := uploadOpenAIImageFiles(ctx, client, headers, parsed.Uploads)
if err != nil {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, err)
}
convReq := buildOpenAIImageConversationRequest(parsed, parentMessageID, uploads)
if parsedContent, err := json.Marshal(convReq); err == nil {
setOpsUpstreamRequestBody(c, parsedContent)
}
convHeaders := cloneHTTPHeader(headers)
convHeaders.Set("Accept", "text/event-stream")
convHeaders.Set("Content-Type", "application/json")
convHeaders.Set("openai-sentinel-chat-requirements-token", chatReqs.Token)
if conduitToken != "" {
convHeaders.Set("x-conduit-token", conduitToken)
}
if proofToken != "" {
convHeaders.Set("openai-sentinel-proof-token", proofToken)
}
resp, err := client.R().
SetContext(ctx).
DisableAutoReadResponse().
SetHeaders(headerToMap(convHeaders)).
SetBodyJsonMarshal(convReq).
Post(openAIChatGPTConversationURL)
if err != nil {
return nil, fmt.Errorf("openai image conversation request failed: %w", err)
}
defer func() {
if resp != nil && resp.Body != nil {
_ = resp.Body.Close()
}
}()
if resp.StatusCode >= 400 {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, handleOpenAIImageBackendError(resp))
}
conversationID, pointerInfos, usage, firstTokenMs, err := readOpenAIImageConversationStream(resp, startTime)
if err != nil {
return nil, err
}
pointerInfos = mergeOpenAIImagePointerInfos(pointerInfos, nil)
logger.LegacyPrintf(
"service.openai_gateway",
"[OpenAI] Image extraction stream conversation_id=%s total_assets=%d file_service_assets=%d direct_assets=%d",
conversationID,
len(pointerInfos),
countOpenAIFileServicePointerInfos(pointerInfos),
countOpenAIDirectImageAssets(pointerInfos),
)
lifecycleCtx, releaseLifecycleCtx := detachOpenAIImageLifecycleContext(ctx, openAIImageLifecycleTimeout)
defer releaseLifecycleCtx()
if conversationID != "" && !hasOpenAIFileServicePointerInfos(pointerInfos) {
polledPointers, pollErr := pollOpenAIImageConversation(lifecycleCtx, client, headers, conversationID)
if pollErr != nil {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, pollErr)
}
pointerInfos = mergeOpenAIImagePointerInfos(pointerInfos, polledPointers)
}
pointerInfos = preferOpenAIFileServicePointerInfos(pointerInfos)
if len(pointerInfos) == 0 {
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Image extraction yielded no assets conversation_id=%s", conversationID)
return nil, fmt.Errorf("openai image conversation returned no downloadable images")
}
responseBody, imageCount, err := buildOpenAIImageResponse(lifecycleCtx, client, headers, conversationID, pointerInfos)
if err != nil {
return nil, s.wrapOpenAIImageBackendError(ctx, c, account, err)
}
c.Data(http.StatusOK, "application/json; charset=utf-8", responseBody)
return &OpenAIForwardResult{
RequestID: resp.Header.Get("x-request-id"),
Usage: usage,
Model: requestModel,
UpstreamModel: requestModel,
Stream: false,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
ImageCount: imageCount,
ImageSize: parsed.SizeTier,
}, nil
}
func resolveOpenAIProxyURL(account *Account) string {
if account != nil && account.ProxyID != nil && account.Proxy != nil {
return account.Proxy.URL()
}
return ""
}
func newOpenAIBackendAPIClient(proxyURL string) (*req.Client, error) {
client := req.C().
SetTimeout(180 * time.Second).
ImpersonateChrome()
trimmed, _, err := proxyurl.Parse(proxyURL)
if err != nil {
return nil, err
}
if trimmed != "" {
client.SetProxyURL(trimmed)
}
return client, nil
}
func (s *OpenAIGatewayService) buildOpenAIBackendAPIHeaders(account *Account, token string) (http.Header, error) {
deviceID, sessionID := s.ensureOpenAIImageSessionCredentials(context.Background(), account)
headers := make(http.Header)
headers.Set("Authorization", "Bearer "+token)
headers.Set("Accept", "application/json")
headers.Set("Origin", "https://chatgpt.com")
headers.Set("Referer", "https://chatgpt.com/")
headers.Set("Sec-Fetch-Dest", "empty")
headers.Set("Sec-Fetch-Mode", "cors")
headers.Set("Sec-Fetch-Site", "same-origin")
headers.Set("User-Agent", openAIImageBackendUserAgent)
if customUA := strings.TrimSpace(account.GetOpenAIUserAgent()); customUA != "" {
headers.Set("User-Agent", customUA)
}
if chatgptAccountID := strings.TrimSpace(account.GetChatGPTAccountID()); chatgptAccountID != "" {
headers.Set("chatgpt-account-id", chatgptAccountID)
}
if deviceID != "" {
headers.Set("oai-device-id", deviceID)
headers.Set("Cookie", "oai-did="+deviceID)
}
if sessionID != "" {
headers.Set("oai-session-id", sessionID)
}
return headers, nil
}
func (s *OpenAIGatewayService) ensureOpenAIImageSessionCredentials(ctx context.Context, account *Account) (string, string) {
if account == nil {
return "", ""
}
deviceID := account.GetOpenAIDeviceID()
sessionID := account.GetOpenAISessionID()
if deviceID != "" && sessionID != "" {
return deviceID, sessionID
}
updates := map[string]any{}
if deviceID == "" {
deviceID = uuid.NewString()
updates["openai_device_id"] = deviceID
}
if sessionID == "" {
sessionID = uuid.NewString()
updates["openai_session_id"] = sessionID
}
if account.Extra == nil {
account.Extra = map[string]any{}
}
for key, value := range updates {
account.Extra[key] = value
}
if len(updates) == 0 || s == nil || s.accountRepo == nil {
return deviceID, sessionID
}
updateCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
if err := s.accountRepo.UpdateExtra(updateCtx, account.ID, updates); err != nil {
logger.LegacyPrintf("service.openai_gateway", "persist openai image session creds failed: account=%d err=%v", account.ID, err)
}
return deviceID, sessionID
}
func bootstrapOpenAIBackendAPI(ctx context.Context, client *req.Client, headers http.Header) error {
resp, err := client.R().
SetContext(ctx).
DisableAutoReadResponse().
SetHeaders(headerToMap(headers)).
Get(openAIChatGPTStartURL)
if err != nil {
return err
}
if resp != nil && resp.Body != nil {
_, _ = io.Copy(io.Discard, resp.Body)
_ = resp.Body.Close()
}
return nil
}
func initializeOpenAIImageConversation(ctx context.Context, client *req.Client, headers http.Header) error {
payload := map[string]any{
"gizmo_id": nil,
"requested_default_model": nil,
"conversation_id": nil,
"timezone_offset_min": openAITimezoneOffsetMinutes(),
"system_hints": []string{"picture_v2"},
}
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
SetBodyJsonMarshal(payload).
Post(openAIChatGPTConversationInitURL)
if err != nil {
return err
}
if !resp.IsSuccessState() {
return newOpenAIImageStatusError(resp, "conversation init failed")
}
return nil
}
type openAIChatRequirements struct {
Token string `json:"token"`
Turnstile struct {
Required bool `json:"required"`
} `json:"turnstile"`
Arkose struct {
Required bool `json:"required"`
} `json:"arkose"`
ProofOfWork struct {
Required bool `json:"required"`
Seed string `json:"seed"`
Difficulty string `json:"difficulty"`
} `json:"proofofwork"`
}
func fetchOpenAIChatRequirements(ctx context.Context, client *req.Client, headers http.Header) (*openAIChatRequirements, error) {
var lastErr error
for _, payload := range []map[string]any{
{"p": nil},
{"p": generateOpenAIRequirementsToken(headers.Get("User-Agent"))},
} {
var result openAIChatRequirements
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
SetBodyJsonMarshal(payload).
SetSuccessResult(&result).
Post(openAIChatGPTChatRequirementsURL)
if err != nil {
lastErr = err
continue
}
if resp.IsSuccessState() && strings.TrimSpace(result.Token) != "" {
return &result, nil
}
lastErr = newOpenAIImageStatusError(resp, "chat-requirements failed")
}
if lastErr == nil {
lastErr = fmt.Errorf("chat-requirements failed")
}
return nil, lastErr
}
func prepareOpenAIImageConversation(
ctx context.Context,
client *req.Client,
headers http.Header,
prompt string,
parentMessageID string,
chatToken string,
proofToken string,
) (string, error) {
messageID := uuid.NewString()
payload := map[string]any{
"action": "next",
"client_prepare_state": "success",
"fork_from_shared_post": false,
"parent_message_id": parentMessageID,
"model": "auto",
"timezone_offset_min": openAITimezoneOffsetMinutes(),
"timezone": openAITimezoneName(),
"conversation_mode": map[string]any{"kind": "primary_assistant"},
"system_hints": []string{"picture_v2"},
"supports_buffering": true,
"supported_encodings": []string{"v1"},
"partial_query": map[string]any{
"id": messageID,
"author": map[string]any{"role": "user"},
"content": map[string]any{
"content_type": "text",
"parts": []string{coalesceOpenAIFileName(prompt, "Generate an image.")},
},
},
"client_contextual_info": map[string]any{
"app_name": "chatgpt.com",
},
}
prepareHeaders := cloneHTTPHeader(headers)
prepareHeaders.Set("Accept", "*/*")
prepareHeaders.Set("Content-Type", "application/json")
if strings.TrimSpace(chatToken) != "" {
prepareHeaders.Set("openai-sentinel-chat-requirements-token", strings.TrimSpace(chatToken))
}
if strings.TrimSpace(proofToken) != "" {
prepareHeaders.Set("openai-sentinel-proof-token", strings.TrimSpace(proofToken))
}
var result struct {
ConduitToken string `json:"conduit_token"`
}
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(prepareHeaders)).
SetBodyJsonMarshal(payload).
SetSuccessResult(&result).
Post(openAIChatGPTConversationPrepareURL)
if err != nil {
return "", err
}
if !resp.IsSuccessState() {
return "", newOpenAIImageStatusError(resp, "conversation prepare failed")
}
return strings.TrimSpace(result.ConduitToken), nil
}
type openAIUploadedImage struct {
FileID string
FileName string
FileSize int
MimeType string
Width int
Height int
}
func uploadOpenAIImageFiles(ctx context.Context, client *req.Client, headers http.Header, uploads []OpenAIImagesUpload) ([]openAIUploadedImage, error) {
if len(uploads) == 0 {
return nil, nil
}
results := make([]openAIUploadedImage, 0, len(uploads))
for i := range uploads {
item := uploads[i]
fileName := coalesceOpenAIFileName(item.FileName, "image.png")
payload := map[string]any{
"file_name": fileName,
"file_size": len(item.Data),
"use_case": "multimodal",
}
var created struct {
FileID string `json:"file_id"`
UploadURL string `json:"upload_url"`
}
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
SetBodyJsonMarshal(payload).
SetSuccessResult(&created).
Post(openAIChatGPTFilesURL)
if err != nil {
return nil, err
}
if !resp.IsSuccessState() || strings.TrimSpace(created.FileID) == "" || strings.TrimSpace(created.UploadURL) == "" {
return nil, newOpenAIImageStatusError(resp, "create upload slot failed")
}
uploadHeaders := map[string]string{
"Content-Type": coalesceOpenAIFileName(item.ContentType, "application/octet-stream"),
"Origin": "https://chatgpt.com",
"x-ms-blob-type": "BlockBlob",
"x-ms-version": "2020-04-08",
"User-Agent": headers.Get("User-Agent"),
}
putResp, err := client.R().
SetContext(ctx).
SetHeaders(uploadHeaders).
SetBody(item.Data).
DisableAutoReadResponse().
Put(created.UploadURL)
if err != nil {
return nil, err
}
if putResp.Response != nil && putResp.Body != nil {
_, _ = io.Copy(io.Discard, putResp.Body)
_ = putResp.Body.Close()
}
if putResp.StatusCode < 200 || putResp.StatusCode >= 300 {
return nil, newOpenAIImageStatusError(putResp, "upload image bytes failed")
}
uploadedResp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
SetBodyJsonMarshal(map[string]any{}).
Post(fmt.Sprintf("%s/%s/uploaded", openAIChatGPTFilesURL, created.FileID))
if err != nil {
return nil, err
}
if !uploadedResp.IsSuccessState() {
return nil, newOpenAIImageStatusError(uploadedResp, "mark upload complete failed")
}
results = append(results, openAIUploadedImage{
FileID: created.FileID,
FileName: fileName,
FileSize: len(item.Data),
MimeType: coalesceOpenAIFileName(item.ContentType, "application/octet-stream"),
Width: item.Width,
Height: item.Height,
})
}
return results, nil
}
func coalesceOpenAIFileName(value string, fallback string) string {
value = strings.TrimSpace(value)
if value == "" {
return fallback
}
return value
}
func buildOpenAIImageConversationRequest(parsed *OpenAIImagesRequest, parentMessageID string, uploads []openAIUploadedImage) map[string]any {
parts := []any{coalesceOpenAIFileName(parsed.Prompt, "Generate an image.")}
attachments := make([]map[string]any, 0, len(uploads))
if len(uploads) > 0 {
parts = make([]any, 0, len(uploads)+1)
for _, upload := range uploads {
parts = append(parts, map[string]any{
"content_type": "image_asset_pointer",
"asset_pointer": "file-service://" + upload.FileID,
"size_bytes": upload.FileSize,
"width": upload.Width,
"height": upload.Height,
})
attachment := map[string]any{
"id": upload.FileID,
"mimeType": upload.MimeType,
"name": upload.FileName,
"size": upload.FileSize,
}
if upload.Width > 0 {
attachment["width"] = upload.Width
}
if upload.Height > 0 {
attachment["height"] = upload.Height
}
attachments = append(attachments, attachment)
}
parts = append(parts, coalesceOpenAIFileName(parsed.Prompt, "Edit this image."))
}
contentType := "text"
if len(uploads) > 0 {
contentType = "multimodal_text"
}
metadata := map[string]any{
"developer_mode_connector_ids": []any{},
"selected_github_repos": []any{},
"selected_all_github_repos": false,
"system_hints": []string{"picture_v2"},
"serialization_metadata": map[string]any{
"custom_symbol_offsets": []any{},
},
}
message := map[string]any{
"id": uuid.NewString(),
"author": map[string]any{"role": "user"},
"content": map[string]any{
"content_type": contentType,
"parts": parts,
},
"metadata": metadata,
"create_time": float64(time.Now().UnixMilli()) / 1000,
}
if len(attachments) > 0 {
metadata["attachments"] = attachments
}
return map[string]any{
"action": "next",
"client_prepare_state": "sent",
"parent_message_id": parentMessageID,
"model": "auto",
"timezone_offset_min": openAITimezoneOffsetMinutes(),
"timezone": openAITimezoneName(),
"conversation_mode": map[string]any{"kind": "primary_assistant"},
"enable_message_followups": true,
"system_hints": []string{"picture_v2"},
"supports_buffering": true,
"supported_encodings": []string{"v1"},
"paragen_cot_summary_display_override": "allow",
"force_parallel_switch": "auto",
"client_contextual_info": map[string]any{
"is_dark_mode": false,
"time_since_loaded": 200,
"page_height": 900,
"page_width": 1440,
"pixel_ratio": 1,
"screen_height": 1080,
"screen_width": 1920,
"app_name": "chatgpt.com",
},
"messages": []any{message},
}
}
type openAIImagePointerInfo struct {
Pointer string
DownloadURL string
B64JSON string
MimeType string
Prompt string
}
type openAIImageToolMessage struct {
MessageID string
CreateTime float64
PointerInfos []openAIImagePointerInfo
}
func readOpenAIImageConversationStream(resp *req.Response, startTime time.Time) (string, []openAIImagePointerInfo, OpenAIUsage, *int, error) {
if resp == nil || resp.Body == nil {
return "", nil, OpenAIUsage{}, nil, fmt.Errorf("empty conversation response")
}
reader := bufio.NewReader(resp.Body)
var (
conversationID string
firstTokenMs *int
usage OpenAIUsage
pointers []openAIImagePointerInfo
)
for {
line, err := reader.ReadString('\n')
if strings.TrimSpace(line) != "" && firstTokenMs == nil {
ms := int(time.Since(startTime).Milliseconds())
firstTokenMs = &ms
}
if data, ok := extractOpenAISSEDataLine(strings.TrimRight(line, "\r\n")); ok && data != "" && data != "[DONE]" {
dataBytes := []byte(data)
if conversationID == "" {
conversationID = strings.TrimSpace(gjson.GetBytes(dataBytes, "v.conversation_id").String())
if conversationID == "" {
conversationID = strings.TrimSpace(gjson.GetBytes(dataBytes, "conversation_id").String())
}
}
mergeOpenAIUsage(&usage, dataBytes)
pointers = mergeOpenAIImagePointerInfos(pointers, collectOpenAIImagePointers(dataBytes))
}
if err == io.EOF {
break
}
if err != nil {
return "", nil, OpenAIUsage{}, firstTokenMs, err
}
}
return conversationID, pointers, usage, firstTokenMs, nil
}
func collectOpenAIImagePointers(body []byte) []openAIImagePointerInfo {
if len(body) == 0 {
return nil
}
prompt := ""
for _, path := range []string{
"message.metadata.dalle.prompt",
"metadata.dalle.prompt",
"revised_prompt",
} {
if value := strings.TrimSpace(gjson.GetBytes(body, path).String()); value != "" {
prompt = value
break
}
}
matches := openAIImagePointerMatches(body)
out := make([]openAIImagePointerInfo, 0, len(matches))
for _, pointer := range matches {
out = append(out, openAIImagePointerInfo{Pointer: pointer, Prompt: prompt})
}
return mergeOpenAIImagePointerInfos(out, collectOpenAIImageInlineAssets(body, prompt))
}
func openAIImagePointerMatches(body []byte) []string {
raw := string(body)
matches := make([]string, 0, 4)
for _, prefix := range []string{"file-service://", "sediment://"} {
start := 0
for {
idx := strings.Index(raw[start:], prefix)
if idx < 0 {
break
}
idx += start
end := idx + len(prefix)
for end < len(raw) {
ch := raw[end]
if ch != '-' && ch != '_' &&
(ch < '0' || ch > '9') &&
(ch < 'a' || ch > 'z') &&
(ch < 'A' || ch > 'Z') {
break
}
end++
}
matches = append(matches, raw[idx:end])
start = end
}
}
return dedupeStrings(matches)
}
func mergeOpenAIImagePointerInfos(existing []openAIImagePointerInfo, next []openAIImagePointerInfo) []openAIImagePointerInfo {
if len(next) == 0 {
return existing
}
seen := make(map[string]openAIImagePointerInfo, len(existing)+len(next))
out := make([]openAIImagePointerInfo, 0, len(existing)+len(next))
for _, item := range existing {
if key := item.identityKey(); key != "" {
seen[key] = item
}
out = append(out, item)
}
for _, item := range next {
key := item.identityKey()
if key == "" {
continue
}
if existingItem, ok := seen[key]; ok {
merged := mergeOpenAIImagePointerInfo(existingItem, item)
if merged != existingItem {
for i := range out {
if out[i].identityKey() == key {
out[i] = merged
break
}
}
seen[key] = merged
}
continue
}
seen[key] = item
out = append(out, item)
}
return out
}
func (i openAIImagePointerInfo) identityKey() string {
switch {
case strings.TrimSpace(i.Pointer) != "":
return "pointer:" + strings.TrimSpace(i.Pointer)
case strings.TrimSpace(i.DownloadURL) != "":
return "download:" + strings.TrimSpace(i.DownloadURL)
case strings.TrimSpace(i.B64JSON) != "":
b64 := strings.TrimSpace(i.B64JSON)
if len(b64) > 64 {
b64 = b64[:64]
}
return "b64:" + b64
default:
return ""
}
}
func mergeOpenAIImagePointerInfo(existing, next openAIImagePointerInfo) openAIImagePointerInfo {
merged := existing
if strings.TrimSpace(merged.Pointer) == "" {
merged.Pointer = next.Pointer
}
if strings.TrimSpace(merged.DownloadURL) == "" {
merged.DownloadURL = next.DownloadURL
}
if strings.TrimSpace(merged.B64JSON) == "" {
merged.B64JSON = next.B64JSON
}
if strings.TrimSpace(merged.MimeType) == "" {
merged.MimeType = next.MimeType
}
if strings.TrimSpace(merged.Prompt) == "" {
merged.Prompt = next.Prompt
}
return merged
}
func hasOpenAIFileServicePointerInfos(items []openAIImagePointerInfo) bool {
for _, item := range items {
if strings.HasPrefix(item.Pointer, "file-service://") {
return true
}
}
return false
}
func countOpenAIFileServicePointerInfos(items []openAIImagePointerInfo) int {
count := 0
for _, item := range items {
if strings.HasPrefix(item.Pointer, "file-service://") {
count++
}
}
return count
}
func countOpenAIDirectImageAssets(items []openAIImagePointerInfo) int {
count := 0
for _, item := range items {
if strings.TrimSpace(item.DownloadURL) != "" || strings.TrimSpace(item.B64JSON) != "" {
count++
}
}
return count
}
func preferOpenAIFileServicePointerInfos(items []openAIImagePointerInfo) []openAIImagePointerInfo {
if !hasOpenAIFileServicePointerInfos(items) {
return items
}
out := make([]openAIImagePointerInfo, 0, len(items))
for _, item := range items {
if strings.HasPrefix(item.Pointer, "file-service://") {
out = append(out, item)
}
}
return out
}
func extractOpenAIImageToolMessages(mapping map[string]any) []openAIImageToolMessage {
if len(mapping) == 0 {
return nil
}
out := make([]openAIImageToolMessage, 0, 4)
for messageID, raw := range mapping {
node, _ := raw.(map[string]any)
if node == nil {
continue
}
message, _ := node["message"].(map[string]any)
if message == nil {
continue
}
author, _ := message["author"].(map[string]any)
metadata, _ := message["metadata"].(map[string]any)
content, _ := message["content"].(map[string]any)
if author == nil || metadata == nil || content == nil {
continue
}
if role, _ := author["role"].(string); role != "tool" {
continue
}
if asyncTaskType, _ := metadata["async_task_type"].(string); asyncTaskType != "image_gen" {
continue
}
if contentType, _ := content["content_type"].(string); contentType != "multimodal_text" {
continue
}
prompt := ""
if title, _ := metadata["image_gen_title"].(string); strings.TrimSpace(title) != "" {
prompt = strings.TrimSpace(title)
}
item := openAIImageToolMessage{MessageID: messageID}
if createTime, ok := message["create_time"].(float64); ok {
item.CreateTime = createTime
}
parts, _ := content["parts"].([]any)
for _, part := range parts {
switch value := part.(type) {
case map[string]any:
if assetPointer, _ := value["asset_pointer"].(string); strings.TrimSpace(assetPointer) != "" {
for _, pointer := range openAIImagePointerMatches([]byte(assetPointer)) {
item.PointerInfos = append(item.PointerInfos, openAIImagePointerInfo{
Pointer: pointer,
Prompt: prompt,
})
}
}
case string:
for _, pointer := range openAIImagePointerMatches([]byte(value)) {
item.PointerInfos = append(item.PointerInfos, openAIImagePointerInfo{
Pointer: pointer,
Prompt: prompt,
})
}
}
}
if len(item.PointerInfos) == 0 {
continue
}
item.PointerInfos = mergeOpenAIImagePointerInfos(nil, item.PointerInfos)
out = append(out, item)
}
sort.Slice(out, func(i, j int) bool {
return out[i].CreateTime < out[j].CreateTime
})
return out
}
func pollOpenAIImageConversation(ctx context.Context, client *req.Client, headers http.Header, conversationID string) ([]openAIImagePointerInfo, error) {
conversationID = strings.TrimSpace(conversationID)
if conversationID == "" {
return nil, nil
}
deadline := time.Now().Add(90 * time.Second)
interval := 3 * time.Second
previewWait := 15 * time.Second
var (
lastErr error
firstToolAt time.Time
)
for time.Now().Before(deadline) {
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
DisableAutoReadResponse().
Get(fmt.Sprintf("https://chatgpt.com/backend-api/conversation/%s", conversationID))
if err != nil {
lastErr = err
} else {
if resp.StatusCode >= 200 && resp.StatusCode < 300 {
body, readErr := io.ReadAll(resp.Body)
_ = resp.Body.Close()
if readErr != nil {
lastErr = readErr
goto waitNextPoll
}
pointers := mergeOpenAIImagePointerInfos(nil, collectOpenAIImagePointers(body))
var decoded map[string]any
if err := json.Unmarshal(body, &decoded); err == nil {
if mapping, _ := decoded["mapping"].(map[string]any); len(mapping) > 0 {
toolMessages := extractOpenAIImageToolMessages(mapping)
if len(toolMessages) > 0 && firstToolAt.IsZero() {
firstToolAt = time.Now()
}
for _, msg := range toolMessages {
pointers = mergeOpenAIImagePointerInfos(pointers, msg.PointerInfos)
}
}
}
if hasOpenAIFileServicePointerInfos(pointers) {
return preferOpenAIFileServicePointerInfos(pointers), nil
}
if len(pointers) > 0 && !firstToolAt.IsZero() && time.Since(firstToolAt) >= previewWait {
return pointers, nil
}
} else {
statusErr := newOpenAIImageStatusError(resp, "conversation poll failed")
if isOpenAIImageTransientConversationNotFoundError(statusErr) {
lastErr = statusErr
goto waitNextPoll
}
return nil, statusErr
}
}
waitNextPoll:
timer := time.NewTimer(interval)
select {
case <-ctx.Done():
if !timer.Stop() {
<-timer.C
}
return nil, ctx.Err()
case <-timer.C:
}
}
return nil, lastErr
}
func buildOpenAIImageResponse(
ctx context.Context,
client *req.Client,
headers http.Header,
conversationID string,
pointers []openAIImagePointerInfo,
) ([]byte, int, error) {
type responseItem struct {
B64JSON string `json:"b64_json"`
RevisedPrompt string `json:"revised_prompt,omitempty"`
}
items := make([]responseItem, 0, len(pointers))
for _, pointer := range pointers {
data, err := resolveOpenAIImageBytes(ctx, client, headers, conversationID, pointer)
if err != nil {
return nil, 0, err
}
items = append(items, responseItem{
B64JSON: base64.StdEncoding.EncodeToString(data),
RevisedPrompt: pointer.Prompt,
})
}
payload := map[string]any{
"created": time.Now().Unix(),
"data": items,
}
body, err := json.Marshal(payload)
if err != nil {
return nil, 0, err
}
return body, len(items), nil
}
func resolveOpenAIImageBytes(
ctx context.Context,
client *req.Client,
headers http.Header,
conversationID string,
pointer openAIImagePointerInfo,
) ([]byte, error) {
if normalized := normalizeOpenAIImageBase64(pointer.B64JSON); normalized != "" {
return base64.StdEncoding.DecodeString(normalized)
}
if downloadURL := strings.TrimSpace(pointer.DownloadURL); downloadURL != "" {
return downloadOpenAIImageBytes(ctx, client, headers, downloadURL)
}
if strings.TrimSpace(pointer.Pointer) == "" {
return nil, fmt.Errorf("image asset is missing pointer, url, and base64 data")
}
downloadURL, err := fetchOpenAIImageDownloadURL(ctx, client, headers, conversationID, pointer.Pointer)
if err != nil {
return nil, err
}
return downloadOpenAIImageBytes(ctx, client, headers, downloadURL)
}
func normalizeOpenAIImageBase64(raw string) string {
raw = strings.TrimSpace(raw)
if raw == "" {
return ""
}
if strings.HasPrefix(strings.ToLower(raw), "data:") {
if idx := strings.Index(raw, ","); idx >= 0 && idx+1 < len(raw) {
raw = raw[idx+1:]
}
}
raw = strings.TrimSpace(raw)
raw = strings.TrimRight(raw, "=") + strings.Repeat("=", (4-len(raw)%4)%4)
if raw == "" {
return ""
}
if _, err := base64.StdEncoding.DecodeString(raw); err != nil {
return ""
}
return raw
}
func collectOpenAIImageInlineAssets(body []byte, fallbackPrompt string) []openAIImagePointerInfo {
if len(body) == 0 || !gjson.ValidBytes(body) {
return nil
}
var decoded any
if err := json.Unmarshal(body, &decoded); err != nil {
return nil
}
var out []openAIImagePointerInfo
walkOpenAIImageInlineAssets(decoded, strings.TrimSpace(fallbackPrompt), &out)
return out
}
func walkOpenAIImageInlineAssets(node any, prompt string, out *[]openAIImagePointerInfo) {
switch value := node.(type) {
case map[string]any:
localPrompt := prompt
for _, key := range []string{"revised_prompt", "image_gen_title", "prompt"} {
if v, ok := value[key].(string); ok && strings.TrimSpace(v) != "" {
localPrompt = strings.TrimSpace(v)
break
}
}
item := openAIImagePointerInfo{
Prompt: localPrompt,
Pointer: firstNonEmptyString(value["asset_pointer"], value["pointer"]),
DownloadURL: firstNonEmptyString(value["download_url"], value["url"], value["image_url"]),
B64JSON: firstNonEmptyString(value["b64_json"], value["base64"], value["image_base64"]),
MimeType: firstNonEmptyString(value["mime_type"], value["mimeType"], value["content_type"]),
}
switch {
case strings.HasPrefix(strings.TrimSpace(item.Pointer), "file-service://"),
strings.HasPrefix(strings.TrimSpace(item.Pointer), "sediment://"),
isLikelyOpenAIImageDownloadURL(item.DownloadURL),
normalizeOpenAIImageBase64(item.B64JSON) != "":
*out = append(*out, item)
}
for _, child := range value {
walkOpenAIImageInlineAssets(child, localPrompt, out)
}
case []any:
for _, child := range value {
walkOpenAIImageInlineAssets(child, prompt, out)
}
}
}
func firstNonEmptyString(values ...any) string {
for _, value := range values {
if s, ok := value.(string); ok && strings.TrimSpace(s) != "" {
return strings.TrimSpace(s)
}
}
return ""
}
func isLikelyOpenAIImageDownloadURL(raw string) bool {
raw = strings.TrimSpace(raw)
if raw == "" {
return false
}
if strings.HasPrefix(strings.ToLower(raw), "data:image/") {
return true
}
if !strings.HasPrefix(strings.ToLower(raw), "http://") && !strings.HasPrefix(strings.ToLower(raw), "https://") {
return false
}
lower := strings.ToLower(raw)
return strings.Contains(lower, "/download") ||
strings.Contains(lower, ".png") ||
strings.Contains(lower, ".jpg") ||
strings.Contains(lower, ".jpeg") ||
strings.Contains(lower, ".webp")
}
func detachOpenAIImageLifecycleContext(ctx context.Context, timeout time.Duration) (context.Context, context.CancelFunc) {
base := context.Background()
if ctx != nil {
base = context.WithoutCancel(ctx)
}
if timeout <= 0 {
return base, func() {}
}
return context.WithTimeout(base, timeout)
}
func fetchOpenAIImageDownloadURL(
ctx context.Context,
client *req.Client,
headers http.Header,
conversationID string,
pointer string,
) (string, error) {
url := ""
allowConversationRetry := false
switch {
case strings.HasPrefix(pointer, "file-service://"):
fileID := strings.TrimPrefix(pointer, "file-service://")
url = fmt.Sprintf("%s/%s/download", openAIChatGPTFilesURL, fileID)
case strings.HasPrefix(pointer, "sediment://"):
attachmentID := strings.TrimPrefix(pointer, "sediment://")
url = fmt.Sprintf("https://chatgpt.com/backend-api/conversation/%s/attachment/%s/download", conversationID, attachmentID)
allowConversationRetry = true
default:
return "", fmt.Errorf("unsupported image pointer: %s", pointer)
}
var lastErr error
for attempt := 0; attempt < 8; attempt++ {
var result struct {
DownloadURL string `json:"download_url"`
}
resp, err := client.R().
SetContext(ctx).
SetHeaders(headerToMap(headers)).
SetSuccessResult(&result).
Get(url)
if err != nil {
lastErr = err
} else if resp.IsSuccessState() && strings.TrimSpace(result.DownloadURL) != "" {
return strings.TrimSpace(result.DownloadURL), nil
} else {
statusErr := newOpenAIImageStatusError(resp, "fetch image download url failed")
if !allowConversationRetry || !isOpenAIImageTransientConversationNotFoundError(statusErr) {
return "", statusErr
}
lastErr = statusErr
}
if attempt == 7 {
break
}
timer := time.NewTimer(750 * time.Millisecond)
select {
case <-ctx.Done():
if !timer.Stop() {
<-timer.C
}
return "", ctx.Err()
case <-timer.C:
}
}
if lastErr == nil {
lastErr = fmt.Errorf("fetch image download url failed")
}
return "", lastErr
}
func downloadOpenAIImageBytes(ctx context.Context, client *req.Client, headers http.Header, downloadURL string) ([]byte, error) {
request := client.R().
SetContext(ctx).
DisableAutoReadResponse()
if strings.HasPrefix(downloadURL, openAIChatGPTStartURL) {
downloadHeaders := cloneHTTPHeader(headers)
downloadHeaders.Set("Accept", "image/*,*/*;q=0.8")
downloadHeaders.Del("Content-Type")
request.SetHeaders(headerToMap(downloadHeaders))
} else {
userAgent := strings.TrimSpace(headers.Get("User-Agent"))
if userAgent == "" {
userAgent = openAIImageBackendUserAgent
}
request.SetHeader("User-Agent", userAgent)
}
resp, err := request.Get(downloadURL)
if err != nil {
return nil, err
}
defer func() {
if resp != nil && resp.Body != nil {
_ = resp.Body.Close()
}
}()
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
return nil, newOpenAIImageStatusError(resp, "download image bytes failed")
}
return io.ReadAll(io.LimitReader(resp.Body, openAIImageMaxDownloadBytes))
}
func handleOpenAIImageBackendError(resp *req.Response) error {
return newOpenAIImageStatusError(resp, "backend-api request failed")
}
type openAIImageStatusError struct {
StatusCode int
Message string
ResponseBody []byte
ResponseHeaders http.Header
RequestID string
URL string
}
func (e *openAIImageStatusError) Error() string {
if e == nil {
return "openai image backend request failed"
}
if e.Message != "" {
return e.Message
}
if e.StatusCode > 0 {
return fmt.Sprintf("openai image backend request failed: status %d", e.StatusCode)
}
return "openai image backend request failed"
}
func newOpenAIImageStatusError(resp *req.Response, fallback string) error {
if resp == nil {
if strings.TrimSpace(fallback) == "" {
fallback = "openai image backend request failed"
}
return fmt.Errorf("%s", fallback)
}
statusCode := resp.StatusCode
headers := http.Header(nil)
requestID := ""
requestURL := ""
body := []byte(nil)
if resp.Response != nil {
headers = resp.Header.Clone()
requestID = strings.TrimSpace(resp.Header.Get("x-request-id"))
if resp.Request != nil && resp.Request.URL != nil {
requestURL = resp.Request.URL.String()
}
if resp.Body != nil {
body, _ = io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
}
}
message := sanitizeUpstreamErrorMessage(extractUpstreamErrorMessage(body))
if message == "" {
prefix := strings.TrimSpace(fallback)
if prefix == "" {
prefix = "openai image backend request failed"
}
message = fmt.Sprintf("%s: status %d", prefix, statusCode)
}
return &openAIImageStatusError{
StatusCode: statusCode,
Message: message,
ResponseBody: body,
ResponseHeaders: headers,
RequestID: requestID,
URL: requestURL,
}
}
func newOpenAIImageSyntheticStatusError(statusCode int, message string, requestURL string) *openAIImageStatusError {
message = sanitizeUpstreamErrorMessage(strings.TrimSpace(message))
if message == "" {
message = "openai image backend request failed"
}
var body []byte
if payload, err := json.Marshal(map[string]string{"detail": message}); err == nil {
body = payload
}
return &openAIImageStatusError{
StatusCode: statusCode,
Message: message,
ResponseBody: body,
URL: strings.TrimSpace(requestURL),
}
}
func isOpenAIImageTransientConversationNotFoundError(err error) bool {
statusErr, ok := err.(*openAIImageStatusError)
if !ok || statusErr == nil || statusErr.StatusCode != http.StatusNotFound {
return false
}
msg := strings.ToLower(strings.TrimSpace(statusErr.Message))
if strings.Contains(msg, "conversation_not_found") {
return true
}
if strings.Contains(msg, "conversation") && strings.Contains(msg, "not found") {
return true
}
bodyMsg := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(statusErr.ResponseBody)))
if strings.Contains(bodyMsg, "conversation_not_found") {
return true
}
return strings.Contains(bodyMsg, "conversation") && strings.Contains(bodyMsg, "not found")
}
func (s *OpenAIGatewayService) wrapOpenAIImageBackendError(
ctx context.Context,
c *gin.Context,
account *Account,
err error,
) error {
var statusErr *openAIImageStatusError
if !errors.As(err, &statusErr) || statusErr == nil {
return err
}
upstreamMsg := sanitizeUpstreamErrorMessage(statusErr.Message)
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: statusErr.StatusCode,
UpstreamRequestID: statusErr.RequestID,
UpstreamURL: safeUpstreamURL(statusErr.URL),
Kind: "request_error",
Message: upstreamMsg,
})
setOpsUpstreamError(c, statusErr.StatusCode, upstreamMsg, "")
if s.shouldFailoverOpenAIUpstreamResponse(statusErr.StatusCode, upstreamMsg, statusErr.ResponseBody) {
if s.rateLimitService != nil {
s.rateLimitService.HandleUpstreamError(ctx, account, statusErr.StatusCode, statusErr.ResponseHeaders, statusErr.ResponseBody)
}
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: statusErr.StatusCode,
UpstreamRequestID: statusErr.RequestID,
UpstreamURL: safeUpstreamURL(statusErr.URL),
Kind: "failover",
Message: upstreamMsg,
})
retryableOnSameAccount := account.IsPoolMode() && isPoolModeRetryableStatus(statusErr.StatusCode)
if strings.Contains(strings.ToLower(statusErr.Message), "unsupported challenge") {
retryableOnSameAccount = false
}
return &UpstreamFailoverError{
StatusCode: statusErr.StatusCode,
ResponseBody: statusErr.ResponseBody,
RetryableOnSameAccount: retryableOnSameAccount,
}
}
return statusErr
}
func cloneHTTPHeader(src http.Header) http.Header {
dst := make(http.Header, len(src))
for key, values := range src {
copied := make([]string, len(values))
copy(copied, values)
dst[key] = copied
}
return dst
}
func headerToMap(header http.Header) map[string]string {
if len(header) == 0 {
return nil
}
result := make(map[string]string, len(header))
for key, values := range header {
if len(values) == 0 {
continue
}
result[key] = values[0]
}
return result
}
func openAITimezoneOffsetMinutes() int {
_, offset := time.Now().Zone()
return offset / 60
}
func openAITimezoneName() string {
return time.Now().Location().String()
}
func generateOpenAIRequirementsToken(userAgent string) string {
config := []any{
"core" + strconv.Itoa(3008),
time.Now().UTC().Format(time.RFC1123),
nil,
0.123456,
coalesceOpenAIFileName(strings.TrimSpace(userAgent), openAIImageBackendUserAgent),
nil,
"prod-openai-images",
"en-US",
"en-US,en",
0,
"navigator.webdriver",
"location",
"document.body",
float64(time.Now().UnixMilli()) / 1000,
uuid.NewString(),
"",
8,
time.Now().Unix(),
}
answer, solved := generateOpenAIChallengeAnswer(strconv.FormatInt(time.Now().UnixNano(), 10), openAIImageRequirementsDiff, config)
if solved {
return "gAAAAAC" + answer
}
return ""
}
func generateOpenAIChallengeAnswer(seed string, difficulty string, config []any) (string, bool) {
diffBytes, err := hex.DecodeString(difficulty)
if err != nil {
return "", false
}
p1 := []byte(jsonCompactSlice(config[:3], true))
p2 := []byte(jsonCompactSlice(config[4:9], false))
p3 := []byte(jsonCompactSlice(config[10:], false))
seedBytes := []byte(seed)
for i := 0; i < 100000; i++ {
payload := fmt.Sprintf("%s%d,%s,%d,%s", p1, i, p2, i>>1, p3)
encoded := base64.StdEncoding.EncodeToString([]byte(payload))
sum := sha3.Sum512(append(seedBytes, []byte(encoded)...))
if bytes.Compare(sum[:len(diffBytes)], diffBytes) <= 0 {
return encoded, true
}
}
return "", false
}
func jsonCompactSlice(values []any, trimSuffixComma bool) string {
raw, _ := json.Marshal(values)
text := string(raw)
if trimSuffixComma {
return strings.TrimSuffix(text, "]")
}
return strings.TrimPrefix(text, "[")
}
func generateOpenAIProofToken(required bool, seed string, difficulty string, userAgent string) string {
if !required || strings.TrimSpace(seed) == "" || strings.TrimSpace(difficulty) == "" {
return ""
}
screen := 3008
if len(seed)%2 == 0 {
screen = 4010
}
proofToken := []any{
screen,
time.Now().UTC().Format(time.RFC1123),
nil,
0,
coalesceOpenAIFileName(strings.TrimSpace(userAgent), openAIImageBackendUserAgent),
"https://chatgpt.com/",
"dpl=openai-images",
"en",
"en-US",
nil,
"plugins[object PluginArray]",
"_reactListening",
"alert",
}
diffLen := len(difficulty)
for i := 0; i < 100000; i++ {
proofToken[3] = i
raw, _ := json.Marshal(proofToken)
encoded := base64.StdEncoding.EncodeToString(raw)
sum := sha3.Sum512([]byte(seed + encoded))
if strings.Compare(hex.EncodeToString(sum[:])[:diffLen], difficulty) <= 0 {
return "gAAAAAB" + encoded
}
}
fallbackBase := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf("%q", seed)))
return "gAAAAABwQ8Lk5FbGpA2NcR9dShT6gYjU7VxZ4D" + fallbackBase
}
func dedupeStrings(values []string) []string {
if len(values) == 0 {
return nil
}
seen := make(map[string]struct{}, len(values))
out := make([]string, 0, len(values))
for _, value := range values {
if _, ok := seen[value]; ok {
continue
}
seen[value] = struct{}{}
out = append(out, value)
}
return out
}