From b77bf11b02f4f630f0dd5a67b12e81836b2d4cd3 Mon Sep 17 00:00:00 2001 From: feitianbubu Date: Sat, 18 Oct 2025 14:45:13 +0800 Subject: [PATCH 1/5] feat: add gemini veo3.1 --- relay/channel/task/gemini/adaptor.go | 279 +++++++++++++++++++++++++++ relay/relay_adaptor.go | 3 + 2 files changed, 282 insertions(+) create mode 100644 relay/channel/task/gemini/adaptor.go diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go new file mode 100644 index 00000000..1b362a17 --- /dev/null +++ b/relay/channel/task/gemini/adaptor.go @@ -0,0 +1,279 @@ +package gemini + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/QuantumNous/new-api/constant" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/model" + "github.com/QuantumNous/new-api/relay/channel" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/gin-gonic/gin" +) + +// ============================ +// Request / Response structures +// ============================ + +type requestPayload struct { + Instances []map[string]any `json:"instances"` + Parameters map[string]any `json:"parameters,omitempty"` +} + +type submitResponse struct { + Name string `json:"name"` +} + +type operationVideo struct { + MimeType string `json:"mimeType"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` +} + +type operationResponse struct { + Name string `json:"name"` + Done bool `json:"done"` + Response struct { + Type string `json:"@type"` + RaiMediaFilteredCount int `json:"raiMediaFilteredCount"` + Videos []operationVideo `json:"videos"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` + Video string `json:"video"` + GenerateVideoResponse struct { + GeneratedSamples []struct { + Video struct { + URI string `json:"uri"` + } `json:"video"` + } `json:"generatedSamples"` + } `json:"generateVideoResponse"` + } `json:"response"` + Error struct { + Message string `json:"message"` + } `json:"error"` +} + +// ============================ +// Adaptor implementation +// ============================ + +type TaskAdaptor struct { + ChannelType int + apiKey string + baseURL string +} + +func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) { + a.ChannelType = info.ChannelType + a.baseURL = info.ChannelBaseUrl + a.apiKey = info.ApiKey +} + +// ValidateRequestAndSetAction parses body, validates fields and sets default action. +func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) { + // Use the standard validation method for TaskSubmitReq + return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate) +} + +// BuildRequestURL constructs the upstream URL. +func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) { + modelName := info.OriginModelName + if modelName == "" { + modelName = "veo-3.0-generate-001" + } + + version := model_setting.GetGeminiVersionSetting(modelName) + + return fmt.Sprintf( + "%s/%s/models/%s:predictLongRunning", + a.baseURL, + version, + modelName, + ), nil +} + +// BuildRequestHeader sets required headers. +func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info *relaycommon.RelayInfo) error { + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", a.apiKey) + return nil +} + +// BuildRequestBody converts request into Gemini specific format. +func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) { + v, ok := c.Get("task_request") + if !ok { + return nil, fmt.Errorf("request not found in context") + } + req := v.(relaycommon.TaskSubmitReq) + + body := requestPayload{ + Instances: []map[string]any{{"prompt": req.Prompt}}, + Parameters: map[string]any{}, + } + + // Add Veo-specific parameters from metadata + if req.Metadata != nil { + if v, ok := req.Metadata["aspectRatio"]; ok { + body.Parameters["aspectRatio"] = v + } else { + body.Parameters["aspectRatio"] = "16:9" // default + } + + if v, ok := req.Metadata["negativePrompt"]; ok { + body.Parameters["negativePrompt"] = v + } + + if v, ok := req.Metadata["durationSeconds"]; ok { + body.Parameters["durationSeconds"] = v + } else { + body.Parameters["durationSeconds"] = "8" // default + } + + if v, ok := req.Metadata["resolution"]; ok { + body.Parameters["resolution"] = v + } + + if v, ok := req.Metadata["personGeneration"]; ok { + body.Parameters["personGeneration"] = v + } else { + body.Parameters["personGeneration"] = "allow_adult" // default + } + } + + data, err := json.Marshal(body) + if err != nil { + return nil, err + } + return bytes.NewReader(data), nil +} + +// DoRequest delegates to common helper. +func (a *TaskAdaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (*http.Response, error) { + return channel.DoTaskApiRequest(a, c, info, requestBody) +} + +// DoResponse handles upstream response, returns taskID etc. +func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (taskID string, taskData []byte, taskErr *dto.TaskError) { + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", nil, service.TaskErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError) + } + _ = resp.Body.Close() + + var s submitResponse + if err := json.Unmarshal(responseBody, &s); err != nil { + return "", nil, service.TaskErrorWrapper(err, "unmarshal_response_failed", http.StatusInternalServerError) + } + if strings.TrimSpace(s.Name) == "" { + return "", nil, service.TaskErrorWrapper(fmt.Errorf("missing operation name"), "invalid_response", http.StatusInternalServerError) + } + localID := encodeLocalTaskID(s.Name) + c.JSON(http.StatusOK, gin.H{"task_id": localID}) + return localID, responseBody, nil +} + +func (a *TaskAdaptor) GetModelList() []string { + return []string{"veo-3.0-generate-001"} +} + +func (a *TaskAdaptor) GetChannelName() string { + return "gemini" +} + +// FetchTask fetch task status +func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) { + taskID, ok := body["task_id"].(string) + if !ok { + return nil, fmt.Errorf("invalid task_id") + } + + upstreamName, err := decodeLocalTaskID(taskID) + if err != nil { + return nil, fmt.Errorf("decode task_id failed: %w", err) + } + + // For Gemini API, we use GET request to the operations endpoint + version := model_setting.GetGeminiVersionSetting("veo-3.0-generate-001") + url := fmt.Sprintf("%s/%s/%s", baseUrl, version, upstreamName) + + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", key) + + return service.GetHttpClient().Do(req) +} + +func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error) { + var op operationResponse + if err := json.Unmarshal(respBody, &op); err != nil { + return nil, fmt.Errorf("unmarshal operation response failed: %w", err) + } + + ti := &relaycommon.TaskInfo{} + + if op.Error.Message != "" { + ti.Status = model.TaskStatusFailure + ti.Reason = op.Error.Message + ti.Progress = "100%" + return ti, nil + } + + if !op.Done { + ti.Status = model.TaskStatusInProgress + ti.Progress = "50%" + return ti, nil + } + + ti.Status = model.TaskStatusSuccess + ti.Progress = "100%" + + // Extract URL from generateVideoResponse if available + if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 { + if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" { + ti.Url = uri + } + } + + return ti, nil +} + +// ============================ +// helpers +// ============================ + +func encodeLocalTaskID(name string) string { + // Add timestamp to ensure uniqueness + timestamp := time.Now().Unix() + data := fmt.Sprintf("%s:%d", name, timestamp) + return base64.RawURLEncoding.EncodeToString([]byte(data)) +} + +func decodeLocalTaskID(local string) (string, error) { + b, err := base64.RawURLEncoding.DecodeString(local) + if err != nil { + return "", err + } + + // Extract the operation name from encoded data (remove timestamp if present) + parts := strings.Split(string(b), ":") + if len(parts) > 0 { + return parts[0], nil + } + + return string(b), nil +} diff --git a/relay/relay_adaptor.go b/relay/relay_adaptor.go index 485abe5a..7fb47da1 100644 --- a/relay/relay_adaptor.go +++ b/relay/relay_adaptor.go @@ -28,6 +28,7 @@ import ( "github.com/QuantumNous/new-api/relay/channel/siliconflow" "github.com/QuantumNous/new-api/relay/channel/submodel" taskdoubao "github.com/QuantumNous/new-api/relay/channel/task/doubao" + taskGemini "github.com/QuantumNous/new-api/relay/channel/task/gemini" taskjimeng "github.com/QuantumNous/new-api/relay/channel/task/jimeng" "github.com/QuantumNous/new-api/relay/channel/task/kling" tasksora "github.com/QuantumNous/new-api/relay/channel/task/sora" @@ -141,6 +142,8 @@ func GetTaskAdaptor(platform constant.TaskPlatform) channel.TaskAdaptor { return &taskdoubao.TaskAdaptor{} case constant.ChannelTypeSora, constant.ChannelTypeOpenAI: return &tasksora.TaskAdaptor{} + case constant.ChannelTypeGemini: + return &taskGemini.TaskAdaptor{} } } return nil From f0a727ccb8e8c0880a469cb794a3e9ef0a591f28 Mon Sep 17 00:00:00 2001 From: feitianbubu Date: Sat, 18 Oct 2025 20:31:11 +0800 Subject: [PATCH 2/5] feat: veo video url use proxy download --- controller/video_proxy.go | 31 ++++++++++++++++++++---- relay/channel/task/gemini/adaptor.go | 35 +++++++++++----------------- 2 files changed, 40 insertions(+), 26 deletions(-) diff --git a/controller/video_proxy.go b/controller/video_proxy.go index 17e466ae..df4aa911 100644 --- a/controller/video_proxy.go +++ b/controller/video_proxy.go @@ -4,8 +4,10 @@ import ( "fmt" "io" "net/http" + "net/url" "time" + "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/logger" "github.com/QuantumNous/new-api/model" @@ -36,7 +38,7 @@ func VideoProxy(c *gin.Context) { return } if !exists || task == nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: %s", taskID, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: %v", taskID, err)) c.JSON(http.StatusNotFound, gin.H{ "error": gin.H{ "message": "Task not found", @@ -71,15 +73,15 @@ func VideoProxy(c *gin.Context) { if baseURL == "" { baseURL = "https://api.openai.com" } - videoURL := fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID) + var videoURL string client := &http.Client{ Timeout: 60 * time.Second, } - req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, videoURL, nil) + req, err := http.NewRequestWithContext(c.Request.Context(), http.MethodGet, "", nil) if err != nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request for %s: %s", videoURL, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to create request: %s", err.Error())) c.JSON(http.StatusInternalServerError, gin.H{ "error": gin.H{ "message": "Failed to create proxy request", @@ -89,7 +91,26 @@ func VideoProxy(c *gin.Context) { return } - req.Header.Set("Authorization", "Bearer "+channel.Key) + if channel.Type == constant.ChannelTypeGemini { + videoURL = fmt.Sprintf("%s&key=%s", c.Query("url"), channel.Key) + req.Header.Set("x-goog-api-key", channel.Key) + } else { + // Default (Sora, etc.): Use original logic + videoURL = fmt.Sprintf("%s/v1/videos/%s/content", baseURL, task.TaskID) + req.Header.Set("Authorization", "Bearer "+channel.Key) + } + + req.URL, err = url.Parse(videoURL) + if err != nil { + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to parse URL %s: %s", videoURL, err.Error())) + c.JSON(http.StatusInternalServerError, gin.H{ + "error": gin.H{ + "message": "Failed to create proxy request", + "type": "server_error", + }, + }) + return + } resp, err := client.Do(req) if err != nil { diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go index 1b362a17..368dc534 100644 --- a/relay/channel/task/gemini/adaptor.go +++ b/relay/channel/task/gemini/adaptor.go @@ -17,6 +17,7 @@ import ( relaycommon "github.com/QuantumNous/new-api/relay/common" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/QuantumNous/new-api/setting/system_setting" "github.com/gin-gonic/gin" ) @@ -87,10 +88,6 @@ func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycom // BuildRequestURL constructs the upstream URL. func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) { modelName := info.OriginModelName - if modelName == "" { - modelName = "veo-3.0-generate-001" - } - version := model_setting.GetGeminiVersionSetting(modelName) return fmt.Sprintf( @@ -178,13 +175,18 @@ func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *rela if strings.TrimSpace(s.Name) == "" { return "", nil, service.TaskErrorWrapper(fmt.Errorf("missing operation name"), "invalid_response", http.StatusInternalServerError) } - localID := encodeLocalTaskID(s.Name) - c.JSON(http.StatusOK, gin.H{"task_id": localID}) - return localID, responseBody, nil + taskID = encodeLocalTaskID(s.Name) + ov := dto.NewOpenAIVideo() + ov.ID = taskID + ov.TaskID = taskID + ov.CreatedAt = time.Now().Unix() + ov.Model = info.OriginModelName + c.JSON(http.StatusOK, ov) + return taskID, responseBody, nil } func (a *TaskAdaptor) GetModelList() []string { - return []string{"veo-3.0-generate-001"} + return []string{"veo-3.0-generate-001", "veo-3.1-generate-preview", "veo-3.1-fast-generate-preview"} } func (a *TaskAdaptor) GetChannelName() string { @@ -204,7 +206,7 @@ func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http } // For Gemini API, we use GET request to the operations endpoint - version := model_setting.GetGeminiVersionSetting("veo-3.0-generate-001") + version := model_setting.GetGeminiVersionSetting("default") url := fmt.Sprintf("%s/%s/%s", baseUrl, version, upstreamName) req, err := http.NewRequest(http.MethodGet, url, nil) @@ -245,7 +247,8 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e // Extract URL from generateVideoResponse if available if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 { if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" { - ti.Url = uri + taskID := encodeLocalTaskID(op.Name) + ti.Url = fmt.Sprintf("%s/v1/videos/%s/content?url=%s", system_setting.ServerAddress, taskID, uri) } } @@ -257,10 +260,7 @@ func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, e // ============================ func encodeLocalTaskID(name string) string { - // Add timestamp to ensure uniqueness - timestamp := time.Now().Unix() - data := fmt.Sprintf("%s:%d", name, timestamp) - return base64.RawURLEncoding.EncodeToString([]byte(data)) + return base64.RawURLEncoding.EncodeToString([]byte(name)) } func decodeLocalTaskID(local string) (string, error) { @@ -268,12 +268,5 @@ func decodeLocalTaskID(local string) (string, error) { if err != nil { return "", err } - - // Extract the operation name from encoded data (remove timestamp if present) - parts := strings.Split(string(b), ":") - if len(parts) > 0 { - return parts[0], nil - } - return string(b), nil } From fa81fe93968f69163dfdd3d7e9cd75bbb415d48e Mon Sep 17 00:00:00 2001 From: feitianbubu Date: Sat, 18 Oct 2025 21:37:39 +0800 Subject: [PATCH 3/5] feat: gemini veo req to struct --- relay/channel/task/gemini/adaptor.go | 67 ++++++++++++++-------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go index 368dc534..62ec3833 100644 --- a/relay/channel/task/gemini/adaptor.go +++ b/relay/channel/task/gemini/adaptor.go @@ -19,15 +19,32 @@ import ( "github.com/QuantumNous/new-api/setting/model_setting" "github.com/QuantumNous/new-api/setting/system_setting" "github.com/gin-gonic/gin" + "github.com/pkg/errors" ) // ============================ // Request / Response structures // ============================ -type requestPayload struct { - Instances []map[string]any `json:"instances"` - Parameters map[string]any `json:"parameters,omitempty"` +// GeminiVideoGenerationConfig represents the video generation configuration +// Based on: https://ai.google.dev/gemini-api/docs/video +type GeminiVideoGenerationConfig struct { + AspectRatio string `json:"aspectRatio,omitempty"` // "16:9" or "9:16" + DurationSeconds float64 `json:"durationSeconds,omitempty"` // 4, 6, or 8 (as number) + NegativePrompt string `json:"negativePrompt,omitempty"` // unwanted elements + PersonGeneration string `json:"personGeneration,omitempty"` // "allow_all" for text-to-video, "allow_adult" for image-to-video + Resolution string `json:"resolution,omitempty"` // video resolution +} + +// GeminiVideoRequest represents a single video generation instance +type GeminiVideoRequest struct { + Prompt string `json:"prompt"` +} + +// GeminiVideoPayload represents the complete video generation request payload +type GeminiVideoPayload struct { + Instances []GeminiVideoRequest `json:"instances"` + Parameters GeminiVideoGenerationConfig `json:"parameters,omitempty"` } type submitResponse struct { @@ -114,38 +131,22 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn } req := v.(relaycommon.TaskSubmitReq) - body := requestPayload{ - Instances: []map[string]any{{"prompt": req.Prompt}}, - Parameters: map[string]any{}, + // Create structured video generation request + body := GeminiVideoPayload{ + Instances: []GeminiVideoRequest{ + {Prompt: req.Prompt}, + }, + Parameters: GeminiVideoGenerationConfig{}, } - // Add Veo-specific parameters from metadata - if req.Metadata != nil { - if v, ok := req.Metadata["aspectRatio"]; ok { - body.Parameters["aspectRatio"] = v - } else { - body.Parameters["aspectRatio"] = "16:9" // default - } - - if v, ok := req.Metadata["negativePrompt"]; ok { - body.Parameters["negativePrompt"] = v - } - - if v, ok := req.Metadata["durationSeconds"]; ok { - body.Parameters["durationSeconds"] = v - } else { - body.Parameters["durationSeconds"] = "8" // default - } - - if v, ok := req.Metadata["resolution"]; ok { - body.Parameters["resolution"] = v - } - - if v, ok := req.Metadata["personGeneration"]; ok { - body.Parameters["personGeneration"] = v - } else { - body.Parameters["personGeneration"] = "allow_adult" // default - } + metadata := req.Metadata + medaBytes, err := json.Marshal(metadata) + if err != nil { + return nil, errors.Wrap(err, "metadata marshal metadata failed") + } + err = json.Unmarshal(medaBytes, &body.Parameters) + if err != nil { + return nil, errors.Wrap(err, "unmarshal metadata failed") } data, err := json.Marshal(body) From 37776c5083511c22b8acd972e7e0c5182aa5ef8a Mon Sep 17 00:00:00 2001 From: IcedTangerine Date: Sat, 18 Oct 2025 23:06:25 +0800 Subject: [PATCH 4/5] Fix error logging for channel retrieval failure --- controller/video_proxy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controller/video_proxy.go b/controller/video_proxy.go index df4aa911..2bfb0dc2 100644 --- a/controller/video_proxy.go +++ b/controller/video_proxy.go @@ -60,7 +60,7 @@ func VideoProxy(c *gin.Context) { channel, err := model.CacheGetChannel(task.ChannelId) if err != nil { - logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get channel %d: %s", task.ChannelId, err.Error())) + logger.LogError(c.Request.Context(), fmt.Sprintf("Failed to get task %s: not found", taskID)) c.JSON(http.StatusInternalServerError, gin.H{ "error": gin.H{ "message": "Failed to retrieve channel information", From 5c792263ba07696a46c39c7141b09939ce1bd28d Mon Sep 17 00:00:00 2001 From: IcedTangerine Date: Sat, 18 Oct 2025 23:07:50 +0800 Subject: [PATCH 5/5] Add type assertion for task_request in adaptor.go --- relay/channel/task/gemini/adaptor.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go index 62ec3833..092059c6 100644 --- a/relay/channel/task/gemini/adaptor.go +++ b/relay/channel/task/gemini/adaptor.go @@ -129,7 +129,10 @@ func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayIn if !ok { return nil, fmt.Errorf("request not found in context") } - req := v.(relaycommon.TaskSubmitReq) + req, ok := v.(relaycommon.TaskSubmitReq) + if !ok { + return nil, fmt.Errorf("unexpected task_request type") + } // Create structured video generation request body := GeminiVideoPayload{