diff --git a/relay/channel/task/gemini/adaptor.go b/relay/channel/task/gemini/adaptor.go new file mode 100644 index 00000000..1b362a17 --- /dev/null +++ b/relay/channel/task/gemini/adaptor.go @@ -0,0 +1,279 @@ +package gemini + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "net/http" + "strings" + "time" + + "github.com/QuantumNous/new-api/constant" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/model" + "github.com/QuantumNous/new-api/relay/channel" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/service" + "github.com/QuantumNous/new-api/setting/model_setting" + "github.com/gin-gonic/gin" +) + +// ============================ +// Request / Response structures +// ============================ + +type requestPayload struct { + Instances []map[string]any `json:"instances"` + Parameters map[string]any `json:"parameters,omitempty"` +} + +type submitResponse struct { + Name string `json:"name"` +} + +type operationVideo struct { + MimeType string `json:"mimeType"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` +} + +type operationResponse struct { + Name string `json:"name"` + Done bool `json:"done"` + Response struct { + Type string `json:"@type"` + RaiMediaFilteredCount int `json:"raiMediaFilteredCount"` + Videos []operationVideo `json:"videos"` + BytesBase64Encoded string `json:"bytesBase64Encoded"` + Encoding string `json:"encoding"` + Video string `json:"video"` + GenerateVideoResponse struct { + GeneratedSamples []struct { + Video struct { + URI string `json:"uri"` + } `json:"video"` + } `json:"generatedSamples"` + } `json:"generateVideoResponse"` + } `json:"response"` + Error struct { + Message string `json:"message"` + } `json:"error"` +} + +// ============================ +// Adaptor implementation +// ============================ + +type TaskAdaptor struct { + ChannelType int + apiKey string + baseURL string +} + +func (a *TaskAdaptor) Init(info *relaycommon.RelayInfo) { + a.ChannelType = info.ChannelType + a.baseURL = info.ChannelBaseUrl + a.apiKey = info.ApiKey +} + +// ValidateRequestAndSetAction parses body, validates fields and sets default action. +func (a *TaskAdaptor) ValidateRequestAndSetAction(c *gin.Context, info *relaycommon.RelayInfo) (taskErr *dto.TaskError) { + // Use the standard validation method for TaskSubmitReq + return relaycommon.ValidateBasicTaskRequest(c, info, constant.TaskActionTextGenerate) +} + +// BuildRequestURL constructs the upstream URL. +func (a *TaskAdaptor) BuildRequestURL(info *relaycommon.RelayInfo) (string, error) { + modelName := info.OriginModelName + if modelName == "" { + modelName = "veo-3.0-generate-001" + } + + version := model_setting.GetGeminiVersionSetting(modelName) + + return fmt.Sprintf( + "%s/%s/models/%s:predictLongRunning", + a.baseURL, + version, + modelName, + ), nil +} + +// BuildRequestHeader sets required headers. +func (a *TaskAdaptor) BuildRequestHeader(c *gin.Context, req *http.Request, info *relaycommon.RelayInfo) error { + req.Header.Set("Content-Type", "application/json") + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", a.apiKey) + return nil +} + +// BuildRequestBody converts request into Gemini specific format. +func (a *TaskAdaptor) BuildRequestBody(c *gin.Context, info *relaycommon.RelayInfo) (io.Reader, error) { + v, ok := c.Get("task_request") + if !ok { + return nil, fmt.Errorf("request not found in context") + } + req := v.(relaycommon.TaskSubmitReq) + + body := requestPayload{ + Instances: []map[string]any{{"prompt": req.Prompt}}, + Parameters: map[string]any{}, + } + + // Add Veo-specific parameters from metadata + if req.Metadata != nil { + if v, ok := req.Metadata["aspectRatio"]; ok { + body.Parameters["aspectRatio"] = v + } else { + body.Parameters["aspectRatio"] = "16:9" // default + } + + if v, ok := req.Metadata["negativePrompt"]; ok { + body.Parameters["negativePrompt"] = v + } + + if v, ok := req.Metadata["durationSeconds"]; ok { + body.Parameters["durationSeconds"] = v + } else { + body.Parameters["durationSeconds"] = "8" // default + } + + if v, ok := req.Metadata["resolution"]; ok { + body.Parameters["resolution"] = v + } + + if v, ok := req.Metadata["personGeneration"]; ok { + body.Parameters["personGeneration"] = v + } else { + body.Parameters["personGeneration"] = "allow_adult" // default + } + } + + data, err := json.Marshal(body) + if err != nil { + return nil, err + } + return bytes.NewReader(data), nil +} + +// DoRequest delegates to common helper. +func (a *TaskAdaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (*http.Response, error) { + return channel.DoTaskApiRequest(a, c, info, requestBody) +} + +// DoResponse handles upstream response, returns taskID etc. +func (a *TaskAdaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (taskID string, taskData []byte, taskErr *dto.TaskError) { + responseBody, err := io.ReadAll(resp.Body) + if err != nil { + return "", nil, service.TaskErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError) + } + _ = resp.Body.Close() + + var s submitResponse + if err := json.Unmarshal(responseBody, &s); err != nil { + return "", nil, service.TaskErrorWrapper(err, "unmarshal_response_failed", http.StatusInternalServerError) + } + if strings.TrimSpace(s.Name) == "" { + return "", nil, service.TaskErrorWrapper(fmt.Errorf("missing operation name"), "invalid_response", http.StatusInternalServerError) + } + localID := encodeLocalTaskID(s.Name) + c.JSON(http.StatusOK, gin.H{"task_id": localID}) + return localID, responseBody, nil +} + +func (a *TaskAdaptor) GetModelList() []string { + return []string{"veo-3.0-generate-001"} +} + +func (a *TaskAdaptor) GetChannelName() string { + return "gemini" +} + +// FetchTask fetch task status +func (a *TaskAdaptor) FetchTask(baseUrl, key string, body map[string]any) (*http.Response, error) { + taskID, ok := body["task_id"].(string) + if !ok { + return nil, fmt.Errorf("invalid task_id") + } + + upstreamName, err := decodeLocalTaskID(taskID) + if err != nil { + return nil, fmt.Errorf("decode task_id failed: %w", err) + } + + // For Gemini API, we use GET request to the operations endpoint + version := model_setting.GetGeminiVersionSetting("veo-3.0-generate-001") + url := fmt.Sprintf("%s/%s/%s", baseUrl, version, upstreamName) + + req, err := http.NewRequest(http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + req.Header.Set("Accept", "application/json") + req.Header.Set("x-goog-api-key", key) + + return service.GetHttpClient().Do(req) +} + +func (a *TaskAdaptor) ParseTaskResult(respBody []byte) (*relaycommon.TaskInfo, error) { + var op operationResponse + if err := json.Unmarshal(respBody, &op); err != nil { + return nil, fmt.Errorf("unmarshal operation response failed: %w", err) + } + + ti := &relaycommon.TaskInfo{} + + if op.Error.Message != "" { + ti.Status = model.TaskStatusFailure + ti.Reason = op.Error.Message + ti.Progress = "100%" + return ti, nil + } + + if !op.Done { + ti.Status = model.TaskStatusInProgress + ti.Progress = "50%" + return ti, nil + } + + ti.Status = model.TaskStatusSuccess + ti.Progress = "100%" + + // Extract URL from generateVideoResponse if available + if len(op.Response.GenerateVideoResponse.GeneratedSamples) > 0 { + if uri := op.Response.GenerateVideoResponse.GeneratedSamples[0].Video.URI; uri != "" { + ti.Url = uri + } + } + + return ti, nil +} + +// ============================ +// helpers +// ============================ + +func encodeLocalTaskID(name string) string { + // Add timestamp to ensure uniqueness + timestamp := time.Now().Unix() + data := fmt.Sprintf("%s:%d", name, timestamp) + return base64.RawURLEncoding.EncodeToString([]byte(data)) +} + +func decodeLocalTaskID(local string) (string, error) { + b, err := base64.RawURLEncoding.DecodeString(local) + if err != nil { + return "", err + } + + // Extract the operation name from encoded data (remove timestamp if present) + parts := strings.Split(string(b), ":") + if len(parts) > 0 { + return parts[0], nil + } + + return string(b), nil +} diff --git a/relay/relay_adaptor.go b/relay/relay_adaptor.go index 485abe5a..7fb47da1 100644 --- a/relay/relay_adaptor.go +++ b/relay/relay_adaptor.go @@ -28,6 +28,7 @@ import ( "github.com/QuantumNous/new-api/relay/channel/siliconflow" "github.com/QuantumNous/new-api/relay/channel/submodel" taskdoubao "github.com/QuantumNous/new-api/relay/channel/task/doubao" + taskGemini "github.com/QuantumNous/new-api/relay/channel/task/gemini" taskjimeng "github.com/QuantumNous/new-api/relay/channel/task/jimeng" "github.com/QuantumNous/new-api/relay/channel/task/kling" tasksora "github.com/QuantumNous/new-api/relay/channel/task/sora" @@ -141,6 +142,8 @@ func GetTaskAdaptor(platform constant.TaskPlatform) channel.TaskAdaptor { return &taskdoubao.TaskAdaptor{} case constant.ChannelTypeSora, constant.ChannelTypeOpenAI: return &tasksora.TaskAdaptor{} + case constant.ChannelTypeGemini: + return &taskGemini.TaskAdaptor{} } } return nil