feat(gemini): update request structures for Veo predictLongRunning

- Refactored the request URL and body construction methods to align with the Veo predictLongRunning endpoint.
- Introduced new data structures for Veo instances and parameters, replacing the previous Gemini video generation configurations.
- Updated the Vertex adaptor to utilize the new Veo request payload format.
2026-02-28 18:42:54 +08:00
parent 8103b4b1a7
commit e22f59e449
3 changed files with 51 additions and 67 deletions
--- a/relay/channel/task/gemini/dto.go
+++ b/relay/channel/task/gemini/dto.go
@@ -1,16 +1,5 @@
 package gemini

-// GeminiVideoGenerationConfig represents the Gemini API GenerateVideosConfig.
-// Reference: https://ai.google.dev/gemini-api/docs/video
-type GeminiVideoGenerationConfig struct {
-	AspectRatio      string `json:"aspectRatio,omitempty"`
-	DurationSeconds  int    `json:"durationSeconds,omitempty"`
-	NegativePrompt   string `json:"negativePrompt,omitempty"`
-	PersonGeneration string `json:"personGeneration,omitempty"`
-	Resolution       string `json:"resolution,omitempty"`
-	NumberOfVideos   int    `json:"numberOfVideos,omitempty"`
-}
-
 // VeoImageInput represents an image input for Veo image-to-video.
 // Used by both Gemini and Vertex adaptors.
 type VeoImageInput struct {
@@ -18,17 +7,36 @@ type VeoImageInput struct {
 	MimeType           string `json:"mimeType"`
 }

-// GeminiVideoPayload is the top-level request body for the Gemini API
-// models/{model}:generateVideos endpoint.
-type GeminiVideoPayload struct {
-	Model  string                       `json:"model,omitempty"`
-	Prompt string                       `json:"prompt"`
-	Image  *VeoImageInput               `json:"image,omitempty"`
-	Config *GeminiVideoGenerationConfig `json:"config,omitempty"`
+// VeoInstance represents a single instance in the Veo predictLongRunning request.
+type VeoInstance struct {
+	Prompt string         `json:"prompt"`
+	Image  *VeoImageInput `json:"image,omitempty"`
 	// TODO: support referenceImages (style/asset references, up to 3 images)
 	// TODO: support lastFrame (first+last frame interpolation, Veo 3.1)
 }

+// VeoParameters represents the parameters block for Veo predictLongRunning.
+type VeoParameters struct {
+	SampleCount        int    `json:"sampleCount"`
+	DurationSeconds    int    `json:"durationSeconds,omitempty"`
+	AspectRatio        string `json:"aspectRatio,omitempty"`
+	Resolution         string `json:"resolution,omitempty"`
+	NegativePrompt     string `json:"negativePrompt,omitempty"`
+	PersonGeneration   string `json:"personGeneration,omitempty"`
+	StorageUri         string `json:"storageUri,omitempty"`
+	CompressionQuality string `json:"compressionQuality,omitempty"`
+	ResizeMode         string `json:"resizeMode,omitempty"`
+	Seed               *int   `json:"seed,omitempty"`
+	GenerateAudio      *bool  `json:"generateAudio,omitempty"`
+}
+
+// VeoRequestPayload is the top-level request body for the Veo
+// predictLongRunning endpoint (used by both Gemini and Vertex).
+type VeoRequestPayload struct {
+	Instances  []VeoInstance  `json:"instances"`
+	Parameters *VeoParameters `json:"parameters,omitempty"`
+}
+
 type submitResponse struct {
 	Name string `json:"name"`
 }