fix: Gemini embedding model only embeds the first text in a batch
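
The OpenAI-compatible embeddings endpoint accepts an array of inputs,
but the Gemini adaptor built a single embedContent request from
inputs[0], silently dropping every other text and returning a single
embedding. GetRequestURL now targets :batchEmbedContents instead of
:embedContent, ConvertEmbeddingRequest wraps every input in a
"requests" array, and GeminiEmbeddingHandler fans the returned
"embeddings" array back out into one OpenAI-format item per input.

Illustration only (not part of the commit; the model name and texts
are placeholders): a standalone sketch of the request body the
adaptor now produces for two inputs.

    package main

    import (
        "encoding/json"
        "fmt"
    )

    func main() {
        // One entry per input under "requests", each repeating the
        // model name, mirroring ConvertEmbeddingRequest below.
        body := map[string]interface{}{
            "requests": []map[string]interface{}{
                {
                    "model":   "models/text-embedding-004",
                    "content": map[string]interface{}{"parts": []map[string]string{{"text": "first input"}}},
                },
                {
                    "model":   "models/text-embedding-004",
                    "content": map[string]interface{}{"parts": []map[string]string{{"text": "second input"}}},
                },
            },
        }
        out, _ := json.MarshalIndent(body, "", "  ")
        fmt.Println(string(out))
    }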
@@ -216,10 +216,14 @@ type GeminiEmbeddingRequest struct {
 	OutputDimensionality int `json:"outputDimensionality,omitempty"`
 }
 
-type GeminiEmbeddingResponse struct {
-	Embedding ContentEmbedding `json:"embedding"`
+type GeminiBatchEmbeddingRequest struct {
+	Requests []*GeminiEmbeddingRequest `json:"requests"`
 }
 
-type ContentEmbedding struct {
+type GeminiEmbedding struct {
 	Values []float64 `json:"values"`
 }
+
+type GeminiBatchEmbeddingResponse struct {
+	Embeddings []*GeminiEmbedding `json:"embeddings"`
+}
@@ -114,7 +114,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	if strings.HasPrefix(info.UpstreamModelName, "text-embedding") ||
 		strings.HasPrefix(info.UpstreamModelName, "embedding") ||
 		strings.HasPrefix(info.UpstreamModelName, "gemini-embedding") {
-		return fmt.Sprintf("%s/%s/models/%s:embedContent", info.BaseUrl, version, info.UpstreamModelName), nil
+		return fmt.Sprintf("%s/%s/models/%s:batchEmbedContents", info.BaseUrl, version, info.UpstreamModelName), nil
 	}
 
 	action := "generateContent"
@@ -156,29 +156,35 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	if len(inputs) == 0 {
 		return nil, errors.New("input is empty")
 	}
 
-	// only process the first input
-	geminiRequest := dto.GeminiEmbeddingRequest{
-		Content: dto.GeminiChatContent{
-			Parts: []dto.GeminiPart{
-				{
-					Text: inputs[0],
-				},
-			},
-		},
-	}
-
-	// set specific parameters for different models
-	// https://ai.google.dev/api/embeddings?hl=zh-cn#method:-models.embedcontent
-	switch info.UpstreamModelName {
-	case "text-embedding-004":
-		// except embedding-001 supports setting `OutputDimensionality`
-		if request.Dimensions > 0 {
-			geminiRequest.OutputDimensionality = request.Dimensions
-		}
-	}
-
-	return geminiRequest, nil
+	// process all inputs
+	geminiRequests := make([]map[string]interface{}, 0, len(inputs))
+	for _, input := range inputs {
+		geminiRequest := map[string]interface{}{
+			"model": fmt.Sprintf("models/%s", info.UpstreamModelName),
+			"content": dto.GeminiChatContent{
+				Parts: []dto.GeminiPart{
+					{
+						Text: input,
+					},
+				},
+			},
+		}
+		// set specific parameters for different models
+		// https://ai.google.dev/api/embeddings?hl=zh-cn#method:-models.embedcontent
+		switch info.UpstreamModelName {
+		case "text-embedding-004":
+			// except embedding-001 supports setting `OutputDimensionality`
+			if request.Dimensions > 0 {
+				geminiRequest["outputDimensionality"] = request.Dimensions
+			}
+		}
+		geminiRequests = append(geminiRequests, geminiRequest)
+	}
+
+	return map[string]interface{}{
+		"requests": geminiRequests,
+	}, nil
 }
 
 func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
@@ -974,7 +974,7 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *h
 		return nil, types.NewOpenAIError(readErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
 	}
 
-	var geminiResponse dto.GeminiEmbeddingResponse
+	var geminiResponse dto.GeminiBatchEmbeddingResponse
 	if jsonErr := common.Unmarshal(responseBody, &geminiResponse); jsonErr != nil {
 		return nil, types.NewOpenAIError(jsonErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
 	}
@@ -982,14 +982,16 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *h
 	// convert to openai format response
 	openAIResponse := dto.OpenAIEmbeddingResponse{
 		Object: "list",
-		Data: []dto.OpenAIEmbeddingResponseItem{
-			{
-				Object: "embedding",
-				Embedding: geminiResponse.Embedding.Values,
-				Index: 0,
-			},
-		},
-		Model: info.UpstreamModelName,
+		Data: make([]dto.OpenAIEmbeddingResponseItem, 0, len(geminiResponse.Embeddings)),
+		Model: info.UpstreamModelName,
+	}
+
+	for i, embedding := range geminiResponse.Embeddings {
+		openAIResponse.Data = append(openAIResponse.Data, dto.OpenAIEmbeddingResponseItem{
+			Object: "embedding",
+			Embedding: embedding.Values,
+			Index: i,
+		})
 	}
 
 	// calculate usage
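
Illustration only (not part of the commit): a minimal standalone
sketch of the response-side fan-out, with local stand-ins for the dto
types above and placeholder values.

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Stand-ins for dto.GeminiEmbedding / dto.GeminiBatchEmbeddingResponse.
    type GeminiEmbedding struct {
        Values []float64 `json:"values"`
    }

    type GeminiBatchEmbeddingResponse struct {
        Embeddings []*GeminiEmbedding `json:"embeddings"`
    }

    func main() {
        // A sample batchEmbedContents body carrying two embeddings.
        raw := []byte(`{"embeddings":[{"values":[0.01,0.02]},{"values":[0.03,0.04]}]}`)

        var resp GeminiBatchEmbeddingResponse
        if err := json.Unmarshal(raw, &resp); err != nil {
            panic(err)
        }

        // Same loop shape as GeminiEmbeddingHandler: one OpenAI-format
        // item per embedding, indexed in input order.
        for i, e := range resp.Embeddings {
            fmt.Printf("index=%d embedding=%v\n", i, e.Values)
        }
    }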