Merge pull request #1537 from RedwindA/feat/support-native-gemini-embedding
feat: support native Gemini Embedding format
@@ -114,7 +114,11 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	if strings.HasPrefix(info.UpstreamModelName, "text-embedding") ||
 		strings.HasPrefix(info.UpstreamModelName, "embedding") ||
 		strings.HasPrefix(info.UpstreamModelName, "gemini-embedding") {
-		return fmt.Sprintf("%s/%s/models/%s:batchEmbedContents", info.BaseUrl, version, info.UpstreamModelName), nil
+		action := "embedContent"
+		if info.IsGeminiBatchEmbedding {
+			action = "batchEmbedContents"
+		}
+		return fmt.Sprintf("%s/%s/models/%s:%s", info.BaseUrl, version, info.UpstreamModelName, action), nil
 	}

 	action := "generateContent"
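Previously every embedding request was pinned to the batch endpoint regardless of payload shape; the action is now chosen per request. A minimal, self-contained sketch of the resulting URLs (the base URL, API version, and model name below are illustrative assumptions, not values taken from the repo):

package main

import "fmt"

// embeddingURL mirrors the action selection added in GetRequestURL.
func embeddingURL(baseURL, version, model string, batch bool) string {
	action := "embedContent"
	if batch {
		action = "batchEmbedContents"
	}
	return fmt.Sprintf("%s/%s/models/%s:%s", baseURL, version, model, action)
}

func main() {
	base := "https://generativelanguage.googleapis.com" // assumed public Gemini endpoint
	fmt.Println(embeddingURL(base, "v1beta", "gemini-embedding-001", false))
	// .../models/gemini-embedding-001:embedContent
	fmt.Println(embeddingURL(base, "v1beta", "gemini-embedding-001", true))
	// .../models/gemini-embedding-001:batchEmbedContents
}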
@@ -159,6 +163,9 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	if len(inputs) == 0 {
 		return nil, errors.New("input is empty")
 	}
+	// We always build a batch-style payload with `requests`, so ensure we call the
+	// batch endpoint upstream to avoid payload/endpoint mismatches.
+	info.IsGeminiBatchEmbedding = true
 	// process all inputs
 	geminiRequests := make([]map[string]interface{}, 0, len(inputs))
 	for _, input := range inputs {
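Setting the flag here keeps the converter and GetRequestURL in agreement: the payload always carries a `requests` array, so the URL must end in :batchEmbedContents. A hedged sketch of that payload shape, following the public batchEmbedContents documentation (the repo's actual map construction may differ):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Illustrative batch-style payload; field names follow the public
	// Gemini batchEmbedContents API, not necessarily this repo's dto code.
	payload := map[string]interface{}{
		"requests": []map[string]interface{}{
			{
				"model": "models/gemini-embedding-001",
				"content": map[string]interface{}{
					"parts": []map[string]string{{"text": "first input"}},
				},
			},
			{
				"model": "models/gemini-embedding-001",
				"content": map[string]interface{}{
					"parts": []map[string]string{{"text": "second input"}},
				},
			},
		},
	}
	b, _ := json.MarshalIndent(payload, "", "  ")
	fmt.Println(string(b))
}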
@@ -176,7 +183,7 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 	// set specific parameters for different models
 	// https://ai.google.dev/api/embeddings?hl=zh-cn#method:-models.embedcontent
 	switch info.UpstreamModelName {
-	case "text-embedding-004","gemini-embedding-exp-03-07","gemini-embedding-001":
+	case "text-embedding-004", "gemini-embedding-exp-03-07", "gemini-embedding-001":
 		// Only newer models introduced after 2024 support OutputDimensionality
 		if request.Dimensions > 0 {
 			geminiRequest["outputDimensionality"] = request.Dimensions
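The net effect is that an OpenAI-style `dimensions` field is forwarded as Gemini's `outputDimensionality`, but only for models known to accept it. A small sketch of that gating (the model list is copied from the diff; the helper name is made up for illustration):

package main

import "fmt"

// supportsOutputDimensionality is a hypothetical helper mirroring the
// switch above: only newer embedding models accept outputDimensionality.
func supportsOutputDimensionality(model string) bool {
	switch model {
	case "text-embedding-004", "gemini-embedding-exp-03-07", "gemini-embedding-001":
		return true
	}
	return false
}

func main() {
	geminiRequest := map[string]interface{}{}
	dimensions := 256 // e.g. the OpenAI-style "dimensions" request field
	if dimensions > 0 && supportsOutputDimensionality("gemini-embedding-001") {
		geminiRequest["outputDimensionality"] = dimensions
	}
	fmt.Println(geminiRequest) // map[outputDimensionality:256]
}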
@@ -201,6 +208,10 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request

 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
 	if info.RelayMode == constant.RelayModeGemini {
+		if strings.HasSuffix(info.RequestURLPath, ":embedContent") ||
+			strings.HasSuffix(info.RequestURLPath, ":batchEmbedContents") {
+			return NativeGeminiEmbeddingHandler(c, resp, info)
+		}
 		if info.IsStream {
 			return GeminiTextGenerationStreamHandler(c, info, resp)
 		} else {
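Because the suffix check runs before the stream/non-stream split, native embedding responses never enter the text-generation handlers. A small sketch of the dispatch predicate (the paths are illustrative):

package main

import (
	"fmt"
	"strings"
)

// isNativeEmbeddingPath mirrors the suffix check added to DoResponse.
func isNativeEmbeddingPath(path string) bool {
	return strings.HasSuffix(path, ":embedContent") ||
		strings.HasSuffix(path, ":batchEmbedContents")
}

func main() {
	for _, p := range []string{
		"/v1beta/models/gemini-embedding-001:embedContent",
		"/v1beta/models/gemini-embedding-001:batchEmbedContents",
		"/v1beta/models/gemini-2.0-flash:generateContent",
	} {
		fmt.Println(p, "->", isNativeEmbeddingPath(p))
	}
}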
@@ -225,18 +236,6 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		return GeminiChatHandler(c, info, resp)
 	}
-
-	//if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 {
-	//	// If thinking tokens are produced even though "-thinking" was not requested, bill it as a thinking model
-	//	if !strings.HasSuffix(info.OriginModelName, "-thinking") &&
-	//		!strings.HasSuffix(info.OriginModelName, "-nothinking") {
-	//		thinkingModelName := info.OriginModelName + "-thinking"
-	//		if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) {
-	//			info.OriginModelName = thinkingModelName
-	//		}
-	//	}
-	//}
-
 	return nil, types.NewError(errors.New("not implemented"), types.ErrorCodeBadResponseBody)
 }

 func (a *Adaptor) GetModelList() []string {
@@ -1,7 +1,6 @@
 package gemini

 import (
-	"github.com/pkg/errors"
 	"io"
 	"net/http"
 	"one-api/common"
@@ -12,6 +11,8 @@ import (
 	"one-api/types"
 	"strings"

+	"github.com/pkg/errors"
+
 	"github.com/gin-gonic/gin"
 )

@@ -63,6 +64,42 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
 	return &usage, nil
 }

+func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.Usage, *types.NewAPIError) {
+	defer common.CloseResponseBodyGracefully(resp)
+
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+
+	if common.DebugEnabled {
+		println(string(responseBody))
+	}
+
+	usage := &dto.Usage{
+		PromptTokens: info.PromptTokens,
+		TotalTokens:  info.PromptTokens,
+	}
+
+	if info.IsGeminiBatchEmbedding {
+		var geminiResponse dto.GeminiBatchEmbeddingResponse
+		err = common.Unmarshal(responseBody, &geminiResponse)
+		if err != nil {
+			return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+		}
+	} else {
+		var geminiResponse dto.GeminiEmbeddingResponse
+		err = common.Unmarshal(responseBody, &geminiResponse)
+		if err != nil {
+			return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+		}
+	}
+
+	common.IOCopyBytesGracefully(c, resp, responseBody)
+
+	return usage, nil
+}
+
 func GeminiTextGenerationStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
	var usage = &dto.Usage{}
	var imageCount int
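The new handler validates the upstream JSON before relaying it verbatim, and bills by the pre-counted prompt tokens (Gemini embedding responses do not appear to carry usage metadata). A hedged sketch of the two response shapes being checked, using stand-in types; the repo's dto.GeminiEmbeddingResponse and dto.GeminiBatchEmbeddingResponse may be defined differently, but the shapes below follow the public Gemini API docs:

package main

import (
	"encoding/json"
	"fmt"
)

// Stand-ins for the dto types referenced in the diff.
type geminiEmbedding struct {
	Values []float64 `json:"values"`
}

type singleResponse struct {
	Embedding geminiEmbedding `json:"embedding"` // embedContent shape
}

type batchResponse struct {
	Embeddings []geminiEmbedding `json:"embeddings"` // batchEmbedContents shape
}

func main() {
	single := []byte(`{"embedding":{"values":[0.01,-0.02,0.03]}}`)
	batch := []byte(`{"embeddings":[{"values":[0.01]},{"values":[0.02]}]}`)

	var s singleResponse
	if err := json.Unmarshal(single, &s); err == nil {
		fmt.Println("single embedding, dims:", len(s.Embedding.Values))
	}
	var b batchResponse
	if err := json.Unmarshal(batch, &b); err == nil {
		fmt.Println("batch embeddings, count:", len(b.Embeddings))
	}
}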