feat: add gemini thinking suffix support #981
This commit is contained in:
19
dto/dalle.go
19
dto/dalle.go
@@ -1,14 +1,17 @@
|
|||||||
package dto
|
package dto
|
||||||
|
|
||||||
|
import "encoding/json"
|
||||||
|
|
||||||
type ImageRequest struct {
|
type ImageRequest struct {
|
||||||
Model string `json:"model"`
|
Model string `json:"model"`
|
||||||
Prompt string `json:"prompt" binding:"required"`
|
Prompt string `json:"prompt" binding:"required"`
|
||||||
N int `json:"n,omitempty"`
|
N int `json:"n,omitempty"`
|
||||||
Size string `json:"size,omitempty"`
|
Size string `json:"size,omitempty"`
|
||||||
Quality string `json:"quality,omitempty"`
|
Quality string `json:"quality,omitempty"`
|
||||||
ResponseFormat string `json:"response_format,omitempty"`
|
ResponseFormat string `json:"response_format,omitempty"`
|
||||||
Style string `json:"style,omitempty"`
|
Style string `json:"style,omitempty"`
|
||||||
User string `json:"user,omitempty"`
|
User string `json:"user,omitempty"`
|
||||||
|
ExtraFields json.RawMessage `json:"extra_fields,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ImageResponse struct {
|
type ImageResponse struct {
|
||||||
|
|||||||
@@ -173,3 +173,17 @@ type Usage struct {
|
|||||||
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
||||||
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type InputTokenDetails struct {
|
||||||
|
CachedTokens int `json:"cached_tokens"`
|
||||||
|
CachedCreationTokens int `json:"-"`
|
||||||
|
TextTokens int `json:"text_tokens"`
|
||||||
|
AudioTokens int `json:"audio_tokens"`
|
||||||
|
ImageTokens int `json:"image_tokens"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type OutputTokenDetails struct {
|
||||||
|
TextTokens int `json:"text_tokens"`
|
||||||
|
AudioTokens int `json:"audio_tokens"`
|
||||||
|
ReasoningTokens int `json:"reasoning_tokens"`
|
||||||
|
}
|
||||||
|
|||||||
@@ -43,20 +43,6 @@ type RealtimeUsage struct {
|
|||||||
OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
|
OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type InputTokenDetails struct {
|
|
||||||
CachedTokens int `json:"cached_tokens"`
|
|
||||||
CachedCreationTokens int `json:"-"`
|
|
||||||
TextTokens int `json:"text_tokens"`
|
|
||||||
AudioTokens int `json:"audio_tokens"`
|
|
||||||
ImageTokens int `json:"image_tokens"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type OutputTokenDetails struct {
|
|
||||||
TextTokens int `json:"text_tokens"`
|
|
||||||
AudioTokens int `json:"audio_tokens"`
|
|
||||||
ReasoningTokens int `json:"reasoning_tokens"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type RealtimeSession struct {
|
type RealtimeSession struct {
|
||||||
Modalities []string `json:"modalities"`
|
Modalities []string `json:"modalities"`
|
||||||
Instructions string `json:"instructions"`
|
Instructions string `json:"instructions"`
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ import (
|
|||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
"one-api/service"
|
"one-api/service"
|
||||||
"one-api/setting/model_setting"
|
"one-api/setting/model_setting"
|
||||||
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
@@ -70,6 +69,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||||
|
|
||||||
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||||
|
// suffix -thinking and -nothinking
|
||||||
|
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||||
|
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
|
||||||
|
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||||
|
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName)
|
version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName)
|
||||||
|
|
||||||
if strings.HasPrefix(info.UpstreamModelName, "imagen") {
|
if strings.HasPrefix(info.UpstreamModelName, "imagen") {
|
||||||
@@ -99,11 +108,13 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
|
|||||||
if request == nil {
|
if request == nil {
|
||||||
return nil, errors.New("request is nil")
|
return nil, errors.New("request is nil")
|
||||||
}
|
}
|
||||||
ai, err := CovertGemini2OpenAI(*request, info)
|
|
||||||
|
geminiRequest, err := CovertGemini2OpenAI(*request, info)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return ai, nil
|
|
||||||
|
return geminiRequest, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
|
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
|
||||||
@@ -165,6 +176,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
|
|||||||
} else {
|
} else {
|
||||||
err, usage = GeminiChatHandler(c, resp, info)
|
err, usage = GeminiChatHandler(c, resp, info)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 {
|
||||||
|
// // 没有请求-thinking的情况下,产生思考token,则按照思考模型计费
|
||||||
|
// if !strings.HasSuffix(info.OriginModelName, "-thinking") &&
|
||||||
|
// !strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||||
|
// thinkingModelName := info.OriginModelName + "-thinking"
|
||||||
|
// if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) {
|
||||||
|
// info.OriginModelName = thinkingModelName
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//}
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,15 @@ type GeminiChatRequest struct {
|
|||||||
SystemInstructions *GeminiChatContent `json:"system_instruction,omitempty"`
|
SystemInstructions *GeminiChatContent `json:"system_instruction,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type GeminiThinkingConfig struct {
|
||||||
|
IncludeThoughts bool `json:"includeThoughts,omitempty"`
|
||||||
|
ThinkingBudget *int `json:"thinkingBudget,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
|
||||||
|
c.ThinkingBudget = &budget
|
||||||
|
}
|
||||||
|
|
||||||
type GeminiInlineData struct {
|
type GeminiInlineData struct {
|
||||||
MimeType string `json:"mimeType"`
|
MimeType string `json:"mimeType"`
|
||||||
Data string `json:"data"`
|
Data string `json:"data"`
|
||||||
@@ -71,16 +80,17 @@ type GeminiChatTool struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type GeminiChatGenerationConfig struct {
|
type GeminiChatGenerationConfig struct {
|
||||||
Temperature *float64 `json:"temperature,omitempty"`
|
Temperature *float64 `json:"temperature,omitempty"`
|
||||||
TopP float64 `json:"topP,omitempty"`
|
TopP float64 `json:"topP,omitempty"`
|
||||||
TopK float64 `json:"topK,omitempty"`
|
TopK float64 `json:"topK,omitempty"`
|
||||||
MaxOutputTokens uint `json:"maxOutputTokens,omitempty"`
|
MaxOutputTokens uint `json:"maxOutputTokens,omitempty"`
|
||||||
CandidateCount int `json:"candidateCount,omitempty"`
|
CandidateCount int `json:"candidateCount,omitempty"`
|
||||||
StopSequences []string `json:"stopSequences,omitempty"`
|
StopSequences []string `json:"stopSequences,omitempty"`
|
||||||
ResponseMimeType string `json:"responseMimeType,omitempty"`
|
ResponseMimeType string `json:"responseMimeType,omitempty"`
|
||||||
ResponseSchema any `json:"responseSchema,omitempty"`
|
ResponseSchema any `json:"responseSchema,omitempty"`
|
||||||
Seed int64 `json:"seed,omitempty"`
|
Seed int64 `json:"seed,omitempty"`
|
||||||
ResponseModalities []string `json:"responseModalities,omitempty"`
|
ResponseModalities []string `json:"responseModalities,omitempty"`
|
||||||
|
ThinkingConfig *GeminiThinkingConfig `json:"thinkingConfig,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type GeminiChatCandidate struct {
|
type GeminiChatCandidate struct {
|
||||||
@@ -109,6 +119,7 @@ type GeminiUsageMetadata struct {
|
|||||||
PromptTokenCount int `json:"promptTokenCount"`
|
PromptTokenCount int `json:"promptTokenCount"`
|
||||||
CandidatesTokenCount int `json:"candidatesTokenCount"`
|
CandidatesTokenCount int `json:"candidatesTokenCount"`
|
||||||
TotalTokenCount int `json:"totalTokenCount"`
|
TotalTokenCount int `json:"totalTokenCount"`
|
||||||
|
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Imagen related structs
|
// Imagen related structs
|
||||||
|
|||||||
@@ -23,12 +23,14 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
|
|
||||||
geminiRequest := GeminiChatRequest{
|
geminiRequest := GeminiChatRequest{
|
||||||
Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
|
Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
|
||||||
//SafetySettings: []GeminiChatSafetySettings{},
|
|
||||||
GenerationConfig: GeminiChatGenerationConfig{
|
GenerationConfig: GeminiChatGenerationConfig{
|
||||||
Temperature: textRequest.Temperature,
|
Temperature: textRequest.Temperature,
|
||||||
TopP: textRequest.TopP,
|
TopP: textRequest.TopP,
|
||||||
MaxOutputTokens: textRequest.MaxTokens,
|
MaxOutputTokens: textRequest.MaxTokens,
|
||||||
Seed: int64(textRequest.Seed),
|
Seed: int64(textRequest.Seed),
|
||||||
|
ThinkingConfig: &GeminiThinkingConfig{
|
||||||
|
IncludeThoughts: true,
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,6 +41,18 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||||
|
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||||
|
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||||
|
if budgetTokens == 0 || budgetTokens > 24576 {
|
||||||
|
budgetTokens = 24576
|
||||||
|
}
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(int(budgetTokens))
|
||||||
|
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList))
|
safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList))
|
||||||
for _, category := range SafetySettingList {
|
for _, category := range SafetySettingList {
|
||||||
safetySettings = append(safetySettings, GeminiChatSafetySettings{
|
safetySettings = append(safetySettings, GeminiChatSafetySettings{
|
||||||
@@ -644,6 +658,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
|
|||||||
if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
|
if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
|
||||||
usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
|
usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
|
||||||
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
|
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
|
||||||
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
}
|
}
|
||||||
err = helper.ObjectData(c, response)
|
err = helper.ObjectData(c, response)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -666,7 +681,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
|
|||||||
|
|
||||||
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||||||
usage.PromptTokensDetails.TextTokens = usage.PromptTokens
|
usage.PromptTokensDetails.TextTokens = usage.PromptTokens
|
||||||
usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
|
//usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
|
||||||
|
|
||||||
if info.ShouldIncludeUsage {
|
if info.ShouldIncludeUsage {
|
||||||
response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
|
response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
|
||||||
@@ -712,6 +727,9 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
|
|||||||
CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
|
CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
|
||||||
TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
|
TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
|
||||||
|
|
||||||
fullTextResponse.Usage = usage
|
fullTextResponse.Usage = usage
|
||||||
jsonResponse, err := json.Marshal(fullTextResponse)
|
jsonResponse, err := json.Marshal(fullTextResponse)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|||||||
@@ -49,11 +49,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !acceptUnsetRatio {
|
if !acceptUnsetRatio {
|
||||||
if info.UserId == 1 {
|
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请联系管理员设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
|
||||||
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
|
|
||||||
} else {
|
|
||||||
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
|
completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
|
||||||
@@ -82,3 +78,15 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
|
|
||||||
return priceData, nil
|
return priceData, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ContainPriceOrRatio(modelName string) bool {
|
||||||
|
_, ok := operation_setting.GetModelPrice(modelName, false)
|
||||||
|
if ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
_, ok = operation_setting.GetModelRatio(modelName)
|
||||||
|
if ok {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,9 +6,11 @@ import (
|
|||||||
|
|
||||||
// GeminiSettings 定义Gemini模型的配置
|
// GeminiSettings 定义Gemini模型的配置
|
||||||
type GeminiSettings struct {
|
type GeminiSettings struct {
|
||||||
SafetySettings map[string]string `json:"safety_settings"`
|
SafetySettings map[string]string `json:"safety_settings"`
|
||||||
VersionSettings map[string]string `json:"version_settings"`
|
VersionSettings map[string]string `json:"version_settings"`
|
||||||
SupportedImagineModels []string `json:"supported_imagine_models"`
|
SupportedImagineModels []string `json:"supported_imagine_models"`
|
||||||
|
ThinkingAdapterEnabled bool `json:"thinking_adapter_enabled"`
|
||||||
|
ThinkingAdapterBudgetTokensPercentage float64 `json:"thinking_adapter_budget_tokens_percentage"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// 默认配置
|
// 默认配置
|
||||||
@@ -25,6 +27,8 @@ var defaultGeminiSettings = GeminiSettings{
|
|||||||
"gemini-2.0-flash-exp-image-generation",
|
"gemini-2.0-flash-exp-image-generation",
|
||||||
"gemini-2.0-flash-exp",
|
"gemini-2.0-flash-exp",
|
||||||
},
|
},
|
||||||
|
ThinkingAdapterEnabled: false,
|
||||||
|
ThinkingAdapterBudgetTokensPercentage: 0.6,
|
||||||
}
|
}
|
||||||
|
|
||||||
// 全局实例
|
// 全局实例
|
||||||
|
|||||||
@@ -86,89 +86,92 @@ var defaultModelRatio = map[string]float64{
|
|||||||
"text-curie-001": 1,
|
"text-curie-001": 1,
|
||||||
//"text-davinci-002": 10,
|
//"text-davinci-002": 10,
|
||||||
//"text-davinci-003": 10,
|
//"text-davinci-003": 10,
|
||||||
"text-davinci-edit-001": 10,
|
"text-davinci-edit-001": 10,
|
||||||
"code-davinci-edit-001": 10,
|
"code-davinci-edit-001": 10,
|
||||||
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
"whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens
|
||||||
"tts-1": 7.5, // 1k characters -> $0.015
|
"tts-1": 7.5, // 1k characters -> $0.015
|
||||||
"tts-1-1106": 7.5, // 1k characters -> $0.015
|
"tts-1-1106": 7.5, // 1k characters -> $0.015
|
||||||
"tts-1-hd": 15, // 1k characters -> $0.03
|
"tts-1-hd": 15, // 1k characters -> $0.03
|
||||||
"tts-1-hd-1106": 15, // 1k characters -> $0.03
|
"tts-1-hd-1106": 15, // 1k characters -> $0.03
|
||||||
"davinci": 10,
|
"davinci": 10,
|
||||||
"curie": 10,
|
"curie": 10,
|
||||||
"babbage": 10,
|
"babbage": 10,
|
||||||
"ada": 10,
|
"ada": 10,
|
||||||
"text-embedding-3-small": 0.01,
|
"text-embedding-3-small": 0.01,
|
||||||
"text-embedding-3-large": 0.065,
|
"text-embedding-3-large": 0.065,
|
||||||
"text-embedding-ada-002": 0.05,
|
"text-embedding-ada-002": 0.05,
|
||||||
"text-search-ada-doc-001": 10,
|
"text-search-ada-doc-001": 10,
|
||||||
"text-moderation-stable": 0.1,
|
"text-moderation-stable": 0.1,
|
||||||
"text-moderation-latest": 0.1,
|
"text-moderation-latest": 0.1,
|
||||||
"claude-instant-1": 0.4, // $0.8 / 1M tokens
|
"claude-instant-1": 0.4, // $0.8 / 1M tokens
|
||||||
"claude-2.0": 4, // $8 / 1M tokens
|
"claude-2.0": 4, // $8 / 1M tokens
|
||||||
"claude-2.1": 4, // $8 / 1M tokens
|
"claude-2.1": 4, // $8 / 1M tokens
|
||||||
"claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens
|
"claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens
|
||||||
"claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens
|
"claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens
|
||||||
"claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens
|
"claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens
|
||||||
"claude-3-5-sonnet-20240620": 1.5,
|
"claude-3-5-sonnet-20240620": 1.5,
|
||||||
"claude-3-5-sonnet-20241022": 1.5,
|
"claude-3-5-sonnet-20241022": 1.5,
|
||||||
"claude-3-7-sonnet-20250219": 1.5,
|
"claude-3-7-sonnet-20250219": 1.5,
|
||||||
"claude-3-7-sonnet-20250219-thinking": 1.5,
|
"claude-3-7-sonnet-20250219-thinking": 1.5,
|
||||||
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
"claude-3-opus-20240229": 7.5, // $15 / 1M tokens
|
||||||
"ERNIE-4.0-8K": 0.120 * RMB,
|
"ERNIE-4.0-8K": 0.120 * RMB,
|
||||||
"ERNIE-3.5-8K": 0.012 * RMB,
|
"ERNIE-3.5-8K": 0.012 * RMB,
|
||||||
"ERNIE-3.5-8K-0205": 0.024 * RMB,
|
"ERNIE-3.5-8K-0205": 0.024 * RMB,
|
||||||
"ERNIE-3.5-8K-1222": 0.012 * RMB,
|
"ERNIE-3.5-8K-1222": 0.012 * RMB,
|
||||||
"ERNIE-Bot-8K": 0.024 * RMB,
|
"ERNIE-Bot-8K": 0.024 * RMB,
|
||||||
"ERNIE-3.5-4K-0205": 0.012 * RMB,
|
"ERNIE-3.5-4K-0205": 0.012 * RMB,
|
||||||
"ERNIE-Speed-8K": 0.004 * RMB,
|
"ERNIE-Speed-8K": 0.004 * RMB,
|
||||||
"ERNIE-Speed-128K": 0.004 * RMB,
|
"ERNIE-Speed-128K": 0.004 * RMB,
|
||||||
"ERNIE-Lite-8K-0922": 0.008 * RMB,
|
"ERNIE-Lite-8K-0922": 0.008 * RMB,
|
||||||
"ERNIE-Lite-8K-0308": 0.003 * RMB,
|
"ERNIE-Lite-8K-0308": 0.003 * RMB,
|
||||||
"ERNIE-Tiny-8K": 0.001 * RMB,
|
"ERNIE-Tiny-8K": 0.001 * RMB,
|
||||||
"BLOOMZ-7B": 0.004 * RMB,
|
"BLOOMZ-7B": 0.004 * RMB,
|
||||||
"Embedding-V1": 0.002 * RMB,
|
"Embedding-V1": 0.002 * RMB,
|
||||||
"bge-large-zh": 0.002 * RMB,
|
"bge-large-zh": 0.002 * RMB,
|
||||||
"bge-large-en": 0.002 * RMB,
|
"bge-large-en": 0.002 * RMB,
|
||||||
"tao-8k": 0.002 * RMB,
|
"tao-8k": 0.002 * RMB,
|
||||||
"PaLM-2": 1,
|
"PaLM-2": 1,
|
||||||
"gemini-1.5-pro-latest": 1.25, // $3.5 / 1M tokens
|
"gemini-1.5-pro-latest": 1.25, // $3.5 / 1M tokens
|
||||||
"gemini-1.5-flash-latest": 0.075,
|
"gemini-1.5-flash-latest": 0.075,
|
||||||
"gemini-2.0-flash": 0.05,
|
"gemini-2.0-flash": 0.05,
|
||||||
"gemini-2.5-pro-exp-03-25": 0.625,
|
"gemini-2.5-pro-exp-03-25": 0.625,
|
||||||
"gemini-2.5-pro-preview-03-25": 0.625,
|
"gemini-2.5-pro-preview-03-25": 0.625,
|
||||||
"text-embedding-004": 0.001,
|
"gemini-2.5-flash-preview-04-17": 0.075,
|
||||||
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
|
||||||
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
|
||||||
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
|
"text-embedding-004": 0.001,
|
||||||
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
|
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
||||||
"glm-4": 7.143, // ¥0.1 / 1k tokens
|
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
||||||
"glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens
|
"chatglm_std": 0.3572, // ¥0.005 / 1k tokens
|
||||||
"glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens
|
"chatglm_lite": 0.1429, // ¥0.002 / 1k tokens
|
||||||
"glm-3-turbo": 0.3572,
|
"glm-4": 7.143, // ¥0.1 / 1k tokens
|
||||||
"glm-4-plus": 0.05 * RMB,
|
"glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens
|
||||||
"glm-4-0520": 0.1 * RMB,
|
"glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens
|
||||||
"glm-4-air": 0.001 * RMB,
|
"glm-3-turbo": 0.3572,
|
||||||
"glm-4-airx": 0.01 * RMB,
|
"glm-4-plus": 0.05 * RMB,
|
||||||
"glm-4-long": 0.001 * RMB,
|
"glm-4-0520": 0.1 * RMB,
|
||||||
"glm-4-flash": 0,
|
"glm-4-air": 0.001 * RMB,
|
||||||
"glm-4v-plus": 0.01 * RMB,
|
"glm-4-airx": 0.01 * RMB,
|
||||||
"qwen-turbo": 0.8572, // ¥0.012 / 1k tokens
|
"glm-4-long": 0.001 * RMB,
|
||||||
"qwen-plus": 10, // ¥0.14 / 1k tokens
|
"glm-4-flash": 0,
|
||||||
"text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens
|
"glm-4v-plus": 0.01 * RMB,
|
||||||
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
"qwen-turbo": 0.8572, // ¥0.012 / 1k tokens
|
||||||
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
"qwen-plus": 10, // ¥0.14 / 1k tokens
|
||||||
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
"text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens
|
||||||
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
"SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"SparkDesk-v4.0": 1.2858,
|
"SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
"SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens
|
"SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens
|
||||||
"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
|
"SparkDesk-v4.0": 1.2858,
|
||||||
"360gpt-pro": 0.8572, // ¥0.012 / 1k tokens
|
"360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens
|
||||||
"360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens
|
"360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens
|
||||||
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
"360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens
|
||||||
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
"360gpt-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||||
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
"360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens
|
||||||
"hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
"embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens
|
||||||
|
"embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||||
|
"semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens
|
||||||
|
"hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
|
||||||
// https://platform.lingyiwanwu.com/docs#-计费单元
|
// https://platform.lingyiwanwu.com/docs#-计费单元
|
||||||
// 已经按照 7.2 来换算美元价格
|
// 已经按照 7.2 来换算美元价格
|
||||||
"yi-34b-chat-0205": 0.18,
|
"yi-34b-chat-0205": 0.18,
|
||||||
@@ -275,8 +278,6 @@ func InitModelSettings() {
|
|||||||
cacheRatioMapMutex.Lock()
|
cacheRatioMapMutex.Lock()
|
||||||
cacheRatioMap = defaultCacheRatio
|
cacheRatioMap = defaultCacheRatio
|
||||||
cacheRatioMapMutex.Unlock()
|
cacheRatioMapMutex.Unlock()
|
||||||
|
|
||||||
common.SysLog("model settings initialized")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetModelPriceMap() map[string]float64 {
|
func GetModelPriceMap() map[string]float64 {
|
||||||
@@ -459,6 +460,12 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) {
|
|||||||
return 4, true
|
return 4, true
|
||||||
} else if strings.HasPrefix(name, "gemini-2.5-pro-preview") {
|
} else if strings.HasPrefix(name, "gemini-2.5-pro-preview") {
|
||||||
return 8, true
|
return 8, true
|
||||||
|
} else if strings.HasPrefix(name, "gemini-2.5-flash-preview") {
|
||||||
|
if strings.HasSuffix(name, "-nothinking") {
|
||||||
|
return 4, false
|
||||||
|
} else {
|
||||||
|
return 3.5 / 0.6, false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return 4, false
|
return 4, false
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,8 @@ const ModelSetting = () => {
|
|||||||
'global.pass_through_request_enabled': false,
|
'global.pass_through_request_enabled': false,
|
||||||
'general_setting.ping_interval_enabled': false,
|
'general_setting.ping_interval_enabled': false,
|
||||||
'general_setting.ping_interval_seconds': 60,
|
'general_setting.ping_interval_seconds': 60,
|
||||||
|
'gemini.thinking_adapter_enabled': false,
|
||||||
|
'gemini.thinking_adapter_budget_tokens_percentage': 0.6,
|
||||||
});
|
});
|
||||||
|
|
||||||
let [loading, setLoading] = useState(false);
|
let [loading, setLoading] = useState(false);
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import {
|
|||||||
verifyJSON,
|
verifyJSON,
|
||||||
} from '../../../helpers';
|
} from '../../../helpers';
|
||||||
import { useTranslation } from 'react-i18next';
|
import { useTranslation } from 'react-i18next';
|
||||||
|
import Text from '@douyinfe/semi-ui/lib/es/typography/text.js';
|
||||||
|
|
||||||
const GEMINI_SETTING_EXAMPLE = {
|
const GEMINI_SETTING_EXAMPLE = {
|
||||||
default: 'OFF',
|
default: 'OFF',
|
||||||
@@ -27,6 +28,8 @@ export default function SettingGeminiModel(props) {
|
|||||||
'gemini.safety_settings': '',
|
'gemini.safety_settings': '',
|
||||||
'gemini.version_settings': '',
|
'gemini.version_settings': '',
|
||||||
'gemini.supported_imagine_models': [],
|
'gemini.supported_imagine_models': [],
|
||||||
|
'gemini.thinking_adapter_enabled': false,
|
||||||
|
'gemini.thinking_adapter_budget_tokens_percentage': 0.6,
|
||||||
});
|
});
|
||||||
const refForm = useRef();
|
const refForm = useRef();
|
||||||
const [inputsRow, setInputsRow] = useState(inputs);
|
const [inputsRow, setInputsRow] = useState(inputs);
|
||||||
@@ -151,13 +154,69 @@ export default function SettingGeminiModel(props) {
|
|||||||
/>
|
/>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
</Form.Section>
|
||||||
|
|
||||||
|
<Form.Section text={t('Gemini思考适配设置')}>
|
||||||
<Row>
|
<Row>
|
||||||
<Button size='default' onClick={onSubmit}>
|
<Col span={16}>
|
||||||
{t('保存')}
|
<Text>
|
||||||
</Button>
|
{t(
|
||||||
|
"和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用," +
|
||||||
|
"-nothinking后缀(BudgetTokens=0,思考关闭)也会返回少量的思考token,这是gemini的特性," +
|
||||||
|
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置"
|
||||||
|
)}
|
||||||
|
</Text>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
<Row>
|
||||||
|
<Col span={16}>
|
||||||
|
<Form.Switch
|
||||||
|
label={t('启用Gemini思考后缀适配')}
|
||||||
|
field={'gemini.thinking_adapter_enabled'}
|
||||||
|
extraText={"适配-thinking和-nothinking后缀"}
|
||||||
|
onChange={(value) =>
|
||||||
|
setInputs({
|
||||||
|
...inputs,
|
||||||
|
'gemini.thinking_adapter_enabled': value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
<Row>
|
||||||
|
<Col span={16}>
|
||||||
|
<Text>
|
||||||
|
{t(
|
||||||
|
'Gemini思考适配 BudgetTokens = MaxTokens * BudgetTokens 百分比',
|
||||||
|
)}
|
||||||
|
</Text>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
|
<Row>
|
||||||
|
<Col xs={24} sm={12} md={8} lg={8} xl={8}>
|
||||||
|
<Form.InputNumber
|
||||||
|
label={t('请求模型带-thinking后缀的BudgetTokens数(超出24576的部分将被忽略)')}
|
||||||
|
field={'gemini.thinking_adapter_budget_tokens_percentage'}
|
||||||
|
initValue={''}
|
||||||
|
extraText={t('0.1-1之间的小数')}
|
||||||
|
min={0.1}
|
||||||
|
max={1}
|
||||||
|
onChange={(value) =>
|
||||||
|
setInputs({
|
||||||
|
...inputs,
|
||||||
|
'gemini.thinking_adapter_budget_tokens_percentage': value,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
/>
|
||||||
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
</Form.Section>
|
</Form.Section>
|
||||||
|
|
||||||
|
<Row>
|
||||||
|
<Button size='default' onClick={onSubmit}>
|
||||||
|
{t('保存')}
|
||||||
|
</Button>
|
||||||
|
</Row>
|
||||||
</Form>
|
</Form>
|
||||||
</Spin>
|
</Spin>
|
||||||
</>
|
</>
|
||||||
|
|||||||
Reference in New Issue
Block a user