diff --git a/dto/dalle.go b/dto/dalle.go index d0bba655..562d5f1a 100644 --- a/dto/dalle.go +++ b/dto/dalle.go @@ -1,14 +1,17 @@ package dto +import "encoding/json" + type ImageRequest struct { - Model string `json:"model"` - Prompt string `json:"prompt" binding:"required"` - N int `json:"n,omitempty"` - Size string `json:"size,omitempty"` - Quality string `json:"quality,omitempty"` - ResponseFormat string `json:"response_format,omitempty"` - Style string `json:"style,omitempty"` - User string `json:"user,omitempty"` + Model string `json:"model"` + Prompt string `json:"prompt" binding:"required"` + N int `json:"n,omitempty"` + Size string `json:"size,omitempty"` + Quality string `json:"quality,omitempty"` + ResponseFormat string `json:"response_format,omitempty"` + Style string `json:"style,omitempty"` + User string `json:"user,omitempty"` + ExtraFields json.RawMessage `json:"extra_fields,omitempty"` } type ImageResponse struct { diff --git a/dto/openai_response.go b/dto/openai_response.go index fe2609bf..ddd1a907 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -173,3 +173,17 @@ type Usage struct { PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` } + +type InputTokenDetails struct { + CachedTokens int `json:"cached_tokens"` + CachedCreationTokens int `json:"-"` + TextTokens int `json:"text_tokens"` + AudioTokens int `json:"audio_tokens"` + ImageTokens int `json:"image_tokens"` +} + +type OutputTokenDetails struct { + TextTokens int `json:"text_tokens"` + AudioTokens int `json:"audio_tokens"` + ReasoningTokens int `json:"reasoning_tokens"` +} diff --git a/dto/realtime.go b/dto/realtime.go index bb572267..86ae352d 100644 --- a/dto/realtime.go +++ b/dto/realtime.go @@ -43,20 +43,6 @@ type RealtimeUsage struct { OutputTokenDetails OutputTokenDetails `json:"output_token_details"` } -type InputTokenDetails struct { - CachedTokens int `json:"cached_tokens"` - CachedCreationTokens int `json:"-"` - TextTokens int `json:"text_tokens"` - AudioTokens int `json:"audio_tokens"` - ImageTokens int `json:"image_tokens"` -} - -type OutputTokenDetails struct { - TextTokens int `json:"text_tokens"` - AudioTokens int `json:"audio_tokens"` - ReasoningTokens int `json:"reasoning_tokens"` -} - type RealtimeSession struct { Modalities []string `json:"modalities"` Instructions string `json:"instructions"` diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go index e5ee134a..feaed8f4 100644 --- a/relay/channel/gemini/adaptor.go +++ b/relay/channel/gemini/adaptor.go @@ -12,7 +12,6 @@ import ( relaycommon "one-api/relay/common" "one-api/service" "one-api/setting/model_setting" - "strings" "github.com/gin-gonic/gin" @@ -70,6 +69,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) { } func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { + + if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { + // suffix -thinking and -nothinking + if strings.HasSuffix(info.OriginModelName, "-thinking") { + info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking") + } else if strings.HasSuffix(info.OriginModelName, "-nothinking") { + info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking") + } + } + version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName) if strings.HasPrefix(info.UpstreamModelName, "imagen") { @@ -99,11 +108,13 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if request == nil { return nil, errors.New("request is nil") } - ai, err := CovertGemini2OpenAI(*request, info) + + geminiRequest, err := CovertGemini2OpenAI(*request, info) if err != nil { return nil, err } - return ai, nil + + return geminiRequest, nil } func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) { @@ -165,6 +176,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom } else { err, usage = GeminiChatHandler(c, resp, info) } + + //if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 { + // // 没有请求-thinking的情况下,产生思考token,则按照思考模型计费 + // if !strings.HasSuffix(info.OriginModelName, "-thinking") && + // !strings.HasSuffix(info.OriginModelName, "-nothinking") { + // thinkingModelName := info.OriginModelName + "-thinking" + // if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) { + // info.OriginModelName = thinkingModelName + // } + // } + //} + return } diff --git a/relay/channel/gemini/dto.go b/relay/channel/gemini/dto.go index 7f98b1b7..5d5c1287 100644 --- a/relay/channel/gemini/dto.go +++ b/relay/channel/gemini/dto.go @@ -8,6 +8,15 @@ type GeminiChatRequest struct { SystemInstructions *GeminiChatContent `json:"system_instruction,omitempty"` } +type GeminiThinkingConfig struct { + IncludeThoughts bool `json:"includeThoughts,omitempty"` + ThinkingBudget *int `json:"thinkingBudget,omitempty"` +} + +func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) { + c.ThinkingBudget = &budget +} + type GeminiInlineData struct { MimeType string `json:"mimeType"` Data string `json:"data"` @@ -71,16 +80,17 @@ type GeminiChatTool struct { } type GeminiChatGenerationConfig struct { - Temperature *float64 `json:"temperature,omitempty"` - TopP float64 `json:"topP,omitempty"` - TopK float64 `json:"topK,omitempty"` - MaxOutputTokens uint `json:"maxOutputTokens,omitempty"` - CandidateCount int `json:"candidateCount,omitempty"` - StopSequences []string `json:"stopSequences,omitempty"` - ResponseMimeType string `json:"responseMimeType,omitempty"` - ResponseSchema any `json:"responseSchema,omitempty"` - Seed int64 `json:"seed,omitempty"` - ResponseModalities []string `json:"responseModalities,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP float64 `json:"topP,omitempty"` + TopK float64 `json:"topK,omitempty"` + MaxOutputTokens uint `json:"maxOutputTokens,omitempty"` + CandidateCount int `json:"candidateCount,omitempty"` + StopSequences []string `json:"stopSequences,omitempty"` + ResponseMimeType string `json:"responseMimeType,omitempty"` + ResponseSchema any `json:"responseSchema,omitempty"` + Seed int64 `json:"seed,omitempty"` + ResponseModalities []string `json:"responseModalities,omitempty"` + ThinkingConfig *GeminiThinkingConfig `json:"thinkingConfig,omitempty"` } type GeminiChatCandidate struct { @@ -109,6 +119,7 @@ type GeminiUsageMetadata struct { PromptTokenCount int `json:"promptTokenCount"` CandidatesTokenCount int `json:"candidatesTokenCount"` TotalTokenCount int `json:"totalTokenCount"` + ThoughtsTokenCount int `json:"thoughtsTokenCount"` } // Imagen related structs diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index 03736f38..e358d741 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -23,12 +23,14 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon geminiRequest := GeminiChatRequest{ Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)), - //SafetySettings: []GeminiChatSafetySettings{}, GenerationConfig: GeminiChatGenerationConfig{ Temperature: textRequest.Temperature, TopP: textRequest.TopP, MaxOutputTokens: textRequest.MaxTokens, Seed: int64(textRequest.Seed), + ThinkingConfig: &GeminiThinkingConfig{ + IncludeThoughts: true, + }, }, } @@ -39,6 +41,18 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon } } + if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { + if strings.HasSuffix(info.OriginModelName, "-thinking") { + budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens) + if budgetTokens == 0 || budgetTokens > 24576 { + budgetTokens = 24576 + } + geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(int(budgetTokens)) + } else if strings.HasSuffix(info.OriginModelName, "-nothinking") { + geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(0) + } + } + safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList)) for _, category := range SafetySettingList { safetySettings = append(safetySettings, GeminiChatSafetySettings{ @@ -644,6 +658,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom if geminiResponse.UsageMetadata.TotalTokenCount != 0 { usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount } err = helper.ObjectData(c, response) if err != nil { @@ -666,7 +681,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens usage.PromptTokensDetails.TextTokens = usage.PromptTokens - usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens + //usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens if info.ShouldIncludeUsage { response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage) @@ -712,6 +727,9 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount, TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount, } + + usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount + fullTextResponse.Usage = usage jsonResponse, err := json.Marshal(fullTextResponse) if err != nil { diff --git a/relay/helper/price.go b/relay/helper/price.go index 758c2829..a68cd54d 100644 --- a/relay/helper/price.go +++ b/relay/helper/price.go @@ -49,11 +49,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens } } if !acceptUnsetRatio { - if info.UserId == 1 { - return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName) - } else { - return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName) - } + return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请联系管理员设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName) } } completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName) @@ -82,3 +78,15 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens return priceData, nil } + +func ContainPriceOrRatio(modelName string) bool { + _, ok := operation_setting.GetModelPrice(modelName, false) + if ok { + return true + } + _, ok = operation_setting.GetModelRatio(modelName) + if ok { + return true + } + return false +} diff --git a/setting/model_setting/gemini.go b/setting/model_setting/gemini.go index e6509232..f132fec8 100644 --- a/setting/model_setting/gemini.go +++ b/setting/model_setting/gemini.go @@ -6,9 +6,11 @@ import ( // GeminiSettings 定义Gemini模型的配置 type GeminiSettings struct { - SafetySettings map[string]string `json:"safety_settings"` - VersionSettings map[string]string `json:"version_settings"` - SupportedImagineModels []string `json:"supported_imagine_models"` + SafetySettings map[string]string `json:"safety_settings"` + VersionSettings map[string]string `json:"version_settings"` + SupportedImagineModels []string `json:"supported_imagine_models"` + ThinkingAdapterEnabled bool `json:"thinking_adapter_enabled"` + ThinkingAdapterBudgetTokensPercentage float64 `json:"thinking_adapter_budget_tokens_percentage"` } // 默认配置 @@ -25,6 +27,8 @@ var defaultGeminiSettings = GeminiSettings{ "gemini-2.0-flash-exp-image-generation", "gemini-2.0-flash-exp", }, + ThinkingAdapterEnabled: false, + ThinkingAdapterBudgetTokensPercentage: 0.6, } // 全局实例 diff --git a/setting/operation_setting/model-ratio.go b/setting/operation_setting/model-ratio.go index 68e50757..6a80ef1a 100644 --- a/setting/operation_setting/model-ratio.go +++ b/setting/operation_setting/model-ratio.go @@ -86,89 +86,92 @@ var defaultModelRatio = map[string]float64{ "text-curie-001": 1, //"text-davinci-002": 10, //"text-davinci-003": 10, - "text-davinci-edit-001": 10, - "code-davinci-edit-001": 10, - "whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens - "tts-1": 7.5, // 1k characters -> $0.015 - "tts-1-1106": 7.5, // 1k characters -> $0.015 - "tts-1-hd": 15, // 1k characters -> $0.03 - "tts-1-hd-1106": 15, // 1k characters -> $0.03 - "davinci": 10, - "curie": 10, - "babbage": 10, - "ada": 10, - "text-embedding-3-small": 0.01, - "text-embedding-3-large": 0.065, - "text-embedding-ada-002": 0.05, - "text-search-ada-doc-001": 10, - "text-moderation-stable": 0.1, - "text-moderation-latest": 0.1, - "claude-instant-1": 0.4, // $0.8 / 1M tokens - "claude-2.0": 4, // $8 / 1M tokens - "claude-2.1": 4, // $8 / 1M tokens - "claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens - "claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens - "claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens - "claude-3-5-sonnet-20240620": 1.5, - "claude-3-5-sonnet-20241022": 1.5, - "claude-3-7-sonnet-20250219": 1.5, - "claude-3-7-sonnet-20250219-thinking": 1.5, - "claude-3-opus-20240229": 7.5, // $15 / 1M tokens - "ERNIE-4.0-8K": 0.120 * RMB, - "ERNIE-3.5-8K": 0.012 * RMB, - "ERNIE-3.5-8K-0205": 0.024 * RMB, - "ERNIE-3.5-8K-1222": 0.012 * RMB, - "ERNIE-Bot-8K": 0.024 * RMB, - "ERNIE-3.5-4K-0205": 0.012 * RMB, - "ERNIE-Speed-8K": 0.004 * RMB, - "ERNIE-Speed-128K": 0.004 * RMB, - "ERNIE-Lite-8K-0922": 0.008 * RMB, - "ERNIE-Lite-8K-0308": 0.003 * RMB, - "ERNIE-Tiny-8K": 0.001 * RMB, - "BLOOMZ-7B": 0.004 * RMB, - "Embedding-V1": 0.002 * RMB, - "bge-large-zh": 0.002 * RMB, - "bge-large-en": 0.002 * RMB, - "tao-8k": 0.002 * RMB, - "PaLM-2": 1, - "gemini-1.5-pro-latest": 1.25, // $3.5 / 1M tokens - "gemini-1.5-flash-latest": 0.075, - "gemini-2.0-flash": 0.05, - "gemini-2.5-pro-exp-03-25": 0.625, - "gemini-2.5-pro-preview-03-25": 0.625, - "text-embedding-004": 0.001, - "chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens - "chatglm_pro": 0.7143, // ¥0.01 / 1k tokens - "chatglm_std": 0.3572, // ¥0.005 / 1k tokens - "chatglm_lite": 0.1429, // ¥0.002 / 1k tokens - "glm-4": 7.143, // ¥0.1 / 1k tokens - "glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens - "glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens - "glm-3-turbo": 0.3572, - "glm-4-plus": 0.05 * RMB, - "glm-4-0520": 0.1 * RMB, - "glm-4-air": 0.001 * RMB, - "glm-4-airx": 0.01 * RMB, - "glm-4-long": 0.001 * RMB, - "glm-4-flash": 0, - "glm-4v-plus": 0.01 * RMB, - "qwen-turbo": 0.8572, // ¥0.012 / 1k tokens - "qwen-plus": 10, // ¥0.14 / 1k tokens - "text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens - "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens - "SparkDesk-v4.0": 1.2858, - "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens - "360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens - "360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens - "360gpt-pro": 0.8572, // ¥0.012 / 1k tokens - "360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens - "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens - "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens - "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens - "hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 + "text-davinci-edit-001": 10, + "code-davinci-edit-001": 10, + "whisper-1": 15, // $0.006 / minute -> $0.006 / 150 words -> $0.006 / 200 tokens -> $0.03 / 1k tokens + "tts-1": 7.5, // 1k characters -> $0.015 + "tts-1-1106": 7.5, // 1k characters -> $0.015 + "tts-1-hd": 15, // 1k characters -> $0.03 + "tts-1-hd-1106": 15, // 1k characters -> $0.03 + "davinci": 10, + "curie": 10, + "babbage": 10, + "ada": 10, + "text-embedding-3-small": 0.01, + "text-embedding-3-large": 0.065, + "text-embedding-ada-002": 0.05, + "text-search-ada-doc-001": 10, + "text-moderation-stable": 0.1, + "text-moderation-latest": 0.1, + "claude-instant-1": 0.4, // $0.8 / 1M tokens + "claude-2.0": 4, // $8 / 1M tokens + "claude-2.1": 4, // $8 / 1M tokens + "claude-3-haiku-20240307": 0.125, // $0.25 / 1M tokens + "claude-3-5-haiku-20241022": 0.5, // $1 / 1M tokens + "claude-3-sonnet-20240229": 1.5, // $3 / 1M tokens + "claude-3-5-sonnet-20240620": 1.5, + "claude-3-5-sonnet-20241022": 1.5, + "claude-3-7-sonnet-20250219": 1.5, + "claude-3-7-sonnet-20250219-thinking": 1.5, + "claude-3-opus-20240229": 7.5, // $15 / 1M tokens + "ERNIE-4.0-8K": 0.120 * RMB, + "ERNIE-3.5-8K": 0.012 * RMB, + "ERNIE-3.5-8K-0205": 0.024 * RMB, + "ERNIE-3.5-8K-1222": 0.012 * RMB, + "ERNIE-Bot-8K": 0.024 * RMB, + "ERNIE-3.5-4K-0205": 0.012 * RMB, + "ERNIE-Speed-8K": 0.004 * RMB, + "ERNIE-Speed-128K": 0.004 * RMB, + "ERNIE-Lite-8K-0922": 0.008 * RMB, + "ERNIE-Lite-8K-0308": 0.003 * RMB, + "ERNIE-Tiny-8K": 0.001 * RMB, + "BLOOMZ-7B": 0.004 * RMB, + "Embedding-V1": 0.002 * RMB, + "bge-large-zh": 0.002 * RMB, + "bge-large-en": 0.002 * RMB, + "tao-8k": 0.002 * RMB, + "PaLM-2": 1, + "gemini-1.5-pro-latest": 1.25, // $3.5 / 1M tokens + "gemini-1.5-flash-latest": 0.075, + "gemini-2.0-flash": 0.05, + "gemini-2.5-pro-exp-03-25": 0.625, + "gemini-2.5-pro-preview-03-25": 0.625, + "gemini-2.5-flash-preview-04-17": 0.075, + "gemini-2.5-flash-preview-04-17-thinking": 0.075, + "gemini-2.5-flash-preview-04-17-nothinking": 0.075, + "text-embedding-004": 0.001, + "chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens + "chatglm_pro": 0.7143, // ¥0.01 / 1k tokens + "chatglm_std": 0.3572, // ¥0.005 / 1k tokens + "chatglm_lite": 0.1429, // ¥0.002 / 1k tokens + "glm-4": 7.143, // ¥0.1 / 1k tokens + "glm-4v": 0.05 * RMB, // ¥0.05 / 1k tokens + "glm-4-alltools": 0.1 * RMB, // ¥0.1 / 1k tokens + "glm-3-turbo": 0.3572, + "glm-4-plus": 0.05 * RMB, + "glm-4-0520": 0.1 * RMB, + "glm-4-air": 0.001 * RMB, + "glm-4-airx": 0.01 * RMB, + "glm-4-long": 0.001 * RMB, + "glm-4-flash": 0, + "glm-4v-plus": 0.01 * RMB, + "qwen-turbo": 0.8572, // ¥0.012 / 1k tokens + "qwen-plus": 10, // ¥0.14 / 1k tokens + "text-embedding-v1": 0.05, // ¥0.0007 / 1k tokens + "SparkDesk-v1.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v2.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.1": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v3.5": 1.2858, // ¥0.018 / 1k tokens + "SparkDesk-v4.0": 1.2858, + "360GPT_S2_V9": 0.8572, // ¥0.012 / 1k tokens + "360gpt-turbo": 0.0858, // ¥0.0012 / 1k tokens + "360gpt-turbo-responsibility-8k": 0.8572, // ¥0.012 / 1k tokens + "360gpt-pro": 0.8572, // ¥0.012 / 1k tokens + "360gpt2-pro": 0.8572, // ¥0.012 / 1k tokens + "embedding-bert-512-v1": 0.0715, // ¥0.001 / 1k tokens + "embedding_s1_v1": 0.0715, // ¥0.001 / 1k tokens + "semantic_similarity_s1_v1": 0.0715, // ¥0.001 / 1k tokens + "hunyuan": 7.143, // ¥0.1 / 1k tokens // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0 // https://platform.lingyiwanwu.com/docs#-计费单元 // 已经按照 7.2 来换算美元价格 "yi-34b-chat-0205": 0.18, @@ -275,8 +278,6 @@ func InitModelSettings() { cacheRatioMapMutex.Lock() cacheRatioMap = defaultCacheRatio cacheRatioMapMutex.Unlock() - - common.SysLog("model settings initialized") } func GetModelPriceMap() map[string]float64 { @@ -459,6 +460,12 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) { return 4, true } else if strings.HasPrefix(name, "gemini-2.5-pro-preview") { return 8, true + } else if strings.HasPrefix(name, "gemini-2.5-flash-preview") { + if strings.HasSuffix(name, "-nothinking") { + return 4, false + } else { + return 3.5 / 0.6, false + } } return 4, false } diff --git a/web/src/components/ModelSetting.js b/web/src/components/ModelSetting.js index dab001ff..2a566d6b 100644 --- a/web/src/components/ModelSetting.js +++ b/web/src/components/ModelSetting.js @@ -20,6 +20,8 @@ const ModelSetting = () => { 'global.pass_through_request_enabled': false, 'general_setting.ping_interval_enabled': false, 'general_setting.ping_interval_seconds': 60, + 'gemini.thinking_adapter_enabled': false, + 'gemini.thinking_adapter_budget_tokens_percentage': 0.6, }); let [loading, setLoading] = useState(false); diff --git a/web/src/pages/Setting/Model/SettingGeminiModel.js b/web/src/pages/Setting/Model/SettingGeminiModel.js index 6e911080..a8f278db 100644 --- a/web/src/pages/Setting/Model/SettingGeminiModel.js +++ b/web/src/pages/Setting/Model/SettingGeminiModel.js @@ -9,6 +9,7 @@ import { verifyJSON, } from '../../../helpers'; import { useTranslation } from 'react-i18next'; +import Text from '@douyinfe/semi-ui/lib/es/typography/text.js'; const GEMINI_SETTING_EXAMPLE = { default: 'OFF', @@ -27,6 +28,8 @@ export default function SettingGeminiModel(props) { 'gemini.safety_settings': '', 'gemini.version_settings': '', 'gemini.supported_imagine_models': [], + 'gemini.thinking_adapter_enabled': false, + 'gemini.thinking_adapter_budget_tokens_percentage': 0.6, }); const refForm = useRef(); const [inputsRow, setInputsRow] = useState(inputs); @@ -151,13 +154,69 @@ export default function SettingGeminiModel(props) { /> + +