From e7353772184b31d13758504574758f0e8dead6f6 Mon Sep 17 00:00:00 2001 From: RedwindA Date: Sun, 15 Jun 2025 21:12:56 +0800 Subject: [PATCH 1/8] feat: implement thinking budget control in model name --- relay/channel/gemini/adaptor.go | 7 ++- relay/channel/gemini/relay-gemini.go | 43 ++++++++++++++++++- setting/operation_setting/model-ratio.go | 18 ++++++-- web/src/i18n/locales/en.json | 3 ++ .../pages/Setting/Model/SettingGeminiModel.js | 7 +-- 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/relay/channel/gemini/adaptor.go b/relay/channel/gemini/adaptor.go index e6f66d5f..a81eb3a9 100644 --- a/relay/channel/gemini/adaptor.go +++ b/relay/channel/gemini/adaptor.go @@ -72,8 +72,11 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) { func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { - // suffix -thinking and -nothinking - if strings.HasSuffix(info.OriginModelName, "-thinking") { + // 新增逻辑:处理 -thinking- 格式 + if strings.Contains(info.OriginModelName, "-thinking-") { + parts := strings.Split(info.UpstreamModelName, "-thinking-") + info.UpstreamModelName = parts[0] + } else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 旧的适配 info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking") } else if strings.HasSuffix(info.OriginModelName, "-nothinking") { info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking") diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index e2288faf..b65d5af7 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -12,6 +12,7 @@ import ( "one-api/relay/helper" "one-api/service" "one-api/setting/model_setting" + "strconv" "strings" "unicode/utf8" @@ -36,6 +37,13 @@ var geminiSupportedMimeTypes = map[string]bool{ "video/flv": true, } +// Gemini 允许的思考预算范围 +const ( + pro25MinBudget = 128 + pro25MaxBudget = 32768 + flash25MaxBudget = 24576 +) + // Setting safety to the lowest possible values since Gemini is already powerless enough func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) { @@ -57,7 +65,40 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon } if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { - if strings.HasSuffix(info.OriginModelName, "-thinking") { + // 新增逻辑:处理 -thinking- 格式 + if strings.Contains(info.OriginModelName, "-thinking-") { + parts := strings.SplitN(info.OriginModelName, "-thinking-", 2) + if len(parts) == 2 && parts[1] != "" { + if budgetTokens, err := strconv.Atoi(parts[1]); err == nil { + // 从模型名称成功解析预算 + isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") && + !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") && + !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25") + + if isNew25Pro { + // 新的2.5pro模型:ThinkingBudget范围为128-32768 + if budgetTokens < pro25MinBudget { + budgetTokens = pro25MinBudget + } else if budgetTokens > pro25MaxBudget { + budgetTokens = pro25MaxBudget + } + } else { + // 其他模型:ThinkingBudget范围为0-24576 + if budgetTokens < 0 { + budgetTokens = 0 + } else if budgetTokens > flash25MaxBudget { + budgetTokens = flash25MaxBudget + } + } + + geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ + ThinkingBudget: common.GetPointer(budgetTokens), + IncludeThoughts: true, + } + } + // 如果解析失败,则不设置ThinkingConfig,静默处理 + } + } else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 保留旧逻辑以兼容 // 硬编码不支持 ThinkingBudget 的旧模型 unsupportedModels := []string{ "gemini-2.5-pro-preview-05-06", diff --git a/setting/operation_setting/model-ratio.go b/setting/operation_setting/model-ratio.go index 700a7c4e..fa6f9560 100644 --- a/setting/operation_setting/model-ratio.go +++ b/setting/operation_setting/model-ratio.go @@ -142,6 +142,11 @@ var defaultModelRatio = map[string]float64{ "gemini-2.5-flash-preview-04-17": 0.075, "gemini-2.5-flash-preview-04-17-thinking": 0.075, "gemini-2.5-flash-preview-04-17-nothinking": 0.075, + "gemini-2.5-flash-preview-05-20": 0.075, + "gemini-2.5-flash-preview-05-20-thinking": 0.075, + "gemini-2.5-flash-preview-05-20-nothinking": 0.075, + "gemini-2.5-flash-thinking-*": 0.075, // 用于为后续所有2.5 flash thinking budget 模型设置默认倍率 + "gemini-2.5-pro-thinking-*": 0.625, // 用于为后续所有2.5 pro thinking budget 模型设置默认倍率 "text-embedding-004": 0.001, "chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens "chatglm_pro": 0.7143, // ¥0.01 / 1k tokens @@ -345,7 +350,14 @@ func UpdateModelRatioByJSONString(jsonStr string) error { func GetModelRatio(name string) (float64, bool) { modelRatioMapMutex.RLock() defer modelRatioMapMutex.RUnlock() - + // 处理带有思考预算的模型名称,方便统一定价 + handleThinkingBudgetModel := func(prefix, wildcard string) { + if strings.HasPrefix(name, prefix) && strings.Contains(name, "-thinking-") { + name = wildcard + } + } + handleThinkingBudgetModel("gemini-2.5-flash", "gemini-2.5-flash-thinking-*") + handleThinkingBudgetModel("gemini-2.5-pro", "gemini-2.5-pro-thinking-*") if strings.HasPrefix(name, "gpt-4-gizmo") { name = "gpt-4-gizmo-*" } @@ -470,9 +482,9 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) { return 4, true } else if strings.HasPrefix(name, "gemini-2.0") { return 4, true - } else if strings.HasPrefix(name, "gemini-2.5-pro-preview") { + } else if strings.HasPrefix(name, "gemini-2.5-pro") { // 移除preview来增加兼容性,这里假设正式版的倍率和preview一致 return 8, true - } else if strings.HasPrefix(name, "gemini-2.5-flash-preview") { + } else if strings.HasPrefix(name, "gemini-2.5-flash") { // 同上 if strings.HasSuffix(name, "-nothinking") { return 4, false } else { diff --git a/web/src/i18n/locales/en.json b/web/src/i18n/locales/en.json index ba23ca5c..d563aaad 100644 --- a/web/src/i18n/locales/en.json +++ b/web/src/i18n/locales/en.json @@ -1373,6 +1373,9 @@ "示例": "Example", "缺省 MaxTokens": "Default MaxTokens", "启用Claude思考适配(-thinking后缀)": "Enable Claude thinking adaptation (-thinking suffix)", + "启用Gemini思考后缀适配": "Enable Gemini thinking suffix adaptation", + "适配-thinking、-thinking-预算数字和-nothinking后缀": "Adapt -thinking, -thinking-budgetNumber, and -nothinking suffixes", + "思考预算占比": "Thinking budget ratio", "Claude思考适配 BudgetTokens = MaxTokens * BudgetTokens 百分比": "Claude thinking adaptation BudgetTokens = MaxTokens * BudgetTokens percentage", "思考适配 BudgetTokens 百分比": "Thinking adaptation BudgetTokens percentage", "0.1-1之间的小数": "Decimal between 0.1 and 1", diff --git a/web/src/pages/Setting/Model/SettingGeminiModel.js b/web/src/pages/Setting/Model/SettingGeminiModel.js index b802af1a..1d28ae92 100644 --- a/web/src/pages/Setting/Model/SettingGeminiModel.js +++ b/web/src/pages/Setting/Model/SettingGeminiModel.js @@ -173,7 +173,8 @@ export default function SettingGeminiModel(props) { {t( "和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用," + - "如果您需要计费,推荐设置无后缀模型价格按思考价格设置" + "如果您需要计费,推荐设置无后缀模型价格按思考价格设置。" + + "支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。" )} @@ -183,7 +184,7 @@ export default function SettingGeminiModel(props) { setInputs({ ...inputs, @@ -205,7 +206,7 @@ export default function SettingGeminiModel(props) { Date: Sun, 15 Jun 2025 23:40:58 +0800 Subject: [PATCH 2/8] update i18n --- web/src/i18n/locales/en.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/web/src/i18n/locales/en.json b/web/src/i18n/locales/en.json index d563aaad..8316f8a2 100644 --- a/web/src/i18n/locales/en.json +++ b/web/src/i18n/locales/en.json @@ -1373,6 +1373,9 @@ "示例": "Example", "缺省 MaxTokens": "Default MaxTokens", "启用Claude思考适配(-thinking后缀)": "Enable Claude thinking adaptation (-thinking suffix)", + "和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用,": "Unlike Claude, Gemini's thinking model automatically decides whether to think by default, and can be used normally even without enabling the adaptation model.", + "如果您需要计费,推荐设置无后缀模型价格按思考价格设置。": "If you need billing, it is recommended to set the no-suffix model price according to the thinking price.", + "支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。": "Supports using gemini-2.5-pro-preview-06-05-thinking-128 format to precisely pass thinking budget.", "启用Gemini思考后缀适配": "Enable Gemini thinking suffix adaptation", "适配-thinking、-thinking-预算数字和-nothinking后缀": "Adapt -thinking, -thinking-budgetNumber, and -nothinking suffixes", "思考预算占比": "Thinking budget ratio", From 1294d286ee0decfd73e14bc907cdeb736bc4b4de Mon Sep 17 00:00:00 2001 From: RedwindA Date: Mon, 16 Jun 2025 19:41:42 +0800 Subject: [PATCH 3/8] refactor: replace inline closure with a helper function --- setting/operation_setting/model-ratio.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/setting/operation_setting/model-ratio.go b/setting/operation_setting/model-ratio.go index fa6f9560..5155b2fc 100644 --- a/setting/operation_setting/model-ratio.go +++ b/setting/operation_setting/model-ratio.go @@ -347,17 +347,20 @@ func UpdateModelRatioByJSONString(jsonStr string) error { return json.Unmarshal([]byte(jsonStr), &modelRatioMap) } +// 处理带有思考预算的模型名称,方便统一定价 +func handleThinkingBudgetModel(name, prefix, wildcard string) string { + if strings.HasPrefix(name, prefix) && strings.Contains(name, "-thinking-") { + return wildcard + } + return name +} + func GetModelRatio(name string) (float64, bool) { modelRatioMapMutex.RLock() defer modelRatioMapMutex.RUnlock() - // 处理带有思考预算的模型名称,方便统一定价 - handleThinkingBudgetModel := func(prefix, wildcard string) { - if strings.HasPrefix(name, prefix) && strings.Contains(name, "-thinking-") { - name = wildcard - } - } - handleThinkingBudgetModel("gemini-2.5-flash", "gemini-2.5-flash-thinking-*") - handleThinkingBudgetModel("gemini-2.5-pro", "gemini-2.5-pro-thinking-*") + + name = handleThinkingBudgetModel(name, "gemini-2.5-flash", "gemini-2.5-flash-thinking-*") + name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*") if strings.HasPrefix(name, "gpt-4-gizmo") { name = "gpt-4-gizmo-*" } From d5c96cb036514fb1f0b504da31a47da79a2403df Mon Sep 17 00:00:00 2001 From: "Apple\\Apple" Date: Mon, 16 Jun 2025 20:05:54 +0800 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=90=9B=20fix(console-setting):=20ensu?= =?UTF-8?q?re=20announcements=20are=20returned=20in=20newest-first=20order?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary • Added stable, descending sort to `GetAnnouncements()` so that the API always returns the latest announcements first. • Introduced helper `getPublishTime()` to safely parse `publishDate` (RFC 3339) and fall back to zero value on failure. • Switched to `sort.SliceStable` for deterministic ordering when timestamps are identical. • Imported the standard `sort` package and removed redundant, duplicate date parsing. Impact Front-end no longer needs to perform client-side sorting; the latest announcement is guaranteed to appear at the top on all platforms and clients. --- setting/console_setting/validation.go | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/setting/console_setting/validation.go b/setting/console_setting/validation.go index 51a84849..fda6453d 100644 --- a/setting/console_setting/validation.go +++ b/setting/console_setting/validation.go @@ -7,6 +7,7 @@ import ( "regexp" "strings" "time" + "sort" ) var ( @@ -210,8 +211,23 @@ func validateFAQ(faqStr string) error { return nil } +func getPublishTime(item map[string]interface{}) time.Time { + if v, ok := item["publishDate"]; ok { + if s, ok2 := v.(string); ok2 { + if t, err := time.Parse(time.RFC3339, s); err == nil { + return t + } + } + } + return time.Time{} +} + func GetAnnouncements() []map[string]interface{} { - return getJSONList(GetConsoleSetting().Announcements) + list := getJSONList(GetConsoleSetting().Announcements) + sort.SliceStable(list, func(i, j int) bool { + return getPublishTime(list[i]).After(getPublishTime(list[j])) + }) + return list } func GetFAQ() []map[string]interface{} { From 6b7295bbdf2e48fad0ddbe286e06b5e51d0ce35a Mon Sep 17 00:00:00 2001 From: CaIon <1808837298@qq.com> Date: Mon, 16 Jun 2025 21:02:27 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=94=A7=20refactor(relay):=20replace?= =?UTF-8?q?=20UUID=20generation=20with=20helper=20function=20for=20respons?= =?UTF-8?q?e=20IDs?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/cohere/relay-cohere.go | 3 +-- relay/channel/gemini/relay-gemini.go | 8 ++++---- relay/channel/palm/relay-palm.go | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/relay/channel/cohere/relay-cohere.go b/relay/channel/cohere/relay-cohere.go index 10c4328b..8a044bf2 100644 --- a/relay/channel/cohere/relay-cohere.go +++ b/relay/channel/cohere/relay-cohere.go @@ -3,7 +3,6 @@ package cohere import ( "bufio" "encoding/json" - "fmt" "github.com/gin-gonic/gin" "io" "net/http" @@ -78,7 +77,7 @@ func stopReasonCohere2OpenAI(reason string) string { } func cohereStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) { - responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID()) + responseId := helper.GetResponseID(c) createdTime := common.GetTimestamp() usage := &dto.Usage{} responseText := "" diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index e2288faf..e0b70805 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -611,9 +611,9 @@ func getResponseToolCall(item *GeminiPart) *dto.ToolCallResponse { } } -func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResponse { +func responseGeminiChat2OpenAI(c *gin.Context, response *GeminiChatResponse) *dto.OpenAITextResponse { fullTextResponse := dto.OpenAITextResponse{ - Id: fmt.Sprintf("chatcmpl-%s", common.GetUUID()), + Id: helper.GetResponseID(c), Object: "chat.completion", Created: common.GetTimestamp(), Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)), @@ -754,7 +754,7 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) { // responseText := "" - id := fmt.Sprintf("chatcmpl-%s", common.GetUUID()) + id := helper.GetResponseID(c) createAt := common.GetTimestamp() var usage = &dto.Usage{} var imageCount int @@ -849,7 +849,7 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re StatusCode: resp.StatusCode, }, nil } - fullTextResponse := responseGeminiChat2OpenAI(&geminiResponse) + fullTextResponse := responseGeminiChat2OpenAI(c, &geminiResponse) fullTextResponse.Model = info.UpstreamModelName usage := dto.Usage{ PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount, diff --git a/relay/channel/palm/relay-palm.go b/relay/channel/palm/relay-palm.go index 5c398b5e..1f301009 100644 --- a/relay/channel/palm/relay-palm.go +++ b/relay/channel/palm/relay-palm.go @@ -73,7 +73,7 @@ func streamResponsePaLM2OpenAI(palmResponse *PaLMChatResponse) *dto.ChatCompleti func palmStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, string) { responseText := "" - responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID()) + responseId := helper.GetResponseID(c) createdTime := common.GetTimestamp() dataChan := make(chan string) stopChan := make(chan bool) From 4fc85d27e9d391977636cc6efb0a274dfc4080dd Mon Sep 17 00:00:00 2001 From: CaIon <1808837298@qq.com> Date: Tue, 17 Jun 2025 00:09:26 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=F0=9F=A7=B9=20chore(relay):=20remove=20unu?= =?UTF-8?q?sed=20import=20in=20relay-palm.go?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/palm/relay-palm.go | 1 - 1 file changed, 1 deletion(-) diff --git a/relay/channel/palm/relay-palm.go b/relay/channel/palm/relay-palm.go index 1f301009..0c6f8641 100644 --- a/relay/channel/palm/relay-palm.go +++ b/relay/channel/palm/relay-palm.go @@ -2,7 +2,6 @@ package palm import ( "encoding/json" - "fmt" "github.com/gin-gonic/gin" "io" "net/http" From a7d87475af8768ad23f81f84a4894c35c9a9cc29 Mon Sep 17 00:00:00 2001 From: RedwindA Date: Tue, 17 Jun 2025 02:37:19 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=F0=9F=94=A7=20fix(redis):=20only=20set=20e?= =?UTF-8?q?xpiration=20if=20greater=20than=200=20in=20RedisHSetObj?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- common/redis.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/common/redis.go b/common/redis.go index ba35331a..1efc217f 100644 --- a/common/redis.go +++ b/common/redis.go @@ -141,7 +141,11 @@ func RedisHSetObj(key string, obj interface{}, expiration time.Duration) error { txn := RDB.TxPipeline() txn.HSet(ctx, key, data) - txn.Expire(ctx, key, expiration) + + // 只有在 expiration 大于 0 时才设置过期时间 + if expiration > 0 { + txn.Expire(ctx, key, expiration) + } _, err := txn.Exec(ctx) if err != nil { From 0b9c6ecb00ab46724909a27f302a9454f4968f57 Mon Sep 17 00:00:00 2001 From: RedwindA Date: Tue, 17 Jun 2025 03:21:53 +0800 Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=94=A7=20refactor(redis):=20replace?= =?UTF-8?q?=20direct=20constant=20usage=20with=20RedisKeyCacheSeconds=20fu?= =?UTF-8?q?nction=20for=20cache=20duration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- constant/cache_key.go | 10 ++++------ model/token_cache.go | 2 +- model/user_cache.go | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/constant/cache_key.go b/constant/cache_key.go index 27cb3b75..daedfd40 100644 --- a/constant/cache_key.go +++ b/constant/cache_key.go @@ -2,12 +2,10 @@ package constant import "one-api/common" -var ( - TokenCacheSeconds = common.SyncFrequency - UserId2GroupCacheSeconds = common.SyncFrequency - UserId2QuotaCacheSeconds = common.SyncFrequency - UserId2StatusCacheSeconds = common.SyncFrequency -) +// 使用函数来避免初始化顺序带来的赋值问题 +func RedisKeyCacheSeconds() int { + return common.SyncFrequency +} // Cache keys const ( diff --git a/model/token_cache.go b/model/token_cache.go index b2e0c951..a4b0beae 100644 --- a/model/token_cache.go +++ b/model/token_cache.go @@ -10,7 +10,7 @@ import ( func cacheSetToken(token Token) error { key := common.GenerateHMAC(token.Key) token.Clean() - err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.TokenCacheSeconds)*time.Second) + err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.RedisKeyCacheSeconds())*time.Second) if err != nil { return err } diff --git a/model/user_cache.go b/model/user_cache.go index d74877bd..e673defc 100644 --- a/model/user_cache.go +++ b/model/user_cache.go @@ -70,7 +70,7 @@ func updateUserCache(user User) error { return common.RedisHSetObj( getUserCacheKey(user.Id), user.ToBaseUser(), - time.Duration(constant.UserId2QuotaCacheSeconds)*time.Second, + time.Duration(constant.RedisKeyCacheSeconds())*time.Second, ) }