Merge branch 'alpha'
This commit is contained in:
@@ -141,7 +141,11 @@ func RedisHSetObj(key string, obj interface{}, expiration time.Duration) error {
|
||||
|
||||
txn := RDB.TxPipeline()
|
||||
txn.HSet(ctx, key, data)
|
||||
txn.Expire(ctx, key, expiration)
|
||||
|
||||
// 只有在 expiration 大于 0 时才设置过期时间
|
||||
if expiration > 0 {
|
||||
txn.Expire(ctx, key, expiration)
|
||||
}
|
||||
|
||||
_, err := txn.Exec(ctx)
|
||||
if err != nil {
|
||||
|
||||
@@ -2,12 +2,10 @@ package constant
|
||||
|
||||
import "one-api/common"
|
||||
|
||||
var (
|
||||
TokenCacheSeconds = common.SyncFrequency
|
||||
UserId2GroupCacheSeconds = common.SyncFrequency
|
||||
UserId2QuotaCacheSeconds = common.SyncFrequency
|
||||
UserId2StatusCacheSeconds = common.SyncFrequency
|
||||
)
|
||||
// 使用函数来避免初始化顺序带来的赋值问题
|
||||
func RedisKeyCacheSeconds() int {
|
||||
return common.SyncFrequency
|
||||
}
|
||||
|
||||
// Cache keys
|
||||
const (
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
func cacheSetToken(token Token) error {
|
||||
key := common.GenerateHMAC(token.Key)
|
||||
token.Clean()
|
||||
err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.TokenCacheSeconds)*time.Second)
|
||||
err := common.RedisHSetObj(fmt.Sprintf("token:%s", key), &token, time.Duration(constant.RedisKeyCacheSeconds())*time.Second)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -70,7 +70,7 @@ func updateUserCache(user User) error {
|
||||
return common.RedisHSetObj(
|
||||
getUserCacheKey(user.Id),
|
||||
user.ToBaseUser(),
|
||||
time.Duration(constant.UserId2QuotaCacheSeconds)*time.Second,
|
||||
time.Duration(constant.RedisKeyCacheSeconds())*time.Second,
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package cohere
|
||||
import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -78,7 +77,7 @@ func stopReasonCohere2OpenAI(reason string) string {
|
||||
}
|
||||
|
||||
func cohereStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
|
||||
responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
|
||||
responseId := helper.GetResponseID(c)
|
||||
createdTime := common.GetTimestamp()
|
||||
usage := &dto.Usage{}
|
||||
responseText := ""
|
||||
|
||||
@@ -72,8 +72,11 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
|
||||
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
|
||||
|
||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||
// suffix -thinking and -nothinking
|
||||
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||
// 新增逻辑:处理 -thinking-<budget> 格式
|
||||
if strings.Contains(info.OriginModelName, "-thinking-") {
|
||||
parts := strings.Split(info.UpstreamModelName, "-thinking-")
|
||||
info.UpstreamModelName = parts[0]
|
||||
} else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 旧的适配
|
||||
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
|
||||
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"one-api/relay/helper"
|
||||
"one-api/service"
|
||||
"one-api/setting/model_setting"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
@@ -36,6 +37,13 @@ var geminiSupportedMimeTypes = map[string]bool{
|
||||
"video/flv": true,
|
||||
}
|
||||
|
||||
// Thinking-budget bounds accepted by Gemini. A budget parsed from a
// "-thinking-<budget>" model-name suffix is clamped into these ranges before
// it is sent upstream.
const (
	// pro25MinBudget is the smallest thinking budget used for the newer
	// gemini-2.5-pro models.
	pro25MinBudget = 128
	// pro25MaxBudget is the largest thinking budget used for the newer
	// gemini-2.5-pro models.
	pro25MaxBudget = 32768
	// flash25MaxBudget is the largest thinking budget used for every other
	// model; their lower bound is 0.
	flash25MaxBudget = 24576
)
|
||||
|
||||
// Setting safety to the lowest possible values since Gemini is already powerless enough
|
||||
func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) {
|
||||
|
||||
@@ -57,7 +65,40 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
||||
}
|
||||
|
||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||
// 新增逻辑:处理 -thinking-<budget> 格式
|
||||
if strings.Contains(info.OriginModelName, "-thinking-") {
|
||||
parts := strings.SplitN(info.OriginModelName, "-thinking-", 2)
|
||||
if len(parts) == 2 && parts[1] != "" {
|
||||
if budgetTokens, err := strconv.Atoi(parts[1]); err == nil {
|
||||
// 从模型名称成功解析预算
|
||||
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
|
||||
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
|
||||
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
|
||||
|
||||
if isNew25Pro {
|
||||
// 新的2.5pro模型:ThinkingBudget范围为128-32768
|
||||
if budgetTokens < pro25MinBudget {
|
||||
budgetTokens = pro25MinBudget
|
||||
} else if budgetTokens > pro25MaxBudget {
|
||||
budgetTokens = pro25MaxBudget
|
||||
}
|
||||
} else {
|
||||
// 其他模型:ThinkingBudget范围为0-24576
|
||||
if budgetTokens < 0 {
|
||||
budgetTokens = 0
|
||||
} else if budgetTokens > flash25MaxBudget {
|
||||
budgetTokens = flash25MaxBudget
|
||||
}
|
||||
}
|
||||
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||
ThinkingBudget: common.GetPointer(budgetTokens),
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
}
|
||||
// 如果解析失败,则不设置ThinkingConfig,静默处理
|
||||
}
|
||||
} else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 保留旧逻辑以兼容
|
||||
// 硬编码不支持 ThinkingBudget 的旧模型
|
||||
unsupportedModels := []string{
|
||||
"gemini-2.5-pro-preview-05-06",
|
||||
@@ -611,9 +652,9 @@ func getResponseToolCall(item *GeminiPart) *dto.ToolCallResponse {
|
||||
}
|
||||
}
|
||||
|
||||
func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResponse {
|
||||
func responseGeminiChat2OpenAI(c *gin.Context, response *GeminiChatResponse) *dto.OpenAITextResponse {
|
||||
fullTextResponse := dto.OpenAITextResponse{
|
||||
Id: fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
|
||||
Id: helper.GetResponseID(c),
|
||||
Object: "chat.completion",
|
||||
Created: common.GetTimestamp(),
|
||||
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
|
||||
@@ -754,7 +795,7 @@ func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.C
|
||||
|
||||
func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
|
||||
// responseText := ""
|
||||
id := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
|
||||
id := helper.GetResponseID(c)
|
||||
createAt := common.GetTimestamp()
|
||||
var usage = &dto.Usage{}
|
||||
var imageCount int
|
||||
@@ -849,7 +890,7 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
|
||||
StatusCode: resp.StatusCode,
|
||||
}, nil
|
||||
}
|
||||
fullTextResponse := responseGeminiChat2OpenAI(&geminiResponse)
|
||||
fullTextResponse := responseGeminiChat2OpenAI(c, &geminiResponse)
|
||||
fullTextResponse.Model = info.UpstreamModelName
|
||||
usage := dto.Usage{
|
||||
PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
|
||||
|
||||
@@ -2,7 +2,6 @@ package palm
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"github.com/gin-gonic/gin"
|
||||
"io"
|
||||
"net/http"
|
||||
@@ -73,7 +72,7 @@ func streamResponsePaLM2OpenAI(palmResponse *PaLMChatResponse) *dto.ChatCompleti
|
||||
|
||||
func palmStreamHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, string) {
|
||||
responseText := ""
|
||||
responseId := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
|
||||
responseId := helper.GetResponseID(c)
|
||||
createdTime := common.GetTimestamp()
|
||||
dataChan := make(chan string)
|
||||
stopChan := make(chan bool)
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
"sort"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -210,8 +211,23 @@ func validateFAQ(faqStr string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// getPublishTime reads the "publishDate" entry of item and parses it as an
// RFC 3339 timestamp.
//
// It returns the zero time.Time when the entry is missing, is not a string,
// or cannot be parsed.
func getPublishTime(item map[string]interface{}) time.Time {
	// A missing key yields a nil interface value, for which the type
	// assertion reports false, so one check covers both "absent" and
	// "not a string".
	raw, isString := item["publishDate"].(string)
	if !isString {
		return time.Time{}
	}

	published, err := time.Parse(time.RFC3339, raw)
	if err != nil {
		return time.Time{}
	}
	return published
}
|
||||
|
||||
func GetAnnouncements() []map[string]interface{} {
|
||||
return getJSONList(GetConsoleSetting().Announcements)
|
||||
list := getJSONList(GetConsoleSetting().Announcements)
|
||||
sort.SliceStable(list, func(i, j int) bool {
|
||||
return getPublishTime(list[i]).After(getPublishTime(list[j]))
|
||||
})
|
||||
return list
|
||||
}
|
||||
|
||||
func GetFAQ() []map[string]interface{} {
|
||||
|
||||
@@ -142,6 +142,11 @@ var defaultModelRatio = map[string]float64{
|
||||
"gemini-2.5-flash-preview-04-17": 0.075,
|
||||
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
|
||||
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
|
||||
"gemini-2.5-flash-preview-05-20": 0.075,
|
||||
"gemini-2.5-flash-preview-05-20-thinking": 0.075,
|
||||
"gemini-2.5-flash-preview-05-20-nothinking": 0.075,
|
||||
"gemini-2.5-flash-thinking-*": 0.075, // 用于为后续所有2.5 flash thinking budget 模型设置默认倍率
|
||||
"gemini-2.5-pro-thinking-*": 0.625, // 用于为后续所有2.5 pro thinking budget 模型设置默认倍率
|
||||
"text-embedding-004": 0.001,
|
||||
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
||||
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
||||
@@ -342,10 +347,20 @@ func UpdateModelRatioByJSONString(jsonStr string) error {
|
||||
return json.Unmarshal([]byte(jsonStr), &modelRatioMap)
|
||||
}
|
||||
|
||||
// handleThinkingBudgetModel collapses a model name that carries a thinking
// budget onto a shared wildcard key so such models can be priced uniformly.
//
// It returns wildcard when name starts with prefix and contains
// "-thinking-"; otherwise it returns name unchanged.
func handleThinkingBudgetModel(name, prefix, wildcard string) string {
	if !strings.HasPrefix(name, prefix) {
		return name
	}
	if !strings.Contains(name, "-thinking-") {
		return name
	}
	return wildcard
}
|
||||
|
||||
func GetModelRatio(name string) (float64, bool) {
|
||||
modelRatioMapMutex.RLock()
|
||||
defer modelRatioMapMutex.RUnlock()
|
||||
|
||||
name = handleThinkingBudgetModel(name, "gemini-2.5-flash", "gemini-2.5-flash-thinking-*")
|
||||
name = handleThinkingBudgetModel(name, "gemini-2.5-pro", "gemini-2.5-pro-thinking-*")
|
||||
if strings.HasPrefix(name, "gpt-4-gizmo") {
|
||||
name = "gpt-4-gizmo-*"
|
||||
}
|
||||
@@ -470,9 +485,9 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) {
|
||||
return 4, true
|
||||
} else if strings.HasPrefix(name, "gemini-2.0") {
|
||||
return 4, true
|
||||
} else if strings.HasPrefix(name, "gemini-2.5-pro-preview") {
|
||||
} else if strings.HasPrefix(name, "gemini-2.5-pro") { // 移除preview来增加兼容性,这里假设正式版的倍率和preview一致
|
||||
return 8, true
|
||||
} else if strings.HasPrefix(name, "gemini-2.5-flash-preview") {
|
||||
} else if strings.HasPrefix(name, "gemini-2.5-flash") { // 同上
|
||||
if strings.HasSuffix(name, "-nothinking") {
|
||||
return 4, false
|
||||
} else {
|
||||
|
||||
@@ -1373,6 +1373,12 @@
|
||||
"示例": "Example",
|
||||
"缺省 MaxTokens": "Default MaxTokens",
|
||||
"启用Claude思考适配(-thinking后缀)": "Enable Claude thinking adaptation (-thinking suffix)",
|
||||
"和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用,": "Unlike Claude, Gemini's thinking model automatically decides whether to think by default, and can be used normally even without enabling the adaptation model.",
|
||||
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置。": "If you need billing, it is recommended to set the no-suffix model price according to the thinking price.",
|
||||
"支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。": "Supports using gemini-2.5-pro-preview-06-05-thinking-128 format to precisely pass thinking budget.",
|
||||
"启用Gemini思考后缀适配": "Enable Gemini thinking suffix adaptation",
|
|
"适配 -thinking、-thinking-预算数字 和 -nothinking 后缀": "Adapt -thinking, -thinking-budgetNumber, and -nothinking suffixes",
|
||||
"思考预算占比": "Thinking budget ratio",
|
||||
"Claude思考适配 BudgetTokens = MaxTokens * BudgetTokens 百分比": "Claude thinking adaptation BudgetTokens = MaxTokens * BudgetTokens percentage",
|
||||
"思考适配 BudgetTokens 百分比": "Thinking adaptation BudgetTokens percentage",
|
||||
"0.1-1之间的小数": "Decimal between 0.1 and 1",
|
||||
|
||||
@@ -173,7 +173,8 @@ export default function SettingGeminiModel(props) {
|
||||
<Text>
|
||||
{t(
|
||||
"和Claude不同,默认情况下Gemini的思考模型会自动决定要不要思考,就算不开启适配模型也可以正常使用," +
|
||||
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置"
|
||||
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置。" +
|
||||
"支持使用 gemini-2.5-pro-preview-06-05-thinking-128 格式来精确传递思考预算。"
|
||||
)}
|
||||
</Text>
|
||||
</Col>
|
||||
@@ -183,7 +184,7 @@ export default function SettingGeminiModel(props) {
|
||||
<Form.Switch
|
||||
label={t('启用Gemini思考后缀适配')}
|
||||
field={'gemini.thinking_adapter_enabled'}
|
||||
extraText={"适配-thinking和-nothinking后缀"}
|
||||
extraText={t('适配 -thinking、-thinking-预算数字 和 -nothinking 后缀')}
|
||||
onChange={(value) =>
|
||||
setInputs({
|
||||
...inputs,
|
||||
@@ -205,7 +206,7 @@ export default function SettingGeminiModel(props) {
|
||||
<Row>
|
||||
<Col xs={24} sm={12} md={8} lg={8} xl={8}>
|
||||
<Form.InputNumber
|
||||
label={t('请求模型带-thinking后缀的BudgetTokens数(超出24576的部分将被忽略)')}
|
||||
label={t('思考预算占比')}
|
||||
field={'gemini.thinking_adapter_budget_tokens_percentage'}
|
||||
initValue={''}
|
||||
extraText={t('0.1-1之间的小数')}
|
||||
|
||||
Reference in New Issue
Block a user