feat: add gemini thinking suffix support #981

This commit is contained in:
CaIon
2025-04-18 19:36:18 +08:00
parent 1eebdc4773
commit 4c4dc6e8b4
11 changed files with 268 additions and 133 deletions

View File

@@ -1,5 +1,7 @@
package dto package dto
import "encoding/json"
type ImageRequest struct { type ImageRequest struct {
Model string `json:"model"` Model string `json:"model"`
Prompt string `json:"prompt" binding:"required"` Prompt string `json:"prompt" binding:"required"`
@@ -9,6 +11,7 @@ type ImageRequest struct {
ResponseFormat string `json:"response_format,omitempty"` ResponseFormat string `json:"response_format,omitempty"`
Style string `json:"style,omitempty"` Style string `json:"style,omitempty"`
User string `json:"user,omitempty"` User string `json:"user,omitempty"`
ExtraFields json.RawMessage `json:"extra_fields,omitempty"`
} }
type ImageResponse struct { type ImageResponse struct {

View File

@@ -173,3 +173,17 @@ type Usage struct {
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
} }
// InputTokenDetails breaks down prompt-side token usage by category.
// It is serialized as prompt_tokens_details on Usage (see the field above).
type InputTokenDetails struct {
// CachedTokens is the number of prompt tokens served from cache.
CachedTokens int `json:"cached_tokens"`
// CachedCreationTokens is tracked internally but excluded from JSON output
// via the "-" tag; presumably cache-write tokens — TODO confirm with billing code.
CachedCreationTokens int `json:"-"`
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
ImageTokens int `json:"image_tokens"`
}
// OutputTokenDetails breaks down completion-side token usage by category.
// It is serialized as completion_tokens_details on Usage (see the field above).
type OutputTokenDetails struct {
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
// ReasoningTokens counts thinking/reasoning tokens; for Gemini this is
// populated from UsageMetadata.thoughtsTokenCount.
ReasoningTokens int `json:"reasoning_tokens"`
}

View File

@@ -43,20 +43,6 @@ type RealtimeUsage struct {
OutputTokenDetails OutputTokenDetails `json:"output_token_details"` OutputTokenDetails OutputTokenDetails `json:"output_token_details"`
} }
type InputTokenDetails struct {
CachedTokens int `json:"cached_tokens"`
CachedCreationTokens int `json:"-"`
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
ImageTokens int `json:"image_tokens"`
}
type OutputTokenDetails struct {
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
ReasoningTokens int `json:"reasoning_tokens"`
}
type RealtimeSession struct { type RealtimeSession struct {
Modalities []string `json:"modalities"` Modalities []string `json:"modalities"`
Instructions string `json:"instructions"` Instructions string `json:"instructions"`

View File

@@ -12,7 +12,6 @@ import (
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"one-api/service" "one-api/service"
"one-api/setting/model_setting" "one-api/setting/model_setting"
"strings" "strings"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -70,6 +69,16 @@ func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
} }
func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
// suffix -thinking and -nothinking
if strings.HasSuffix(info.OriginModelName, "-thinking") {
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
}
}
version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName) version := model_setting.GetGeminiVersionSetting(info.UpstreamModelName)
if strings.HasPrefix(info.UpstreamModelName, "imagen") { if strings.HasPrefix(info.UpstreamModelName, "imagen") {
@@ -99,11 +108,13 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
if request == nil { if request == nil {
return nil, errors.New("request is nil") return nil, errors.New("request is nil")
} }
ai, err := CovertGemini2OpenAI(*request, info)
geminiRequest, err := CovertGemini2OpenAI(*request, info)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return ai, nil
return geminiRequest, nil
} }
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) { func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
@@ -165,6 +176,18 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
} else { } else {
err, usage = GeminiChatHandler(c, resp, info) err, usage = GeminiChatHandler(c, resp, info)
} }
//if usage.(*dto.Usage).CompletionTokenDetails.ReasoningTokens > 100 {
// // 没有请求-thinking的情况下产生思考token则按照思考模型计费
// if !strings.HasSuffix(info.OriginModelName, "-thinking") &&
// !strings.HasSuffix(info.OriginModelName, "-nothinking") {
// thinkingModelName := info.OriginModelName + "-thinking"
// if operation_setting.SelfUseModeEnabled || helper.ContainPriceOrRatio(thinkingModelName) {
// info.OriginModelName = thinkingModelName
// }
// }
//}
return return
} }

View File

@@ -8,6 +8,15 @@ type GeminiChatRequest struct {
SystemInstructions *GeminiChatContent `json:"system_instruction,omitempty"` SystemInstructions *GeminiChatContent `json:"system_instruction,omitempty"`
} }
// GeminiThinkingConfig controls Gemini's thinking behaviour for a request:
// whether thought summaries are included in the response and how many tokens
// the model may spend on thinking.
type GeminiThinkingConfig struct {
IncludeThoughts bool `json:"includeThoughts,omitempty"`
ThinkingBudget *int `json:"thinkingBudget,omitempty"`
}

// SetThinkingBudget records budget as the thinking-token limit. The field is
// a pointer so that an explicit 0 (thinking disabled) still serializes, while
// an unset budget is dropped from JSON by omitempty.
func (c *GeminiThinkingConfig) SetThinkingBudget(budget int) {
value := budget
c.ThinkingBudget = &value
}
type GeminiInlineData struct { type GeminiInlineData struct {
MimeType string `json:"mimeType"` MimeType string `json:"mimeType"`
Data string `json:"data"` Data string `json:"data"`
@@ -81,6 +90,7 @@ type GeminiChatGenerationConfig struct {
ResponseSchema any `json:"responseSchema,omitempty"` ResponseSchema any `json:"responseSchema,omitempty"`
Seed int64 `json:"seed,omitempty"` Seed int64 `json:"seed,omitempty"`
ResponseModalities []string `json:"responseModalities,omitempty"` ResponseModalities []string `json:"responseModalities,omitempty"`
ThinkingConfig *GeminiThinkingConfig `json:"thinkingConfig,omitempty"`
} }
type GeminiChatCandidate struct { type GeminiChatCandidate struct {
@@ -109,6 +119,7 @@ type GeminiUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount"` PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"` CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"` TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
} }
// Imagen related structs // Imagen related structs

View File

@@ -23,12 +23,14 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
geminiRequest := GeminiChatRequest{ geminiRequest := GeminiChatRequest{
Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)), Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
//SafetySettings: []GeminiChatSafetySettings{},
GenerationConfig: GeminiChatGenerationConfig{ GenerationConfig: GeminiChatGenerationConfig{
Temperature: textRequest.Temperature, Temperature: textRequest.Temperature,
TopP: textRequest.TopP, TopP: textRequest.TopP,
MaxOutputTokens: textRequest.MaxTokens, MaxOutputTokens: textRequest.MaxTokens,
Seed: int64(textRequest.Seed), Seed: int64(textRequest.Seed),
ThinkingConfig: &GeminiThinkingConfig{
IncludeThoughts: true,
},
}, },
} }
@@ -39,6 +41,18 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
} }
} }
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
if strings.HasSuffix(info.OriginModelName, "-thinking") {
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
if budgetTokens == 0 || budgetTokens > 24576 {
budgetTokens = 24576
}
geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(int(budgetTokens))
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
geminiRequest.GenerationConfig.ThinkingConfig.SetThinkingBudget(0)
}
}
safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList)) safetySettings := make([]GeminiChatSafetySettings, 0, len(SafetySettingList))
for _, category := range SafetySettingList { for _, category := range SafetySettingList {
safetySettings = append(safetySettings, GeminiChatSafetySettings{ safetySettings = append(safetySettings, GeminiChatSafetySettings{
@@ -644,6 +658,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
if geminiResponse.UsageMetadata.TotalTokenCount != 0 { if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
} }
err = helper.ObjectData(c, response) err = helper.ObjectData(c, response)
if err != nil { if err != nil {
@@ -666,7 +681,7 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
usage.PromptTokensDetails.TextTokens = usage.PromptTokens usage.PromptTokensDetails.TextTokens = usage.PromptTokens
usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens //usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
if info.ShouldIncludeUsage { if info.ShouldIncludeUsage {
response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage) response = helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
@@ -712,6 +727,9 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount, CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount, TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
} }
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
fullTextResponse.Usage = usage fullTextResponse.Usage = usage
jsonResponse, err := json.Marshal(fullTextResponse) jsonResponse, err := json.Marshal(fullTextResponse)
if err != nil { if err != nil {

View File

@@ -49,11 +49,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
} }
} }
if !acceptUnsetRatio { if !acceptUnsetRatio {
if info.UserId == 1 { return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请联系管理员设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
} else {
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
}
} }
} }
completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName) completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
@@ -82,3 +78,15 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
return priceData, nil return priceData, nil
} }
// ContainPriceOrRatio reports whether modelName has either a fixed price or
// a ratio configured in the operation settings.
func ContainPriceOrRatio(modelName string) bool {
	if _, priced := operation_setting.GetModelPrice(modelName, false); priced {
		return true
	}
	_, hasRatio := operation_setting.GetModelRatio(modelName)
	return hasRatio
}

View File

@@ -9,6 +9,8 @@ type GeminiSettings struct {
SafetySettings map[string]string `json:"safety_settings"` SafetySettings map[string]string `json:"safety_settings"`
VersionSettings map[string]string `json:"version_settings"` VersionSettings map[string]string `json:"version_settings"`
SupportedImagineModels []string `json:"supported_imagine_models"` SupportedImagineModels []string `json:"supported_imagine_models"`
ThinkingAdapterEnabled bool `json:"thinking_adapter_enabled"`
ThinkingAdapterBudgetTokensPercentage float64 `json:"thinking_adapter_budget_tokens_percentage"`
} }
// 默认配置 // 默认配置
@@ -25,6 +27,8 @@ var defaultGeminiSettings = GeminiSettings{
"gemini-2.0-flash-exp-image-generation", "gemini-2.0-flash-exp-image-generation",
"gemini-2.0-flash-exp", "gemini-2.0-flash-exp",
}, },
ThinkingAdapterEnabled: false,
ThinkingAdapterBudgetTokensPercentage: 0.6,
} }
// 全局实例 // 全局实例

View File

@@ -136,6 +136,9 @@ var defaultModelRatio = map[string]float64{
"gemini-2.0-flash": 0.05, "gemini-2.0-flash": 0.05,
"gemini-2.5-pro-exp-03-25": 0.625, "gemini-2.5-pro-exp-03-25": 0.625,
"gemini-2.5-pro-preview-03-25": 0.625, "gemini-2.5-pro-preview-03-25": 0.625,
"gemini-2.5-flash-preview-04-17": 0.075,
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
"text-embedding-004": 0.001, "text-embedding-004": 0.001,
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens "chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens "chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
@@ -275,8 +278,6 @@ func InitModelSettings() {
cacheRatioMapMutex.Lock() cacheRatioMapMutex.Lock()
cacheRatioMap = defaultCacheRatio cacheRatioMap = defaultCacheRatio
cacheRatioMapMutex.Unlock() cacheRatioMapMutex.Unlock()
common.SysLog("model settings initialized")
} }
func GetModelPriceMap() map[string]float64 { func GetModelPriceMap() map[string]float64 {
@@ -459,6 +460,12 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) {
return 4, true return 4, true
} else if strings.HasPrefix(name, "gemini-2.5-pro-preview") { } else if strings.HasPrefix(name, "gemini-2.5-pro-preview") {
return 8, true return 8, true
} else if strings.HasPrefix(name, "gemini-2.5-flash-preview") {
if strings.HasSuffix(name, "-nothinking") {
return 4, false
} else {
return 3.5 / 0.6, false
}
} }
return 4, false return 4, false
} }

View File

@@ -20,6 +20,8 @@ const ModelSetting = () => {
'global.pass_through_request_enabled': false, 'global.pass_through_request_enabled': false,
'general_setting.ping_interval_enabled': false, 'general_setting.ping_interval_enabled': false,
'general_setting.ping_interval_seconds': 60, 'general_setting.ping_interval_seconds': 60,
'gemini.thinking_adapter_enabled': false,
'gemini.thinking_adapter_budget_tokens_percentage': 0.6,
}); });
let [loading, setLoading] = useState(false); let [loading, setLoading] = useState(false);

View File

@@ -9,6 +9,7 @@ import {
verifyJSON, verifyJSON,
} from '../../../helpers'; } from '../../../helpers';
import { useTranslation } from 'react-i18next'; import { useTranslation } from 'react-i18next';
import Text from '@douyinfe/semi-ui/lib/es/typography/text.js';
const GEMINI_SETTING_EXAMPLE = { const GEMINI_SETTING_EXAMPLE = {
default: 'OFF', default: 'OFF',
@@ -27,6 +28,8 @@ export default function SettingGeminiModel(props) {
'gemini.safety_settings': '', 'gemini.safety_settings': '',
'gemini.version_settings': '', 'gemini.version_settings': '',
'gemini.supported_imagine_models': [], 'gemini.supported_imagine_models': [],
'gemini.thinking_adapter_enabled': false,
'gemini.thinking_adapter_budget_tokens_percentage': 0.6,
}); });
const refForm = useRef(); const refForm = useRef();
const [inputsRow, setInputsRow] = useState(inputs); const [inputsRow, setInputsRow] = useState(inputs);
@@ -151,13 +154,69 @@ export default function SettingGeminiModel(props) {
/> />
</Col> </Col>
</Row> </Row>
</Form.Section>
<Form.Section text={t('Gemini思考适配设置')}>
<Row>
<Col span={16}>
<Text>
{t(
"和Claude不同默认情况下Gemini的思考模型会自动决定要不要思考就算不开启适配模型也可以正常使用" +
"-nothinking后缀BudgetTokens=0思考关闭也会返回少量的思考token这是gemini的特性" +
"如果您需要计费,推荐设置无后缀模型价格按思考价格设置"
)}
</Text>
</Col>
</Row>
<Row>
<Col span={16}>
<Form.Switch
label={t('启用Gemini思考后缀适配')}
field={'gemini.thinking_adapter_enabled'}
extraText={"适配-thinking和-nothinking后缀"}
onChange={(value) =>
setInputs({
...inputs,
'gemini.thinking_adapter_enabled': value,
})
}
/>
</Col>
</Row>
<Row>
<Col span={16}>
<Text>
{t(
'Gemini思考适配 BudgetTokens = MaxTokens * BudgetTokens 百分比',
)}
</Text>
</Col>
</Row>
<Row>
<Col xs={24} sm={12} md={8} lg={8} xl={8}>
<Form.InputNumber
label={t('请求模型带-thinking后缀的BudgetTokens数超出24576的部分将被忽略')}
field={'gemini.thinking_adapter_budget_tokens_percentage'}
initValue={''}
extraText={t('0.1-1之间的小数')}
min={0.1}
max={1}
onChange={(value) =>
setInputs({
...inputs,
'gemini.thinking_adapter_budget_tokens_percentage': value,
})
}
/>
</Col>
</Row>
</Form.Section>
<Row> <Row>
<Button size='default' onClick={onSubmit}> <Button size='default' onClick={onSubmit}>
{t('保存')} {t('保存')}
</Button> </Button>
</Row> </Row>
</Form.Section>
</Form> </Form>
</Spin> </Spin>
</> </>