Merge pull request #1248 from RedwindA/update-gemini-ratio
feat(model-ratio): add default ratios for new Gemini models and refine flash model handling
This commit is contained in:
@@ -103,7 +103,6 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
|
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
|
||||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
|
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
|
||||||
is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
|
|
||||||
|
|
||||||
if strings.Contains(modelName, "-thinking-") {
|
if strings.Contains(modelName, "-thinking-") {
|
||||||
parts := strings.SplitN(modelName, "-thinking-", 2)
|
parts := strings.SplitN(modelName, "-thinking-", 2)
|
||||||
@@ -142,7 +141,7 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if strings.HasSuffix(modelName, "-nothinking") {
|
} else if strings.HasSuffix(modelName, "-nothinking") {
|
||||||
if !isNew25Pro && !is25FlashLite {
|
if !isNew25Pro {
|
||||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||||
ThinkingBudget: common.GetPointer(0),
|
ThinkingBudget: common.GetPointer(0),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ const (
|
|||||||
const (
|
const (
|
||||||
// Gemini Audio Input Price
|
// Gemini Audio Input Price
|
||||||
Gemini25FlashPreviewInputAudioPrice = 1.00
|
Gemini25FlashPreviewInputAudioPrice = 1.00
|
||||||
|
Gemini25FlashProductionInputAudioPrice = 1.00 // for `gemini-2.5-flash`
|
||||||
|
Gemini25FlashLitePreviewInputAudioPrice = 0.50
|
||||||
Gemini25FlashNativeAudioInputAudioPrice = 3.00
|
Gemini25FlashNativeAudioInputAudioPrice = 3.00
|
||||||
Gemini20FlashInputAudioPrice = 0.70
|
Gemini20FlashInputAudioPrice = 0.70
|
||||||
)
|
)
|
||||||
@@ -64,10 +66,14 @@ func GetFileSearchPricePerThousand() float64 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GetGeminiInputAudioPricePerMillionTokens(modelName string) float64 {
|
func GetGeminiInputAudioPricePerMillionTokens(modelName string) float64 {
|
||||||
if strings.HasPrefix(modelName, "gemini-2.5-flash-preview") {
|
if strings.HasPrefix(modelName, "gemini-2.5-flash-preview-native-audio") {
|
||||||
return Gemini25FlashPreviewInputAudioPrice
|
|
||||||
} else if strings.HasPrefix(modelName, "gemini-2.5-flash-preview-native-audio") {
|
|
||||||
return Gemini25FlashNativeAudioInputAudioPrice
|
return Gemini25FlashNativeAudioInputAudioPrice
|
||||||
|
} else if strings.HasPrefix(modelName, "gemini-2.5-flash-preview-lite") {
|
||||||
|
return Gemini25FlashLitePreviewInputAudioPrice
|
||||||
|
} else if strings.HasPrefix(modelName, "gemini-2.5-flash-preview") {
|
||||||
|
return Gemini25FlashPreviewInputAudioPrice
|
||||||
|
} else if strings.HasPrefix(modelName, "gemini-2.5-flash") {
|
||||||
|
return Gemini25FlashProductionInputAudioPrice
|
||||||
} else if strings.HasPrefix(modelName, "gemini-2.0-flash") {
|
} else if strings.HasPrefix(modelName, "gemini-2.0-flash") {
|
||||||
return Gemini20FlashInputAudioPrice
|
return Gemini20FlashInputAudioPrice
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -140,6 +140,7 @@ var defaultModelRatio = map[string]float64{
|
|||||||
"gemini-2.0-flash": 0.05,
|
"gemini-2.0-flash": 0.05,
|
||||||
"gemini-2.5-pro-exp-03-25": 0.625,
|
"gemini-2.5-pro-exp-03-25": 0.625,
|
||||||
"gemini-2.5-pro-preview-03-25": 0.625,
|
"gemini-2.5-pro-preview-03-25": 0.625,
|
||||||
|
"gemini-2.5-pro": 0.625,
|
||||||
"gemini-2.5-flash-preview-04-17": 0.075,
|
"gemini-2.5-flash-preview-04-17": 0.075,
|
||||||
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
|
"gemini-2.5-flash-preview-04-17-thinking": 0.075,
|
||||||
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
|
"gemini-2.5-flash-preview-04-17-nothinking": 0.075,
|
||||||
@@ -148,6 +149,8 @@ var defaultModelRatio = map[string]float64{
|
|||||||
"gemini-2.5-flash-preview-05-20-nothinking": 0.075,
|
"gemini-2.5-flash-preview-05-20-nothinking": 0.075,
|
||||||
"gemini-2.5-flash-thinking-*": 0.075, // 用于为后续所有2.5 flash thinking budget 模型设置默认倍率
|
"gemini-2.5-flash-thinking-*": 0.075, // 用于为后续所有2.5 flash thinking budget 模型设置默认倍率
|
||||||
"gemini-2.5-pro-thinking-*": 0.625, // 用于为后续所有2.5 pro thinking budget 模型设置默认倍率
|
"gemini-2.5-pro-thinking-*": 0.625, // 用于为后续所有2.5 pro thinking budget 模型设置默认倍率
|
||||||
|
"gemini-2.5-flash-lite-preview-06-17": 0.05,
|
||||||
|
"gemini-2.5-flash": 0.15,
|
||||||
"text-embedding-004": 0.001,
|
"text-embedding-004": 0.001,
|
||||||
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
"chatglm_turbo": 0.3572, // ¥0.005 / 1k tokens
|
||||||
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
"chatglm_pro": 0.7143, // ¥0.01 / 1k tokens
|
||||||
@@ -423,7 +426,12 @@ func UpdateCompletionRatioByJSONString(jsonStr string) error {
|
|||||||
func GetCompletionRatio(name string) float64 {
|
func GetCompletionRatio(name string) float64 {
|
||||||
CompletionRatioMutex.RLock()
|
CompletionRatioMutex.RLock()
|
||||||
defer CompletionRatioMutex.RUnlock()
|
defer CompletionRatioMutex.RUnlock()
|
||||||
|
if strings.HasPrefix(name, "gpt-4-gizmo") {
|
||||||
|
name = "gpt-4-gizmo-*"
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(name, "gpt-4o-gizmo") {
|
||||||
|
name = "gpt-4o-gizmo-*"
|
||||||
|
}
|
||||||
if strings.Contains(name, "/") {
|
if strings.Contains(name, "/") {
|
||||||
if ratio, ok := CompletionRatio[name]; ok {
|
if ratio, ok := CompletionRatio[name]; ok {
|
||||||
return ratio
|
return ratio
|
||||||
@@ -441,12 +449,6 @@ func GetCompletionRatio(name string) float64 {
|
|||||||
|
|
||||||
func getHardcodedCompletionModelRatio(name string) (float64, bool) {
|
func getHardcodedCompletionModelRatio(name string) (float64, bool) {
|
||||||
lowercaseName := strings.ToLower(name)
|
lowercaseName := strings.ToLower(name)
|
||||||
if strings.HasPrefix(name, "gpt-4-gizmo") {
|
|
||||||
name = "gpt-4-gizmo-*"
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(name, "gpt-4o-gizmo") {
|
|
||||||
name = "gpt-4o-gizmo-*"
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") {
|
if strings.HasPrefix(name, "gpt-4") && !strings.HasSuffix(name, "-all") && !strings.HasSuffix(name, "-gizmo-*") {
|
||||||
if strings.HasPrefix(name, "gpt-4o") {
|
if strings.HasPrefix(name, "gpt-4o") {
|
||||||
if name == "gpt-4o-2024-05-13" {
|
if name == "gpt-4o-2024-05-13" {
|
||||||
@@ -500,12 +502,17 @@ func getHardcodedCompletionModelRatio(name string) (float64, bool) {
|
|||||||
return 4, true
|
return 4, true
|
||||||
} else if strings.HasPrefix(name, "gemini-2.5-pro") { // 移除preview来增加兼容性,这里假设正式版的倍率和preview一致
|
} else if strings.HasPrefix(name, "gemini-2.5-pro") { // 移除preview来增加兼容性,这里假设正式版的倍率和preview一致
|
||||||
return 8, true
|
return 8, true
|
||||||
} else if strings.HasPrefix(name, "gemini-2.5-flash") { // 同上
|
} else if strings.HasPrefix(name, "gemini-2.5-flash") { // 处理不同的flash模型倍率
|
||||||
if strings.HasSuffix(name, "-nothinking") {
|
if strings.HasPrefix(name, "gemini-2.5-flash-preview") {
|
||||||
return 4, false
|
if strings.HasSuffix(name, "-nothinking") {
|
||||||
} else {
|
return 4, true
|
||||||
return 3.5 / 0.6, false
|
}
|
||||||
|
return 3.5 / 0.15, true
|
||||||
}
|
}
|
||||||
|
if strings.HasPrefix(name, "gemini-2.5-flash-lite-preview") {
|
||||||
|
return 4, true
|
||||||
|
}
|
||||||
|
return 2.5 / 0.3, true
|
||||||
}
|
}
|
||||||
return 4, false
|
return 4, false
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user