From d5746ac347ea337385fc975507ce6803b071441d Mon Sep 17 00:00:00 2001 From: "1808837298@qq.com" <1808837298@qq.com> Date: Mon, 3 Feb 2025 14:22:34 +0800 Subject: [PATCH] feat: add reasoning effort configuration for models - Support setting reasoning effort via model name suffix - Add `-high`, `-medium`, and `-low` suffixes to control reasoning effort - Update README with new model configuration option - Modify OpenAI adaptor to handle reasoning effort settings --- README.md | 8 ++++++-- common/model-ratio.go | 30 ++++++++++++++++++------------ relay/channel/openai/adaptor.go | 7 +++++++ relay/channel/openai/constant.go | 3 +++ 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 8e868061..28400480 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,10 @@ 14. 🔄 支持Rerank模型,目前兼容Cohere和Jina,可接入Dify,[对接文档](Rerank.md) 15. ⚡ **[OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime/integration)** - 支持OpenAI的Realtime API,支持Azure渠道 16. 支持使用路由/chat2link 进入聊天界面 +17. 🧠 支持通过模型名称后缀设置 reasoning effort: + - 添加后缀 `-high` 设置为 high reasoning effort (例如: `o3-mini-high`) + - 添加后缀 `-medium` 设置为 medium reasoning effort (例如: `o3-mini-medium`) + - 添加后缀 `-low` 设置为 low reasoning effort (例如: `o3-mini-low`) ## 模型支持 此版本额外支持以下模型: @@ -85,8 +89,8 @@ - `GET_MEDIA_TOKEN`:是否统计图片token,默认为 `true`,关闭后将不再在本地计算图片token,可能会导致和上游计费不同,此项覆盖 `GET_MEDIA_TOKEN_NOT_STREAM` 选项作用。 - `GET_MEDIA_TOKEN_NOT_STREAM`:是否在非流(`stream=false`)情况下统计图片token,默认为 `true`。 - `UPDATE_TASK`:是否更新异步任务(Midjourney、Suno),默认为 `true`,关闭后将不会更新任务进度。 -- `GEMINI_MODEL_MAP`:Gemini模型指定版本(v1/v1beta),使用“模型:版本”指定,","分隔,例如:-e GEMINI_MODEL_MAP="gemini-1.5-pro-latest:v1beta,gemini-1.5-pro-001:v1beta",为空则使用默认配置(v1beta) -- `COHERE_SAFETY_SETTING`:Cohere模型[安全设置](https://docs.cohere.com/docs/safety-modes#overview),可选值为 `NONE`, `CONTEXTUAL`,`STRICT`,默认为 `NONE`。 +- `GEMINI_MODEL_MAP`:Gemini模型指定版本(v1/v1beta),使用"模型:版本"指定,","分隔,例如:-e GEMINI_MODEL_MAP="gemini-1.5-pro-latest:v1beta,gemini-1.5-pro-001:v1beta",为空则使用默认配置(v1beta) +- `COHERE_SAFETY_SETTING`:Cohere模型[安全设置](https://docs.cohere.com/docs/safety-modes#overview),可选值为 `NONE`, `CONTEXTUAL`, `STRICT`,默认为 `NONE`。 - `GEMINI_VISION_MAX_IMAGE_NUM`:Gemini模型最大图片数量,默认为 `16`,设置为 `-1` 则不限制。 - `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小,单位 MB,默认为 `20`。 - `CRYPTO_SECRET`:加密密钥,用于加密数据库内容。 diff --git a/common/model-ratio.go b/common/model-ratio.go index a0fb3383..e3482ba1 100644 --- a/common/model-ratio.go +++ b/common/model-ratio.go @@ -50,18 +50,24 @@ var defaultModelRatio = map[string]float64{ "gpt-4o-realtime-preview-2024-12-17": 2.5, "gpt-4o-mini-realtime-preview": 0.3, "gpt-4o-mini-realtime-preview-2024-12-17": 0.3, - "o1": 7.5, - "o1-2024-12-17": 7.5, - "o1-preview": 7.5, - "o1-preview-2024-09-12": 7.5, - "o1-mini": 0.55, - "o1-mini-2024-09-12": 0.55, - "o3-mini": 0.55, - "o3-mini-2025-01-31": 0.55, - "gpt-4o-mini": 0.075, - "gpt-4o-mini-2024-07-18": 0.075, - "gpt-4-turbo": 5, // $0.01 / 1K tokens - "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens + "o1": 7.5, + "o1-2024-12-17": 7.5, + "o1-preview": 7.5, + "o1-preview-2024-09-12": 7.5, + "o1-mini": 0.55, + "o1-mini-2024-09-12": 0.55, + "o3-mini": 0.55, + "o3-mini-2025-01-31": 0.55, + "o3-mini-high": 0.55, + "o3-mini-2025-01-31-high": 0.55, + "o3-mini-low": 0.55, + "o3-mini-2025-01-31-low": 0.55, + "o3-mini-medium": 0.55, + "o3-mini-2025-01-31-medium": 0.55, + "gpt-4o-mini": 0.075, + "gpt-4o-mini-2024-07-18": 0.075, + "gpt-4-turbo": 5, // $0.01 / 1K tokens + "gpt-4-turbo-2024-04-09": 5, // $0.01 / 1K tokens //"gpt-3.5-turbo-0301": 0.75, //deprecated "gpt-3.5-turbo": 0.25, "gpt-3.5-turbo-0613": 0.75, diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index 4557a6f1..2de611b7 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -117,6 +117,13 @@ func (a *Adaptor) ConvertRequest(c *gin.Context, info *relaycommon.RelayInfo, re if strings.HasPrefix(request.Model, "o3") { request.Temperature = nil } + if strings.HasSuffix(request.Model, "high") { + request.ReasoningEffort = "high" + } else if strings.HasSuffix(request.Model, "low") { + request.ReasoningEffort = "low" + } else if strings.HasSuffix(request.Model, "medium") { + request.ReasoningEffort = "medium" + } } if request.Model == "o1" || request.Model == "o1-2024-12-17" || strings.HasPrefix(request.Model, "o3") { //修改第一个Message的内容,将system改为developer diff --git a/relay/channel/openai/constant.go b/relay/channel/openai/constant.go index 0e598fd8..d55242ed 100644 --- a/relay/channel/openai/constant.go +++ b/relay/channel/openai/constant.go @@ -14,6 +14,9 @@ var ModelList = []string{ "o1-preview", "o1-preview-2024-09-12", "o1-mini", "o1-mini-2024-09-12", "o3-mini", "o3-mini-2025-01-31", + "o3-mini-high", "o3-mini-2025-01-31-high", + "o3-mini-low", "o3-mini-2025-01-31-low", + "o3-mini-medium", "o3-mini-2025-01-31-medium", "o1", "o1-2024-12-17", "gpt-4o-audio-preview", "gpt-4o-audio-preview-2024-10-01", "gpt-4o-realtime-preview", "gpt-4o-realtime-preview-2024-10-01", "gpt-4o-realtime-preview-2024-12-17",