diff --git a/README.en.md b/README.en.md index 1ebea4e5..dc6696a0 100644 --- a/README.en.md +++ b/README.en.md @@ -59,6 +59,10 @@ 13. 🎵 Added [Suno API](https://github.com/Suno-API/Suno-API) interface support, [Integration Guide](Suno.md) 14. 🔄 Support for Rerank models, compatible with Cohere and Jina, can integrate with Dify, [Integration Guide](Rerank.md) 15. ⚡ **[OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime/integration)** - Support for OpenAI's Realtime API, including Azure channels +16. 🧠 Support for setting reasoning effort through model name suffix: + - Add suffix `-high` to set high reasoning effort (e.g., `o3-mini-high`) + - Add suffix `-medium` to set medium reasoning effort + - Add suffix `-low` to set low reasoning effort ## Model Support This version additionally supports: @@ -84,6 +88,7 @@ You can add custom models gpt-4-gizmo-* in channels. These are third-party model - `GEMINI_VISION_MAX_IMAGE_NUM`: Gemini model maximum image number, default `16`, set to `-1` to disable - `MAX_FILE_DOWNLOAD_MB`: Maximum file download size in MB, default `20` - `CRYPTO_SECRET`: Encryption key for encrypting database content +- `AZURE_DEFAULT_API_VERSION`: Default API version for Azure channels, used when a channel's settings do not specify one, default `2024-12-01-preview` ## Deployment > [!TIP] diff --git a/README.md b/README.md index 28400480..8ee539ec 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,7 @@ - `GEMINI_VISION_MAX_IMAGE_NUM`:Gemini模型最大图片数量,默认为 `16`,设置为 `-1` 则不限制。 - `MAX_FILE_DOWNLOAD_MB`: 最大文件下载大小,单位 MB,默认为 `20`。 - `CRYPTO_SECRET`:加密密钥,用于加密数据库内容。 +- `AZURE_DEFAULT_API_VERSION`:Azure渠道默认API版本,如果渠道设置中未指定API版本,则使用此版本,默认为 `2024-12-01-preview`。 ## 部署 > [!TIP] > 最新版Docker镜像:`calciumion/new-api:latest` diff --git a/constant/env.go b/constant/env.go index cd2d71b2..4135e8c7 100644 --- a/constant/env.go +++ b/constant/env.go @@ -21,6 +21,8 @@ var GetMediaTokenNotStream = common.GetEnvOrDefaultBool("GET_MEDIA_TOKEN_NOT_STR var 
UpdateTask = common.GetEnvOrDefaultBool("UPDATE_TASK", true) +var AzureDefaultAPIVersion = common.GetEnvOrDefaultString("AZURE_DEFAULT_API_VERSION", "2024-12-01-preview") + var GeminiModelMap = map[string]string{ "gemini-1.0-pro": "v1", } diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index e0b762d4..68c36528 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -10,6 +10,7 @@ import ( "mime/multipart" "net/http" "one-api/common" + constant2 "one-api/constant" "one-api/dto" "one-api/relay/channel" "one-api/relay/channel/ai360" @@ -44,16 +45,20 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) { } switch info.ChannelType { case common.ChannelTypeAzure: + apiVersion := info.ApiVersion + if apiVersion == "" { + apiVersion = constant2.AzureDefaultAPIVersion + } // https://learn.microsoft.com/en-us/azure/cognitive-services/openai/chatgpt-quickstart?pivots=rest-api&tabs=command-line#rest-api requestURL := strings.Split(info.RequestURLPath, "?")[0] - requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, info.ApiVersion) + requestURL = fmt.Sprintf("%s?api-version=%s", requestURL, apiVersion) task := strings.TrimPrefix(requestURL, "/v1/") model_ := info.UpstreamModelName model_ = strings.Replace(model_, ".", "", -1) // https://github.com/songquanpeng/one-api/issues/67 requestURL = fmt.Sprintf("/openai/deployments/%s/%s", model_, task) if info.RelayMode == constant.RelayModeRealtime { - requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, info.ApiVersion) + requestURL = fmt.Sprintf("/openai/realtime?deployment=%s&api-version=%s", model_, apiVersion) } return relaycommon.GetFullRequestURL(info.BaseUrl, requestURL, info.ChannelType), nil case common.ChannelTypeMiniMax: diff --git a/relay/relay-text.go b/relay/relay-text.go index 0cf5dcb4..f303ff6a 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -219,7 +219,7 @@ func TextHelper(c 
*gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) { return openaiErr } - if strings.HasPrefix(relayInfo.UpstreamModelName, "gpt-4o-audio") { + if strings.HasPrefix(relayInfo.RecodeModelName, "gpt-4o-audio") { service.PostAudioConsumeQuota(c, relayInfo, usage.(*dto.Usage), preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "") } else { postConsumeQuota(c, relayInfo, relayInfo.RecodeModelName, usage.(*dto.Usage), ratio, preConsumedQuota, userQuota, modelRatio, groupRatio, modelPrice, getModelPriceSuccess, "") diff --git a/service/quota.go b/service/quota.go index 234ddc5b..ab048008 100644 --- a/service/quota.go +++ b/service/quota.go @@ -182,9 +182,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, audioOutTokens := usage.CompletionTokenDetails.AudioTokens tokenName := ctx.GetString("token_name") - completionRatio := common.GetCompletionRatio(relayInfo.UpstreamModelName) - audioRatio := common.GetAudioRatio(relayInfo.UpstreamModelName) - audioCompletionRatio := common.GetAudioCompletionRatio(relayInfo.UpstreamModelName) + completionRatio := common.GetCompletionRatio(relayInfo.RecodeModelName) + audioRatio := common.GetAudioRatio(relayInfo.RecodeModelName) + audioCompletionRatio := common.GetAudioCompletionRatio(relayInfo.RecodeModelName) quotaInfo := QuotaInfo{ InputDetails: TokenDetails{ @@ -195,7 +195,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, TextTokens: textOutTokens, AudioTokens: audioOutTokens, }, - ModelName: relayInfo.UpstreamModelName, + ModelName: relayInfo.RecodeModelName, UsePrice: usePrice, ModelRatio: modelRatio, GroupRatio: groupRatio, @@ -218,7 +218,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, quota = 0 logContent += fmt.Sprintf("(可能是上游超时)") common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+ - "tokenId %d, model %s, pre-consumed quota %d", 
relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.UpstreamModelName, preConsumedQuota)) + "tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.RecodeModelName, preConsumedQuota)) } else { quotaDelta := quota - preConsumedQuota if quotaDelta != 0 { @@ -231,7 +231,7 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota) } - logModel := relayInfo.UpstreamModelName + logModel := relayInfo.RecodeModelName if extraContent != "" { logContent += ", " + extraContent }