From d608a6f12398f2a52617951685c6990877740def Mon Sep 17 00:00:00 2001
From: Akkuman
Date: Thu, 29 May 2025 10:56:01 +0800
Subject: [PATCH 1/4] feat: streaming response for tts

---
 relay/channel/openai/relay-openai.go | 37 ++++++++++------------------
 1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go
index 86c47a15..2e3d8df1 100644
--- a/relay/channel/openai/relay-openai.go
+++ b/relay/channel/openai/relay-openai.go
@@ -273,36 +273,25 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
 }
 
 func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
-	responseBody, err := io.ReadAll(resp.Body)
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
-	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-	// Reset response body
-	resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
-	// We shouldn't set the header before we parse the response body, because the parse part may fail.
-	// And then we will have to send an error response, but in this case, the header has already been set.
-	// So the httpClient will be confused by the response.
-	// For example, Postman will report error, and we cannot check the response at all.
+	// The status code has already been checked before we get here, so a failure while
+	// reading the body should be treated as a non-recoverable error and must not return
+	// err for an external retry. Analogous to nginx load balancing, which only retries
+	// when the upstream cannot be reached or returns a specific status code: once the
+	// upstream has already written the header, any subsequent failure of the response
+	// body is non-recoverable and the request can be terminated directly.
+	defer resp.Body.Close()
+	usage := &dto.Usage{}
+	usage.PromptTokens = info.PromptTokens
+	usage.TotalTokens = info.PromptTokens
 	for k, v := range resp.Header {
 		c.Writer.Header().Set(k, v[0])
 	}
 	c.Writer.WriteHeader(resp.StatusCode)
-	_, err = io.Copy(c.Writer, resp.Body)
+	c.Writer.WriteHeaderNow()
+	_, err := io.Copy(c.Writer, resp.Body)
 	if err != nil {
-		return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
+		common.LogError(c, err.Error())
 	}
-	err = resp.Body.Close()
-	if err != nil {
-		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
-	}
-
-	usage := &dto.Usage{}
-	usage.PromptTokens = info.PromptTokens
-	usage.TotalTokens = info.PromptTokens
 	return nil, usage
 }
 
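With this change, OpenaiTTSHandler forwards the upstream audio to the client as it arrives instead of buffering the whole response in memory first. As a rough sketch of how a caller could consume such a streamed TTS response, the standalone Go program below writes the audio to disk while it downloads; the base URL, API key, model, voice, endpoint path and output filename are illustrative assumptions, not values taken from this patch.

package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"os"
)

func main() {
	// All of the values below (base URL, key, model, voice, output path) are placeholders.
	payload := []byte(`{"model":"tts-1","input":"hello world","voice":"alloy"}`)
	req, err := http.NewRequest(http.MethodPost, "http://localhost:3000/v1/audio/speech", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer sk-xxxx")
	req.Header.Set("Content-Type", "application/json")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, err := os.Create("speech.mp3")
	if err != nil {
		panic(err)
	}
	defer out.Close()

	// Because the relay streams the upstream body, bytes can be written to disk
	// as they arrive instead of after the whole response has been buffered.
	n, err := io.Copy(out, resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Printf("wrote %d bytes\n", n)
}
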
From 1b64db55215bf3fb6e10d69b7da30126ed9d1f5a Mon Sep 17 00:00:00 2001
From: RedwindA <128586631+RedwindA@users.noreply.github.com>
Date: Thu, 29 May 2025 12:33:27 +0800
Subject: [PATCH 2/4] Add `ERROR_LOG_ENABLED` description

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index a807b07d..5d0014f9 100644
--- a/README.md
+++ b/README.md
@@ -110,6 +110,7 @@ New API提供了丰富的功能，详细特性请参考[特性说明](https://do
 - `AZURE_DEFAULT_API_VERSION`：Azure渠道默认API版本，默认 `2025-04-01-preview`
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`：通知限制持续时间，默认 `10`分钟
 - `NOTIFY_LIMIT_COUNT`：用户通知在指定持续时间内的最大数量，默认 `2`
+- `ERROR_LOG_ENABLED=true`: 是否记录并显示错误日志，默认`false`
 
 ## 部署

From f907c25b21137e8d7a94caa9a8450913e980b941 Mon Sep 17 00:00:00 2001
From: RedwindA <128586631+RedwindA@users.noreply.github.com>
Date: Thu, 29 May 2025 12:35:13 +0800
Subject: [PATCH 3/4] Add `ERROR_LOG_ENABLED` description

---
 README.en.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.en.md b/README.en.md
index 4709bc5b..ad11f386 100644
--- a/README.en.md
+++ b/README.en.md
@@ -110,6 +110,7 @@ For detailed configuration instructions, please refer to [Installation Guide-Env
 - `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
 - `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
 - `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
+- `ERROR_LOG_ENABLED=true`: Whether to record and display error logs, default is `false`
 
 ## Deployment
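Taken together, these two README patches document the same `ERROR_LOG_ENABLED` switch in the Chinese and English files. As a generic illustration of how a boolean environment variable with a `false` default is usually parsed, here is a small self-contained Go sketch; getEnvBool is a hypothetical helper and does not reflect new-api's actual configuration code.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// getEnvBool reads a boolean environment variable, falling back to def
// when the variable is unset or cannot be parsed.
func getEnvBool(key string, def bool) bool {
	v, ok := os.LookupEnv(key)
	if !ok {
		return def
	}
	b, err := strconv.ParseBool(v)
	if err != nil {
		return def
	}
	return b
}

func main() {
	// Per the README entries above, ERROR_LOG_ENABLED defaults to false.
	errorLogEnabled := getEnvBool("ERROR_LOG_ENABLED", false)
	fmt.Println("error log enabled:", errorLogEnabled)
}
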
"-thinking") { + // 如果模型名以 gemini-2.5-pro 开头,不设置 ThinkingBudget + if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") { + geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ + IncludeThoughts: true, + } + } else { + budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens) + if budgetTokens == 0 || budgetTokens > 24576 { + budgetTokens = 24576 + } + geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ + ThinkingBudget: common.GetPointer(int(budgetTokens)), + IncludeThoughts: true, + } + } } else if strings.HasSuffix(info.OriginModelName, "-nothinking") { geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ ThinkingBudget: common.GetPointer(0),