Merge remote-tracking branch 'origin/main' into ui/refactor
This commit is contained in:
@@ -110,6 +110,7 @@ For detailed configuration instructions, please refer to [Installation Guide-Env
|
|||||||
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
|
- `AZURE_DEFAULT_API_VERSION`: Azure channel default API version, default is `2025-04-01-preview`
|
||||||
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
|
- `NOTIFICATION_LIMIT_DURATION_MINUTE`: Notification limit duration, default is `10` minutes
|
||||||
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
|
- `NOTIFY_LIMIT_COUNT`: Maximum number of user notifications within the specified duration, default is `2`
|
||||||
|
- `ERROR_LOG_ENABLED`: Whether to record and display error logs, default is `false` (set to `true` to enable)
|
||||||
|
|
||||||
## Deployment
|
## Deployment
|
||||||
|
|
||||||
|
|||||||
@@ -110,6 +110,7 @@ New API提供了丰富的功能,详细特性请参考[特性说明](https://do
|
|||||||
- `AZURE_DEFAULT_API_VERSION`:Azure渠道默认API版本,默认 `2025-04-01-preview`
|
- `AZURE_DEFAULT_API_VERSION`:Azure渠道默认API版本,默认 `2025-04-01-preview`
|
||||||
- `NOTIFICATION_LIMIT_DURATION_MINUTE`:通知限制持续时间,默认 `10`分钟
|
- `NOTIFICATION_LIMIT_DURATION_MINUTE`:通知限制持续时间,默认 `10`分钟
|
||||||
- `NOTIFY_LIMIT_COUNT`:用户通知在指定持续时间内的最大数量,默认 `2`
|
- `NOTIFY_LIMIT_COUNT`:用户通知在指定持续时间内的最大数量,默认 `2`
|
||||||
|
- `ERROR_LOG_ENABLED`:是否记录并显示错误日志,默认 `false`(设置为 `true` 以启用)
|
||||||
|
|
||||||
## 部署
|
## 部署
|
||||||
|
|
||||||
|
|||||||
@@ -39,15 +39,22 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
}
|
}
|
||||||
|
|
||||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||||
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
if strings.HasSuffix(info.OriginModelName, "-thinking") {
|
||||||
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
// 如果模型名以 gemini-2.5-pro 开头,不设置 ThinkingBudget
|
||||||
if budgetTokens == 0 || budgetTokens > 24576 {
|
if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
|
||||||
budgetTokens = 24576
|
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||||
}
|
IncludeThoughts: true,
|
||||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
}
|
||||||
ThinkingBudget: common.GetPointer(int(budgetTokens)),
|
} else {
|
||||||
IncludeThoughts: true,
|
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||||
}
|
if budgetTokens == 0 || budgetTokens > 24576 {
|
||||||
|
budgetTokens = 24576
|
||||||
|
}
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||||
|
ThinkingBudget: common.GetPointer(int(budgetTokens)),
|
||||||
|
IncludeThoughts: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
|
||||||
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
|
||||||
ThinkingBudget: common.GetPointer(0),
|
ThinkingBudget: common.GetPointer(0),
|
||||||
|
|||||||
@@ -273,36 +273,25 @@ func OpenaiHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayI
|
|||||||
}
|
}
|
||||||
|
|
||||||
func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
|
func OpenaiTTSHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
|
||||||
responseBody, err := io.ReadAll(resp.Body)
|
// the status code has been judged before, if there is a body reading failure,
|
||||||
if err != nil {
|
// it should be regarded as a non-recoverable error, so it should not return err for external retry.
|
||||||
return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
|
// Analogous to nginx's load balancing, it will only retry if it can't be requested or
|
||||||
}
|
// if the upstream returns a specific status code, once the upstream has already written the header,
|
||||||
err = resp.Body.Close()
|
// the subsequent failure of the response body should be regarded as a non-recoverable error,
|
||||||
if err != nil {
|
// and can be terminated directly.
|
||||||
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
|
defer resp.Body.Close()
|
||||||
}
|
usage := &dto.Usage{}
|
||||||
// Reset response body
|
usage.PromptTokens = info.PromptTokens
|
||||||
resp.Body = io.NopCloser(bytes.NewBuffer(responseBody))
|
usage.TotalTokens = info.PromptTokens
|
||||||
// We shouldn't set the header before we parse the response body, because the parse part may fail.
|
|
||||||
// And then we will have to send an error response, but in this case, the header has already been set.
|
|
||||||
// So the httpClient will be confused by the response.
|
|
||||||
// For example, Postman will report error, and we cannot check the response at all.
|
|
||||||
for k, v := range resp.Header {
|
for k, v := range resp.Header {
|
||||||
c.Writer.Header().Set(k, v[0])
|
c.Writer.Header().Set(k, v[0])
|
||||||
}
|
}
|
||||||
c.Writer.WriteHeader(resp.StatusCode)
|
c.Writer.WriteHeader(resp.StatusCode)
|
||||||
_, err = io.Copy(c.Writer, resp.Body)
|
c.Writer.WriteHeaderNow()
|
||||||
|
_, err := io.Copy(c.Writer, resp.Body)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return service.OpenAIErrorWrapper(err, "copy_response_body_failed", http.StatusInternalServerError), nil
|
common.LogError(c, err.Error())
|
||||||
}
|
}
|
||||||
err = resp.Body.Close()
|
|
||||||
if err != nil {
|
|
||||||
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
|
|
||||||
}
|
|
||||||
|
|
||||||
usage := &dto.Usage{}
|
|
||||||
usage.PromptTokens = info.PromptTokens
|
|
||||||
usage.TotalTokens = info.PromptTokens
|
|
||||||
return nil, usage
|
return nil, usage
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user