From 414b959e6555332a093062dc9056a9c9b43249c9 Mon Sep 17 00:00:00 2001 From: somnifex <98788152+somnifex@users.noreply.github.com> Date: Tue, 16 Sep 2025 08:51:29 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E6=B7=BB=E5=8A=A0=E5=AF=B9Thinking?= =?UTF-8?q?=E5=AD=97=E6=AE=B5=E7=9A=84=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= =?UTF-8?q?=EF=BC=8C=E7=A1=AE=E4=BF=9D=E6=8E=A8=E7=90=86=E5=86=85=E5=AE=B9?= =?UTF-8?q?=E6=AD=A3=E7=A1=AE=E4=BC=A0=E9=80=92?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- relay/channel/ollama/stream.go | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index ad12e7f8..cea45844 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -26,6 +26,7 @@ type ollamaChatStreamChunk struct { Message *struct { Role string `json:"role"` Content string `json:"content"` + Thinking json.RawMessage `json:"thinking"` ToolCalls []struct { Function struct { Name string `json:"name"` @@ -41,7 +42,6 @@ type ollamaChatStreamChunk struct { LoadDuration int64 `json:"load_duration"` PromptEvalCount int `json:"prompt_eval_count"` EvalCount int `json:"eval_count"` - // generate mode may use these PromptEvalDuration int64 `json:"prompt_eval_duration"` EvalDuration int64 `json:"eval_duration"` } @@ -95,13 +95,18 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http } }, } if content != "" { delta.Choices[0].Delta.SetContentString(content) } + if chunk.Message != nil && len(chunk.Message.Thinking) > 0 { + raw := strings.TrimSpace(string(chunk.Message.Thinking)) + if raw != "" && raw != "null" { delta.Choices[0].Delta.SetReasoningContent(raw) } + } // tool calls if chunk.Message != nil && len(chunk.Message.ToolCalls) > 0 { delta.Choices[0].Delta.ToolCalls = make([]dto.ToolCallResponse,0,len(chunk.Message.ToolCalls)) for _, tc := range chunk.Message.ToolCalls { // arguments -> string argBytes, _ := json.Marshal(tc.Function.Arguments) - tr := dto.ToolCallResponse{ID:"", Type:nil, Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} + toolId := fmt.Sprintf("call_%d", toolCallIndex) + tr := dto.ToolCallResponse{ID:toolId, Type:"function", Function: dto.FunctionResponse{Name: tc.Function.Name, Arguments: string(argBytes)}} tr.SetIndex(toolCallIndex) toolCallIndex++ delta.Choices[0].Delta.ToolCalls = append(delta.Choices[0].Delta.ToolCalls, tr) @@ -115,8 +120,8 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http usage.PromptTokens = chunk.PromptEvalCount usage.CompletionTokens = chunk.EvalCount usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens - finishReason := chunk.DoneReason - if finishReason == "" { finishReason = "stop" } + finishReason := chunk.DoneReason + if finishReason == "" { finishReason = "stop" } // emit stop delta if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil { if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } @@ -144,6 +149,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R lines := strings.Split(raw, "\n") var ( aggContent strings.Builder + reasoningBuilder strings.Builder lastChunk ollamaChatStreamChunk parsedAny bool ) @@ -157,18 +163,21 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R } parsedAny = true lastChunk = ck - if !ck.Done { - if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } - } else { - if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } + if ck.Message != nil && len(ck.Message.Thinking) > 0 { + raw := strings.TrimSpace(string(ck.Message.Thinking)) + if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } } + if ck.Message != nil && ck.Message.Content != "" { aggContent.WriteString(ck.Message.Content) } else if ck.Response != "" { aggContent.WriteString(ck.Response) } } if !parsedAny { var single ollamaChatStreamChunk if err := json.Unmarshal(body, &single); err != nil { return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError) } lastChunk = single - if single.Message != nil { aggContent.WriteString(single.Message.Content) } else { aggContent.WriteString(single.Response) } + if single.Message != nil { + if len(single.Message.Thinking) > 0 { raw := strings.TrimSpace(string(single.Message.Thinking)); if raw != "" && raw != "null" { reasoningBuilder.WriteString(raw) } } + aggContent.WriteString(single.Message.Content) + } else { aggContent.WriteString(single.Response) } } model := lastChunk.Model @@ -179,6 +188,8 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R finishReason := lastChunk.DoneReason if finishReason == "" { finishReason = "stop" } + msg := dto.Message{Role: "assistant", Content: contentPtr(content)} + if rc := reasoningBuilder.String(); rc != "" { msg.ReasoningContent = &rc } full := dto.OpenAITextResponse{ Id: common.GetUUID(), Model: model, @@ -186,7 +197,7 @@ func ollamaChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R Created: created, Choices: []dto.OpenAITextResponseChoice{ { Index: 0, - Message: dto.Message{Role: "assistant", Content: contentPtr(content)}, + Message: msg, FinishReason: finishReason, } }, Usage: *usage,