diff --git a/relay/channel/ollama/stream.go b/relay/channel/ollama/stream.go index d5b104d6..4e17f12d 100644 --- a/relay/channel/ollama/stream.go +++ b/relay/channel/ollama/stream.go @@ -114,15 +114,23 @@ func ollamaStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http continue } // done frame + // finalize once and break loop usage.PromptTokens = chunk.PromptEvalCount usage.CompletionTokens = chunk.EvalCount usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens finishReason := chunk.DoneReason if finishReason == "" { finishReason = "stop" } - stop := helper.GenerateStopResponse(responseId, created, model, finishReason) - if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } - final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage) - if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) } + // emit stop delta + if stop := helper.GenerateStopResponse(responseId, created, model, finishReason); stop != nil { + if data, err := common.Marshal(stop); err == nil { _ = helper.StringData(c, string(data)) } + } + // emit usage frame + if final := helper.GenerateFinalUsageResponse(responseId, created, model, *usage); final != nil { + if data, err := common.Marshal(final); err == nil { _ = helper.StringData(c, string(data)) } + } + // send [DONE] + helper.Done(c) + break } if err := scanner.Err(); err != nil && err != io.EOF { logger.LogError(c, "ollama stream scan error: "+err.Error()) } return usage, nil