Merge branch 'main' into feat-04

neotf committed on 2025-06-11 13:55:47 +08:00 (committed by GitHub)
177 changed files with 25105 additions and 11236 deletions

View File

@@ -31,6 +31,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
switch info.RelayMode {
case constant.RelayModeEmbeddings:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/embeddings/text-embedding/text-embedding", info.BaseUrl)
case constant.RelayModeRerank:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/rerank/text-rerank/text-rerank", info.BaseUrl)
case constant.RelayModeImagesGenerations:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/text2image/image-synthesis", info.BaseUrl)
case constant.RelayModeCompletions:
@@ -76,7 +78,7 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
}
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
return nil, errors.New("not implemented")
return ConvertRerankRequest(request), nil
}
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
@@ -103,6 +105,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
err, usage = aliImageHandler(c, resp, info)
case constant.RelayModeEmbeddings:
err, usage = aliEmbeddingHandler(c, resp)
case constant.RelayModeRerank:
err, usage = RerankHandler(c, resp, info)
default:
if info.IsStream {
err, usage = openai.OaiStreamHandler(c, resp, info)

View File

@@ -8,6 +8,7 @@ var ModelList = []string{
"qwq-32b",
"qwen3-235b-a22b",
"text-embedding-v1",
"gte-rerank-v2",
}
var ChannelName = "ali"

View File

@@ -1,5 +1,7 @@
package ali
import "one-api/dto"
type AliMessage struct {
Content string `json:"content"`
Role string `json:"role"`
@@ -97,3 +99,28 @@ type AliImageRequest struct {
} `json:"parameters,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
}
type AliRerankParameters struct {
TopN *int `json:"top_n,omitempty"`
ReturnDocuments *bool `json:"return_documents,omitempty"`
}
type AliRerankInput struct {
Query string `json:"query"`
Documents []any `json:"documents"`
}
type AliRerankRequest struct {
Model string `json:"model"`
Input AliRerankInput `json:"input"`
Parameters AliRerankParameters `json:"parameters,omitempty"`
}
type AliRerankResponse struct {
Output struct {
Results []dto.RerankResponseResult `json:"results"`
} `json:"output"`
Usage AliUsage `json:"usage"`
RequestId string `json:"request_id"`
AliError
}

View File

@@ -0,0 +1,83 @@
package ali
import (
"encoding/json"
"io"
"net/http"
"one-api/dto"
relaycommon "one-api/relay/common"
"one-api/service"
"github.com/gin-gonic/gin"
)
func ConvertRerankRequest(request dto.RerankRequest) *AliRerankRequest {
returnDocuments := request.ReturnDocuments
if returnDocuments == nil {
t := true
returnDocuments = &t
}
return &AliRerankRequest{
Model: request.Model,
Input: AliRerankInput{
Query: request.Query,
Documents: request.Documents,
},
Parameters: AliRerankParameters{
TopN: &request.TopN,
ReturnDocuments: returnDocuments,
},
}
}
func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
}
err = resp.Body.Close()
if err != nil {
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
}
var aliResponse AliRerankResponse
err = json.Unmarshal(responseBody, &aliResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
}
if aliResponse.Code != "" {
return &dto.OpenAIErrorWithStatusCode{
Error: dto.OpenAIError{
Message: aliResponse.Message,
Type: aliResponse.Code,
Param: aliResponse.RequestId,
Code: aliResponse.Code,
},
StatusCode: resp.StatusCode,
}, nil
}
usage := dto.Usage{
PromptTokens: aliResponse.Usage.TotalTokens,
CompletionTokens: 0,
TotalTokens: aliResponse.Usage.TotalTokens,
}
rerankResponse := dto.RerankResponse{
Results: aliResponse.Output.Results,
Usage: usage,
}
jsonResponse, err := json.Marshal(rerankResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
}
c.Writer.Header().Set("Content-Type", "application/json")
c.Writer.WriteHeader(resp.StatusCode)
_, err = c.Writer.Write(jsonResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "write_response_body_failed", http.StatusInternalServerError), nil
}
return nil, &usage
}
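
For orientation, here is a minimal, self-contained sketch of the request body that ConvertRerankRequest above produces, using stand-in types copied from the structs added in this PR (the real dto.RerankRequest may carry more fields); it only illustrates the JSON the Ali rerank endpoint receives.

package main

import (
    "encoding/json"
    "fmt"
)

// Stand-ins mirroring the Ali rerank structs added above (assumed shape).
type aliRerankParameters struct {
    TopN            *int  `json:"top_n,omitempty"`
    ReturnDocuments *bool `json:"return_documents,omitempty"`
}

type aliRerankInput struct {
    Query     string `json:"query"`
    Documents []any  `json:"documents"`
}

type aliRerankRequest struct {
    Model      string              `json:"model"`
    Input      aliRerankInput      `json:"input"`
    Parameters aliRerankParameters `json:"parameters,omitempty"`
}

func main() {
    topN := 2
    returnDocs := true // ConvertRerankRequest defaults this to true when the client omits it
    req := aliRerankRequest{
        Model: "gte-rerank-v2",
        Input: aliRerankInput{
            Query:     "What is the capital of France?",
            Documents: []any{"Paris is the capital of France.", "Berlin is the capital of Germany."},
        },
        Parameters: aliRerankParameters{TopN: &topN, ReturnDocuments: &returnDocs},
    }
    body, _ := json.MarshalIndent(req, "", "  ")
    // This body is POSTed to {BaseUrl}/api/v1/services/rerank/text-rerank/text-rerank.
    fmt.Println(string(body))
}

RerankHandler then maps the Ali output.results and usage.total_tokens back into the OpenAI-style RerankResponse, billing all rerank tokens as prompt tokens.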

View File

@@ -3,7 +3,6 @@ package ali
import (
"bufio"
"encoding/json"
"github.com/gin-gonic/gin"
"io"
"net/http"
"one-api/common"
@@ -11,6 +10,8 @@ import (
"one-api/relay/helper"
"one-api/service"
"strings"
"github.com/gin-gonic/gin"
)
// https://help.aliyun.com/document_detail/613695.html?spm=a2c4g.2399480.0.0.1adb778fAdzP9w#341800c0f8w0r
@@ -27,9 +28,6 @@ func requestOpenAI2Ali(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIReque
}
func embeddingRequestOpenAI2Ali(request dto.EmbeddingRequest) *AliEmbeddingRequest {
if request.Model == "" {
request.Model = "text-embedding-v1"
}
return &AliEmbeddingRequest{
Model: request.Model,
Input: struct {
@@ -64,7 +62,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorW
}, nil
}
fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse)
model := c.GetString("model")
if model == "" {
model = "text-embedding-v4"
}
fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse, model)
jsonResponse, err := json.Marshal(fullTextResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
@@ -75,11 +77,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorW
return nil, &fullTextResponse.Usage
}
func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbeddingResponse {
func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse, model string) *dto.OpenAIEmbeddingResponse {
openAIEmbeddingResponse := dto.OpenAIEmbeddingResponse{
Object: "list",
Data: make([]dto.OpenAIEmbeddingResponseItem, 0, len(response.Output.Embeddings)),
Model: "text-embedding-v1",
Model: model,
Usage: dto.Usage{TotalTokens: response.Usage.TotalTokens},
}
@@ -94,12 +96,11 @@ func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbe
}
func responseAli2OpenAI(response *AliResponse) *dto.OpenAITextResponse {
content, _ := json.Marshal(response.Output.Text)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Output.Text,
},
FinishReason: response.Output.FinishReason,
}

View File

@@ -109,6 +109,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
gopool.Go(func() {
defer func() {
// Add panic recovery handling
if r := recover(); r != nil {
if common2.DebugEnabled {
println("SSE ping goroutine panic recovered:", fmt.Sprintf("%v", r))
}
}
if common2.DebugEnabled {
println("SSE ping goroutine stopped.")
}
@@ -119,19 +125,32 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
}
ticker := time.NewTicker(pingInterval)
// Clean up the ticker on exit
defer ticker.Stop()
// Make sure the ticker is cleaned up in all cases
defer func() {
ticker.Stop()
if common2.DebugEnabled {
println("SSE ping ticker stopped")
}
}()
var pingMutex sync.Mutex
if common2.DebugEnabled {
println("SSE ping goroutine started")
}
// Add a timeout guard so the goroutine cannot run for too long
maxPingDuration := 120 * time.Minute // maximum ping duration
pingTimeout := time.NewTimer(maxPingDuration)
defer pingTimeout.Stop()
for {
select {
// Send ping data
case <-ticker.C:
if err := sendPingData(c, &pingMutex); err != nil {
if common2.DebugEnabled {
println("SSE ping error, stopping goroutine:", err.Error())
}
return
}
// Received the stop signal
@@ -140,6 +159,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
// Request finished
case <-c.Request.Context().Done():
return
// Timeout guard so the goroutine cannot run forever
case <-pingTimeout.C:
if common2.DebugEnabled {
println("SSE ping goroutine timeout, stopping")
}
return
}
}
})
@@ -148,19 +173,34 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
}
func sendPingData(c *gin.Context, mutex *sync.Mutex) error {
mutex.Lock()
defer mutex.Unlock()
// Add a timeout so a stuck lock cannot block forever
done := make(chan error, 1)
go func() {
mutex.Lock()
defer mutex.Unlock()
err := helper.PingData(c)
if err != nil {
common2.LogError(c, "SSE ping error: "+err.Error())
err := helper.PingData(c)
if err != nil {
common2.LogError(c, "SSE ping error: "+err.Error())
done <- err
return
}
if common2.DebugEnabled {
println("SSE ping data sent.")
}
done <- nil
}()
// Bound how long sending the ping data may take
select {
case err := <-done:
return err
case <-time.After(10 * time.Second):
return errors.New("SSE ping data send timeout")
case <-c.Request.Context().Done():
return errors.New("request context cancelled during ping")
}
if common2.DebugEnabled {
println("SSE ping data sent.")
}
return nil
}
func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error) {
@@ -175,15 +215,23 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http
client = service.GetHttpClient()
}
var stopPinger context.CancelFunc
if info.IsStream {
helper.SetEventStreamHeaders(c)
// Handle ping keep-alive for streaming requests
generalSettings := operation_setting.GetGeneralSetting()
if generalSettings.PingIntervalEnabled {
pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
stopPinger := startPingKeepAlive(c, pingInterval)
defer stopPinger()
stopPinger = startPingKeepAlive(c, pingInterval)
// Use defer to make sure the ping goroutine is stopped in all cases
defer func() {
if stopPinger != nil {
stopPinger()
if common2.DebugEnabled {
println("SSE ping goroutine stopped by defer")
}
}
}()
}
}
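
The reworked sendPingData wraps the mutex-guarded write in a helper goroutine and races it against a timeout and the request context. A minimal sketch of that pattern in isolation (function and variable names here are illustrative, not from the repo):

package main

import (
    "context"
    "errors"
    "fmt"
    "sync"
    "time"
)

// writeWithTimeout performs a mutex-guarded write in a helper goroutine and
// bails out if it does not complete in time or the request context is
// cancelled. The real code writes an SSE ping via helper.PingData.
func writeWithTimeout(ctx context.Context, mu *sync.Mutex, write func() error, timeout time.Duration) error {
    done := make(chan error, 1)
    go func() {
        mu.Lock()
        defer mu.Unlock()
        done <- write()
    }()
    select {
    case err := <-done:
        return err
    case <-time.After(timeout):
        return errors.New("write timeout")
    case <-ctx.Done():
        return errors.New("request context cancelled")
    }
}

func main() {
    var mu sync.Mutex
    err := writeWithTimeout(context.Background(), &mu, func() error {
        fmt.Println("ping sent")
        return nil
    }, 10*time.Second)
    fmt.Println("err:", err)
}

As in the original, a write that times out still completes in the background once it acquires the lock; the caller simply stops waiting for it.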

View File

@@ -53,12 +53,11 @@ func requestOpenAI2Baidu(request dto.GeneralOpenAIRequest) *BaiduChatRequest {
}
func responseBaidu2OpenAI(response *BaiduChatResponse) *dto.OpenAITextResponse {
content, _ := json.Marshal(response.Result)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Result,
},
FinishReason: "stop",
}

View File

@@ -48,9 +48,9 @@ func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *dto.Cla
prompt := ""
for _, message := range textRequest.Messages {
if message.Role == "user" {
prompt += fmt.Sprintf("\n\nHuman: %s", message.Content)
prompt += fmt.Sprintf("\n\nHuman: %s", message.StringContent())
} else if message.Role == "assistant" {
prompt += fmt.Sprintf("\n\nAssistant: %s", message.Content)
prompt += fmt.Sprintf("\n\nAssistant: %s", message.StringContent())
} else if message.Role == "system" {
if prompt == "" {
prompt = message.StringContent()
@@ -155,15 +155,13 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*dto.Cla
}
if lastMessage.Role == message.Role && lastMessage.Role != "tool" {
if lastMessage.IsStringContent() && message.IsStringContent() {
content, _ := json.Marshal(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
fmtMessage.Content = content
fmtMessage.SetStringContent(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
// delete last message
formatMessages = formatMessages[:len(formatMessages)-1]
}
}
if fmtMessage.Content == nil {
content, _ := json.Marshal("...")
fmtMessage.Content = content
fmtMessage.SetStringContent("...")
}
formatMessages = append(formatMessages, fmtMessage)
lastMessage = fmtMessage
@@ -397,12 +395,11 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse) *dto
thinkingContent := ""
if reqMode == RequestModeCompletion {
content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: strings.TrimPrefix(claudeResponse.Completion, " "),
Name: nil,
},
FinishReason: stopReasonClaude2OpenAI(claudeResponse.StopReason),

View File

@@ -195,11 +195,10 @@ func cohereHandler(c *gin.Context, resp *http.Response, modelName string, prompt
openaiResp.Model = modelName
openaiResp.Usage = usage
content, _ := json.Marshal(cohereResp.Text)
openaiResp.Choices = []dto.OpenAITextResponseChoice{
{
Index: 0,
Message: dto.Message{Content: content, Role: "assistant"},
Message: dto.Message{Content: cohereResp.Text, Role: "assistant"},
FinishReason: stopReasonCohere2OpenAI(cohereResp.FinishReason),
},
}

View File

@@ -10,7 +10,7 @@ type CozeError struct {
type CozeEnterMessage struct {
Role string `json:"role"`
Type string `json:"type,omitempty"`
Content json.RawMessage `json:"content,omitempty"`
Content any `json:"content,omitempty"`
MetaData json.RawMessage `json:"meta_data,omitempty"`
ContentType string `json:"content_type,omitempty"`
}

View File

@@ -278,12 +278,11 @@ func difyHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInf
Created: common.GetTimestamp(),
Usage: difyResponse.MetaData.Usage,
}
content, _ := json.Marshal(difyResponse.Answer)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: difyResponse.Answer,
},
FinishReason: "stop",
}

View File

@@ -1,5 +1,7 @@
package gemini
import "encoding/json"
type GeminiChatRequest struct {
Contents []GeminiChatContent `json:"contents"`
SafetySettings []GeminiChatSafetySettings `json:"safetySettings,omitempty"`
@@ -22,19 +24,38 @@ type GeminiInlineData struct {
Data string `json:"data"`
}
// UnmarshalJSON custom unmarshaler for GeminiInlineData to support snake_case and camelCase for MimeType
func (g *GeminiInlineData) UnmarshalJSON(data []byte) error {
type Alias GeminiInlineData // Use type alias to avoid recursion
var aux struct {
Alias
MimeTypeSnake string `json:"mime_type"`
}
if err := json.Unmarshal(data, &aux); err != nil {
return err
}
*g = GeminiInlineData(aux.Alias) // Copy other fields if any in future
// Prioritize snake_case if present
if aux.MimeTypeSnake != "" {
g.MimeType = aux.MimeTypeSnake
} else if aux.MimeType != "" { // Fallback to camelCase from Alias
g.MimeType = aux.MimeType
}
// g.Data would be populated by aux.Alias.Data
return nil
}
type FunctionCall struct {
FunctionName string `json:"name"`
Arguments any `json:"args"`
}
type GeminiFunctionResponseContent struct {
Name string `json:"name"`
Content any `json:"content"`
}
type FunctionResponse struct {
Name string `json:"name"`
Response GeminiFunctionResponseContent `json:"response"`
Name string `json:"name"`
Response map[string]interface{} `json:"response"`
}
type GeminiPartExecutableCode struct {
@@ -63,6 +84,33 @@ type GeminiPart struct {
CodeExecutionResult *GeminiPartCodeExecutionResult `json:"codeExecutionResult,omitempty"`
}
// UnmarshalJSON custom unmarshaler for GeminiPart to support snake_case and camelCase for InlineData
func (p *GeminiPart) UnmarshalJSON(data []byte) error {
// Alias to avoid recursion during unmarshalling
type Alias GeminiPart
var aux struct {
Alias
InlineDataSnake *GeminiInlineData `json:"inline_data,omitempty"` // snake_case variant
}
if err := json.Unmarshal(data, &aux); err != nil {
return err
}
// Assign fields from alias
*p = GeminiPart(aux.Alias)
// Prioritize snake_case for InlineData if present
if aux.InlineDataSnake != nil {
p.InlineData = aux.InlineDataSnake
} else if aux.InlineData != nil { // Fallback to camelCase from Alias
p.InlineData = aux.InlineData
}
// Other fields like Text, FunctionCall etc. are already populated via aux.Alias
return nil
}
type GeminiChatContent struct {
Role string `json:"role,omitempty"`
Parts []GeminiPart `json:"parts"`
@@ -117,10 +165,16 @@ type GeminiChatResponse struct {
}
type GeminiUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
}
type GeminiPromptTokensDetails struct {
Modality string `json:"modality"`
TokenCount int `json:"tokenCount"`
}
// Imagen related structs
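
The two UnmarshalJSON methods added above share one pattern: unmarshal into an auxiliary struct that embeds a type alias plus a snake_case variant of the field, then prefer the snake_case value. A minimal, self-contained sketch of the idea (simplified types, not the repo's):

package main

import (
    "encoding/json"
    "fmt"
)

// Accept both snake_case and camelCase keys for the same field.
type inlineData struct {
    MimeType string `json:"mimeType"`
    Data     string `json:"data"`
}

func (g *inlineData) UnmarshalJSON(data []byte) error {
    type alias inlineData // defined type drops the method set, avoiding recursion
    var aux struct {
        alias
        MimeTypeSnake string `json:"mime_type"`
    }
    if err := json.Unmarshal(data, &aux); err != nil {
        return err
    }
    *g = inlineData(aux.alias)
    if aux.MimeTypeSnake != "" { // snake_case wins when both keys are present
        g.MimeType = aux.MimeTypeSnake
    }
    return nil
}

func main() {
    var a, b inlineData
    _ = json.Unmarshal([]byte(`{"mime_type":"image/png","data":"..."}`), &a)
    _ = json.Unmarshal([]byte(`{"mimeType":"image/png","data":"..."}`), &b)
    fmt.Println(a.MimeType, b.MimeType) // image/png image/png
}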

View File

@@ -55,6 +55,16 @@ func GeminiTextGenerationHandler(c *gin.Context, resp *http.Response, info *rela
TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
}
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
// Return the JSON response in Gemini's native format directly
jsonResponse, err := json.Marshal(geminiResponse)
if err != nil {
@@ -100,6 +110,14 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info
usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
}
// Send the GeminiChatResponse directly
@@ -118,11 +136,10 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info
}
// Compute the final usage
usage.PromptTokensDetails.TextTokens = usage.PromptTokens
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
// End the streaming response
helper.Done(c)
// Do not append a trailing [DONE] to the stream, since the Gemini API never sends one
//helper.Done(c)
return usage, nil
}
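
The modality loop added above (and repeated in the other Gemini handlers later in this diff) routes the per-modality prompt token counts into the audio and text detail fields. A small sketch of what it computes, with stand-in types:

package main

import "fmt"

// Stand-in for the Gemini usage metadata entries referenced above (assumed shape).
type promptTokensDetail struct {
    Modality   string
    TokenCount int
}

// splitPromptTokens maps the per-modality breakdown onto separate audio/text counters.
func splitPromptTokens(details []promptTokensDetail) (audio, text int) {
    for _, d := range details {
        switch d.Modality {
        case "AUDIO":
            audio = d.TokenCount
        case "TEXT":
            text = d.TokenCount
        }
    }
    return
}

func main() {
    audio, text := splitPromptTokens([]promptTokensDetail{
        {Modality: "TEXT", TokenCount: 1200},
        {Modality: "AUDIO", TokenCount: 300},
    })
    fmt.Println(audio, text) // 300 1200
}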

View File

@@ -57,25 +57,63 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
}
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// If the model name starts with gemini-2.5-pro, do not set ThinkingBudget
if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
IncludeThoughts: true,
}
} else {
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
if budgetTokens == 0 || budgetTokens > 24576 {
budgetTokens = 24576
}
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(int(budgetTokens)),
IncludeThoughts: true,
}
}
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// Hard-coded list of older models that do not support ThinkingBudget
unsupportedModels := []string{
"gemini-2.5-pro-preview-05-06",
"gemini-2.5-pro-preview-03-25",
}
isUnsupported := false
for _, unsupportedModel := range unsupportedModels {
if strings.HasPrefix(info.OriginModelName, unsupportedModel) {
isUnsupported = true
break
}
}
if isUnsupported {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
IncludeThoughts: true,
}
} else {
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
// Check whether this is a newer 2.5-pro model, which supports ThinkingBudget but with a special range
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
if isNew25Pro {
// Newer 2.5-pro models accept a ThinkingBudget in the range 128-32768
if budgetTokens == 0 || budgetTokens < 128 {
budgetTokens = 128
} else if budgetTokens > 32768 {
budgetTokens = 32768
}
} else {
// Other models accept a ThinkingBudget in the range 0-24576
if budgetTokens == 0 || budgetTokens > 24576 {
budgetTokens = 24576
}
}
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(int(budgetTokens)),
IncludeThoughts: true,
}
}
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(0),
// Check whether this is a newer 2.5-pro model (these do not support -nothinking, since the minimum budget is 128)
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
if !isNew25Pro {
// Only models other than the newer 2.5-pro support -nothinking
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(0),
}
}
}
}
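
The budget logic above amounts to clamping the percentage-derived value into a per-model range. A worked sketch with illustrative numbers (60% of a 4096-token max output gives 2457 budget tokens); the helper below is an editorial sketch, not repo code:

package main

import "fmt"

// Mirrors the clamping above: newer 2.5-pro models accept a thinking budget
// of 128-32768 tokens, other models 0-24576 (zero falls back to the maximum
// or minimum as in the diff).
func clampThinkingBudget(budget float64, isNew25Pro bool) int {
    if isNew25Pro {
        switch {
        case budget == 0 || budget < 128:
            return 128
        case budget > 32768:
            return 32768
        }
        return int(budget)
    }
    if budget == 0 || budget > 24576 {
        return 24576
    }
    return int(budget)
}

func main() {
    // 60% of 4096 max output tokens = 2457.6 budget tokens.
    fmt.Println(clampThinkingBudget(0.6*4096, true))  // 2457
    fmt.Println(clampThinkingBudget(0.6*65536, true)) // 32768 (clamped)
    fmt.Println(clampThinkingBudget(0, false))        // 24576 (default)
}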
@@ -137,12 +175,6 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
// common.SysLog("tools: " + fmt.Sprintf("%+v", geminiRequest.Tools))
// json_data, _ := json.Marshal(geminiRequest.Tools)
// common.SysLog("tools_json: " + string(json_data))
} else if textRequest.Functions != nil {
//geminiRequest.Tools = []GeminiChatTool{
// {
// FunctionDeclarations: textRequest.Functions,
// },
//}
}
if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -173,17 +205,27 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
name = val
}
content := common.StrToMap(message.StringContent())
var contentMap map[string]interface{}
contentStr := message.StringContent()
// 1. Try to parse the content as a JSON object
if err := json.Unmarshal([]byte(contentStr), &contentMap); err != nil {
// 2. If that fails, try to parse it as a JSON array
var contentSlice []interface{}
if err := json.Unmarshal([]byte(contentStr), &contentSlice); err == nil {
// If it is an array, wrap it in an object
contentMap = map[string]interface{}{"result": contentSlice}
} else {
// 3. If that also fails, treat it as plain text
contentMap = map[string]interface{}{"content": contentStr}
}
}
functionResp := &FunctionResponse{
Name: name,
Response: GeminiFunctionResponseContent{
Name: name,
Content: content,
},
}
if content == nil {
functionResp.Response.Content = message.StringContent()
Name: name,
Response: contentMap,
}
*parts = append(*parts, GeminiPart{
FunctionResponse: functionResp,
})
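
The object / array / plain-text fallback above can be read as one small helper. A self-contained sketch (the helper name is hypothetical):

package main

import (
    "encoding/json"
    "fmt"
)

// toResponseMap mirrors the fallback chain above: try a JSON object first,
// then a JSON array (wrapped under "result"), then plain text under "content".
// Key names follow the diff above.
func toResponseMap(content string) map[string]interface{} {
    var m map[string]interface{}
    if err := json.Unmarshal([]byte(content), &m); err == nil {
        return m
    }
    var s []interface{}
    if err := json.Unmarshal([]byte(content), &s); err == nil {
        return map[string]interface{}{"result": s}
    }
    return map[string]interface{}{"content": content}
}

func main() {
    fmt.Println(toResponseMap(`{"ok":true}`)) // map[ok:true]
    fmt.Println(toResponseMap(`[1,2,3]`))     // map[result:[1 2 3]]
    fmt.Println(toResponseMap(`plain text`))  // map[content:plain text]
}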
@@ -280,13 +322,13 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
if part.GetInputAudio().Data == "" {
return nil, fmt.Errorf("only base64 audio is supported in gemini")
}
format, base64String, err := service.DecodeBase64FileData(part.GetInputAudio().Data)
base64String, err := service.DecodeBase64AudioData(part.GetInputAudio().Data)
if err != nil {
return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
}
parts = append(parts, GeminiPart{
InlineData: &GeminiInlineData{
MimeType: format,
MimeType: "audio/" + part.GetInputAudio().Format,
Data: base64String,
},
})
@@ -576,14 +618,13 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
Created: common.GetTimestamp(),
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
}
content, _ := json.Marshal("")
isToolCall := false
for _, candidate := range response.Candidates {
choice := dto.OpenAITextResponseChoice{
Index: int(candidate.Index),
Message: dto.Message{
Role: "assistant",
Content: content,
Content: "",
},
FinishReason: constant.FinishReasonStop,
}
@@ -738,6 +779,13 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
}
err = helper.ObjectData(c, response)
if err != nil {
@@ -812,6 +860,14 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
fullTextResponse.Usage = usage
jsonResponse, err := json.Marshal(fullTextResponse)
if err != nil {

View File

@@ -1,13 +1,55 @@
package mistral
import (
"one-api/common"
"one-api/dto"
"regexp"
)
var mistralToolCallIdRegexp = regexp.MustCompile("^[a-zA-Z0-9]{9}$")
func requestOpenAI2Mistral(request *dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
messages := make([]dto.Message, 0, len(request.Messages))
idMap := make(map[string]string)
for _, message := range request.Messages {
// 1. tool_calls.id
toolCalls := message.ParseToolCalls()
if toolCalls != nil {
for i := range toolCalls {
if !mistralToolCallIdRegexp.MatchString(toolCalls[i].ID) {
if newId, ok := idMap[toolCalls[i].ID]; ok {
toolCalls[i].ID = newId
} else {
newId, err := common.GenerateRandomCharsKey(9)
if err == nil {
idMap[toolCalls[i].ID] = newId
toolCalls[i].ID = newId
}
}
}
}
message.SetToolCalls(toolCalls)
}
// 2. tool_call_id
if message.ToolCallId != "" {
if newId, ok := idMap[message.ToolCallId]; ok {
message.ToolCallId = newId
} else {
if !mistralToolCallIdRegexp.MatchString(message.ToolCallId) {
newId, err := common.GenerateRandomCharsKey(9)
if err == nil {
idMap[message.ToolCallId] = newId
message.ToolCallId = newId
}
}
}
}
mediaMessages := message.ParseContent()
if message.Role == "assistant" && message.ToolCalls != nil && message.Content == "" {
mediaMessages = []dto.MediaContent{}
}
for j, mediaMessage := range mediaMessages {
if mediaMessage.Type == dto.ContentTypeImageURL {
imageUrl := mediaMessage.GetImageMedia()
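
The Mistral adapter above normalizes tool call IDs to the 9-character alphanumeric form Mistral expects, keeping tool_calls.id and later tool_call_id references consistent via idMap. A minimal sketch of that remapping (randomID9 is a stand-in for common.GenerateRandomCharsKey(9), whose exact behavior is assumed):

package main

import (
    "fmt"
    "math/rand"
    "regexp"
)

var mistralIDPattern = regexp.MustCompile("^[a-zA-Z0-9]{9}$")

// randomID9 generates a 9-character alphanumeric ID (stand-in for the repo helper).
func randomID9() string {
    const chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    b := make([]byte, 9)
    for i := range b {
        b[i] = chars[rand.Intn(len(chars))]
    }
    return string(b)
}

func main() {
    idMap := map[string]string{}
    normalize := func(id string) string {
        if mistralIDPattern.MatchString(id) {
            return id
        }
        if newID, ok := idMap[id]; ok {
            return newID // keep tool_calls.id and tool_call_id consistent
        }
        newID := randomID9()
        idMap[id] = newID
        return newID
    }
    fmt.Println(normalize("call_abc123XYZ987")) // regenerated 9-char id
    fmt.Println(normalize("call_abc123XYZ987")) // same id, reused from idMap
    fmt.Println(normalize("abc123XYZ"))         // already valid, unchanged
}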

View File

@@ -45,12 +45,11 @@ func responsePaLM2OpenAI(response *PaLMChatResponse) *dto.OpenAITextResponse {
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
}
for i, candidate := range response.Candidates {
content, _ := json.Marshal(candidate.Content)
choice := dto.OpenAITextResponseChoice{
Index: i,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: candidate.Content,
},
FinishReason: "stop",
}

View File

@@ -56,12 +56,11 @@ func responseTencent2OpenAI(response *TencentChatResponse) *dto.OpenAITextRespon
},
}
if len(response.Choices) > 0 {
content, _ := json.Marshal(response.Choices[0].Messages.Content)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Choices[0].Messages.Content,
},
FinishReason: response.Choices[0].FinishReason,
}

View File

@@ -61,12 +61,11 @@ func responseXunfei2OpenAI(response *XunfeiChatResponse) *dto.OpenAITextResponse
},
}
}
content, _ := json.Marshal(response.Payload.Choices.Text[0].Content)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Payload.Choices.Text[0].Content,
},
FinishReason: constant.FinishReasonStop,
}

View File

@@ -108,12 +108,11 @@ func responseZhipu2OpenAI(response *ZhipuResponse) *dto.OpenAITextResponse {
Usage: response.Data.Usage,
}
for i, choice := range response.Data.Choices {
content, _ := json.Marshal(strings.Trim(choice.Content, "\""))
openaiChoice := dto.OpenAITextResponseChoice{
Index: i,
Message: dto.Message{
Role: choice.Role,
Content: content,
Content: strings.Trim(choice.Content, "\""),
},
FinishReason: "",
}

View File

@@ -3,6 +3,7 @@ package helper
import (
"bufio"
"context"
"fmt"
"io"
"net/http"
"one-api/common"
@@ -19,8 +20,8 @@ import (
)
const (
InitialScannerBufferSize = 1 << 20 // 1MB (1*1024*1024)
MaxScannerBufferSize = 10 << 20 // 10MB (10*1024*1024)
InitialScannerBufferSize = 64 << 10 // 64KB (64*1024)
MaxScannerBufferSize = 10 << 20 // 10MB (10*1024*1024)
DefaultPingInterval = 10 * time.Second
)
@@ -30,7 +31,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
return
}
defer resp.Body.Close()
// Make sure the response body is always closed
defer func() {
if resp.Body != nil {
resp.Body.Close()
}
}()
streamingTimeout := time.Duration(constant.StreamingTimeout) * time.Second
if strings.HasPrefix(info.UpstreamModelName, "o") {
@@ -39,11 +45,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
}
var (
stopChan = make(chan bool, 2)
stopChan = make(chan bool, 3) // larger buffer to avoid blocking
scanner = bufio.NewScanner(resp.Body)
ticker = time.NewTicker(streamingTimeout)
pingTicker *time.Ticker
writeMutex sync.Mutex // Mutex to protect concurrent writes
wg sync.WaitGroup // used to wait for all goroutines to exit
)
generalSettings := operation_setting.GetGeneralSetting()
@@ -57,13 +64,32 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
pingTicker = time.NewTicker(pingInterval)
}
// Improved resource cleanup: make sure every goroutine exits properly
defer func() {
// Notify all goroutines to stop
common.SafeSendBool(stopChan, true)
ticker.Stop()
if pingTicker != nil {
pingTicker.Stop()
}
// Wait for all goroutines to exit, for at most 5 seconds
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
case <-time.After(5 * time.Second):
common.LogError(c, "timeout waiting for goroutines to exit")
}
close(stopChan)
}()
scanner.Buffer(make([]byte, InitialScannerBufferSize), MaxScannerBufferSize)
scanner.Split(bufio.ScanLines)
SetEventStreamHeaders(c)
@@ -73,35 +99,95 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
ctx = context.WithValue(ctx, "stop_chan", stopChan)
// Handle ping data sending
// Handle ping data sending with improved error handling
if pingEnabled && pingTicker != nil {
wg.Add(1)
gopool.Go(func() {
defer func() {
wg.Done()
if r := recover(); r != nil {
common.LogError(c, fmt.Sprintf("ping goroutine panic: %v", r))
common.SafeSendBool(stopChan, true)
}
if common.DebugEnabled {
println("ping goroutine exited")
}
}()
// Add a timeout guard so the goroutine cannot run forever
maxPingDuration := 30 * time.Minute // maximum ping duration
pingTimeout := time.NewTimer(maxPingDuration)
defer pingTimeout.Stop()
for {
select {
case <-pingTicker.C:
writeMutex.Lock() // Lock before writing
err := PingData(c)
writeMutex.Unlock() // Unlock after writing
if err != nil {
common.LogError(c, "ping data error: "+err.Error())
common.SafeSendBool(stopChan, true)
// Use a timeout so a blocked write cannot stall the goroutine
done := make(chan error, 1)
go func() {
writeMutex.Lock()
defer writeMutex.Unlock()
done <- PingData(c)
}()
select {
case err := <-done:
if err != nil {
common.LogError(c, "ping data error: "+err.Error())
return
}
if common.DebugEnabled {
println("ping data sent")
}
case <-time.After(10 * time.Second):
common.LogError(c, "ping data send timeout")
return
case <-ctx.Done():
return
case <-stopChan:
return
}
if common.DebugEnabled {
println("ping data sent")
}
case <-ctx.Done():
if common.DebugEnabled {
println("ping data goroutine stopped")
}
return
case <-stopChan:
return
case <-c.Request.Context().Done():
// Watch for the client disconnecting
return
case <-pingTimeout.C:
common.LogError(c, "ping goroutine max duration reached")
return
}
}
})
}
// Scanner goroutine with improved error handling
wg.Add(1)
common.RelayCtxGo(ctx, func() {
defer func() {
wg.Done()
if r := recover(); r != nil {
common.LogError(c, fmt.Sprintf("scanner goroutine panic: %v", r))
}
common.SafeSendBool(stopChan, true)
if common.DebugEnabled {
println("scanner goroutine exited")
}
}()
for scanner.Scan() {
// Check whether we should stop
select {
case <-stopChan:
return
case <-ctx.Done():
return
case <-c.Request.Context().Done():
return
default:
}
ticker.Reset(streamingTimeout)
data := scanner.Text()
if common.DebugEnabled {
@@ -119,11 +205,27 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
data = strings.TrimSuffix(data, "\r")
if !strings.HasPrefix(data, "[DONE]") {
info.SetFirstResponseTime()
writeMutex.Lock() // Lock before writing
success := dataHandler(data)
writeMutex.Unlock() // Unlock after writing
if !success {
break
// Use a timeout so a blocked write cannot stall the goroutine
done := make(chan bool, 1)
go func() {
writeMutex.Lock()
defer writeMutex.Unlock()
done <- dataHandler(data)
}()
select {
case success := <-done:
if !success {
return
}
case <-time.After(10 * time.Second):
common.LogError(c, "data handler timeout")
return
case <-ctx.Done():
return
case <-stopChan:
return
}
}
}
@@ -133,17 +235,18 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
common.LogError(c, "scanner error: "+err.Error())
}
}
common.SafeSendBool(stopChan, true)
})
// Main loop: wait for completion or timeout
select {
case <-ticker.C:
// Timeout handling
common.LogError(c, "streaming timeout")
common.SafeSendBool(stopChan, true)
case <-stopChan:
// Normal completion
common.LogInfo(c, "streaming finished")
case <-c.Request.Context().Done():
// Client disconnected
common.LogInfo(c, "client disconnected")
}
}
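
The cleanup added above waits on a sync.WaitGroup but gives up after five seconds so a stuck goroutine cannot pin the request. The pattern in isolation (a sketch; the handler logs an error instead of returning a bool):

package main

import (
    "fmt"
    "sync"
    "time"
)

// waitWithTimeout waits for all worker goroutines but gives up after the
// deadline so a stuck goroutine cannot block cleanup forever.
func waitWithTimeout(wg *sync.WaitGroup, timeout time.Duration) bool {
    done := make(chan struct{})
    go func() {
        wg.Wait()
        close(done)
    }()
    select {
    case <-done:
        return true
    case <-time.After(timeout):
        return false
    }
}

func main() {
    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        time.Sleep(100 * time.Millisecond) // simulated worker
    }()
    fmt.Println("all goroutines exited:", waitWithTimeout(&wg, 5*time.Second))
}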

View File

@@ -352,6 +352,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
promptTokens := usage.PromptTokens
cacheTokens := usage.PromptTokensDetails.CachedTokens
imageTokens := usage.PromptTokensDetails.ImageTokens
audioTokens := usage.PromptTokensDetails.AudioTokens
completionTokens := usage.CompletionTokens
modelName := relayInfo.OriginModelName
@@ -367,6 +368,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
dPromptTokens := decimal.NewFromInt(int64(promptTokens))
dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
dImageTokens := decimal.NewFromInt(int64(imageTokens))
dAudioTokens := decimal.NewFromInt(int64(audioTokens))
dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
dCompletionRatio := decimal.NewFromFloat(completionRatio)
dCacheRatio := decimal.NewFromFloat(cacheRatio)
@@ -412,23 +414,43 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 $%s",
extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 %s",
fileSearchTool.CallCount, dFileSearchQuota.String())
}
}
var quotaCalculateDecimal decimal.Decimal
if !priceData.UsePrice {
nonCachedTokens := dPromptTokens.Sub(dCacheTokens)
cachedTokensWithRatio := dCacheTokens.Mul(dCacheRatio)
promptQuota := nonCachedTokens.Add(cachedTokensWithRatio)
if imageTokens > 0 {
nonImageTokens := dPromptTokens.Sub(dImageTokens)
imageTokensWithRatio := dImageTokens.Mul(dImageRatio)
promptQuota = nonImageTokens.Add(imageTokensWithRatio)
var audioInputQuota decimal.Decimal
var audioInputPrice float64
if !priceData.UsePrice {
baseTokens := dPromptTokens
// Subtract cached tokens
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
baseTokens = baseTokens.Sub(dCacheTokens)
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
// Subtract image tokens
var imageTokensWithRatio decimal.Decimal
if !dImageTokens.IsZero() {
baseTokens = baseTokens.Sub(dImageTokens)
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
}
// Subtract Gemini audio tokens
if !dAudioTokens.IsZero() {
audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
if audioInputPrice > 0 {
// Recompute the base tokens
baseTokens = baseTokens.Sub(dAudioTokens)
audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent += fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String())
}
}
promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio)
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
@@ -442,6 +464,8 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
// Add quota for responses tool calls
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
// Add the separately billed audio input quota
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
quota := int(quotaCalculateDecimal.Round(0).IntPart())
totalTokens := promptTokens + completionTokens
@@ -512,6 +536,11 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
other["file_search_price"] = fileSearchPrice
}
}
if !audioInputQuota.IsZero() {
other["audio_input_seperate_price"] = true
other["audio_input_token_count"] = audioTokens
other["audio_input_price"] = audioInputPrice
}
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
}
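
A worked example of the prompt-side arithmetic above, with illustrative numbers rather than repo values: 10,000 prompt tokens of which 2,000 are cached (cache ratio 0.5) and 1,000 are Gemini audio tokens billed separately at an assumed $1 per million; the cached and audio tokens are carved out of the base count, and the audio portion is added back as an independent quota term.

package main

import (
    "fmt"

    "github.com/shopspring/decimal"
)

func main() {
    // Illustrative inputs, not repo values.
    prompt := decimal.NewFromInt(10000) // total prompt tokens
    cached := decimal.NewFromInt(2000)  // cached prompt tokens
    audio := decimal.NewFromInt(1000)   // Gemini audio prompt tokens
    cacheRatio := decimal.NewFromFloat(0.5)
    audioPricePerM := decimal.NewFromFloat(1.0)  // assumed $ per 1M audio tokens
    groupRatio := decimal.NewFromInt(1)          // assumed group ratio
    quotaPerUnit := decimal.NewFromInt(500000)   // assumed quota per $1

    // Carve cached and audio tokens out of the base prompt tokens.
    base := prompt.Sub(cached).Sub(audio)
    promptQuota := base.Add(cached.Mul(cacheRatio))

    // Audio input is billed independently, per million tokens.
    audioQuota := audioPricePerM.Div(decimal.NewFromInt(1000000)).Mul(audio).Mul(groupRatio).Mul(quotaPerUnit)

    fmt.Println("prompt token-equivalents:", promptQuota.String()) // 8000
    fmt.Println("audio input quota:", audioQuota.String())         // 500
}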