Merge branch 'main' into feat-04

neotf committed on 2025-06-11 13:55:47 +08:00 (committed by GitHub)
177 changed files with 25105 additions and 11236 deletions

View File

@@ -31,6 +31,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
switch info.RelayMode {
case constant.RelayModeEmbeddings:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/embeddings/text-embedding/text-embedding", info.BaseUrl)
case constant.RelayModeRerank:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/rerank/text-rerank/text-rerank", info.BaseUrl)
case constant.RelayModeImagesGenerations:
fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/text2image/image-synthesis", info.BaseUrl)
case constant.RelayModeCompletions:
@@ -76,7 +78,7 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInf
}
func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
return nil, errors.New("not implemented")
return ConvertRerankRequest(request), nil
}
func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
@@ -103,6 +105,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
err, usage = aliImageHandler(c, resp, info)
case constant.RelayModeEmbeddings:
err, usage = aliEmbeddingHandler(c, resp)
case constant.RelayModeRerank:
err, usage = RerankHandler(c, resp, info)
default:
if info.IsStream {
err, usage = openai.OaiStreamHandler(c, resp, info)

View File

@@ -8,6 +8,7 @@ var ModelList = []string{
"qwq-32b",
"qwen3-235b-a22b",
"text-embedding-v1",
"gte-rerank-v2",
}
var ChannelName = "ali"

View File

@@ -1,5 +1,7 @@
package ali
import "one-api/dto"
type AliMessage struct {
Content string `json:"content"`
Role string `json:"role"`
@@ -97,3 +99,28 @@ type AliImageRequest struct {
} `json:"parameters,omitempty"`
ResponseFormat string `json:"response_format,omitempty"`
}
type AliRerankParameters struct {
TopN *int `json:"top_n,omitempty"`
ReturnDocuments *bool `json:"return_documents,omitempty"`
}
type AliRerankInput struct {
Query string `json:"query"`
Documents []any `json:"documents"`
}
type AliRerankRequest struct {
Model string `json:"model"`
Input AliRerankInput `json:"input"`
Parameters AliRerankParameters `json:"parameters,omitempty"`
}
type AliRerankResponse struct {
Output struct {
Results []dto.RerankResponseResult `json:"results"`
} `json:"output"`
Usage AliUsage `json:"usage"`
RequestId string `json:"request_id"`
AliError
}

View File

@@ -0,0 +1,83 @@
package ali
import (
"encoding/json"
"io"
"net/http"
"one-api/dto"
relaycommon "one-api/relay/common"
"one-api/service"
"github.com/gin-gonic/gin"
)
func ConvertRerankRequest(request dto.RerankRequest) *AliRerankRequest {
returnDocuments := request.ReturnDocuments
if returnDocuments == nil {
t := true
returnDocuments = &t
}
return &AliRerankRequest{
Model: request.Model,
Input: AliRerankInput{
Query: request.Query,
Documents: request.Documents,
},
Parameters: AliRerankParameters{
TopN: &request.TopN,
ReturnDocuments: returnDocuments,
},
}
}
func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
responseBody, err := io.ReadAll(resp.Body)
if err != nil {
return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
}
err = resp.Body.Close()
if err != nil {
return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
}
var aliResponse AliRerankResponse
err = json.Unmarshal(responseBody, &aliResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
}
if aliResponse.Code != "" {
return &dto.OpenAIErrorWithStatusCode{
Error: dto.OpenAIError{
Message: aliResponse.Message,
Type: aliResponse.Code,
Param: aliResponse.RequestId,
Code: aliResponse.Code,
},
StatusCode: resp.StatusCode,
}, nil
}
usage := dto.Usage{
PromptTokens: aliResponse.Usage.TotalTokens,
CompletionTokens: 0,
TotalTokens: aliResponse.Usage.TotalTokens,
}
rerankResponse := dto.RerankResponse{
Results: aliResponse.Output.Results,
Usage: usage,
}
jsonResponse, err := json.Marshal(rerankResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
}
c.Writer.Header().Set("Content-Type", "application/json")
c.Writer.WriteHeader(resp.StatusCode)
_, err = c.Writer.Write(jsonResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "write_response_body_failed", http.StatusInternalServerError), nil
}
return nil, &usage
}
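
For orientation, here is a minimal, self-contained sketch of the request body that ConvertRerankRequest above produces, using stand-in types copied from the structs added in this PR (the real dto.RerankRequest may carry more fields); it only illustrates the JSON the Ali rerank endpoint receives.

package main

import (
    "encoding/json"
    "fmt"
)

// Stand-ins mirroring the Ali rerank structs added above (assumed shape).
type aliRerankParameters struct {
    TopN            *int  `json:"top_n,omitempty"`
    ReturnDocuments *bool `json:"return_documents,omitempty"`
}

type aliRerankInput struct {
    Query     string `json:"query"`
    Documents []any  `json:"documents"`
}

type aliRerankRequest struct {
    Model      string              `json:"model"`
    Input      aliRerankInput      `json:"input"`
    Parameters aliRerankParameters `json:"parameters,omitempty"`
}

func main() {
    topN := 2
    returnDocs := true // ConvertRerankRequest defaults this to true when the client omits it
    req := aliRerankRequest{
        Model: "gte-rerank-v2",
        Input: aliRerankInput{
            Query:     "What is the capital of France?",
            Documents: []any{"Paris is the capital of France.", "Berlin is the capital of Germany."},
        },
        Parameters: aliRerankParameters{TopN: &topN, ReturnDocuments: &returnDocs},
    }
    body, _ := json.MarshalIndent(req, "", "  ")
    // This body is POSTed to {BaseUrl}/api/v1/services/rerank/text-rerank/text-rerank.
    fmt.Println(string(body))
}

RerankHandler then maps the Ali output.results and usage.total_tokens back into the OpenAI-style RerankResponse, billing all rerank tokens as prompt tokens.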

View File

@@ -3,7 +3,6 @@ package ali
import (
"bufio"
"encoding/json"
"github.com/gin-gonic/gin"
"io"
"net/http"
"one-api/common"
@@ -11,6 +10,8 @@ import (
"one-api/relay/helper"
"one-api/service"
"strings"
"github.com/gin-gonic/gin"
)
// https://help.aliyun.com/document_detail/613695.html?spm=a2c4g.2399480.0.0.1adb778fAdzP9w#341800c0f8w0r
@@ -27,9 +28,6 @@ func requestOpenAI2Ali(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIReque
}
func embeddingRequestOpenAI2Ali(request dto.EmbeddingRequest) *AliEmbeddingRequest {
if request.Model == "" {
request.Model = "text-embedding-v1"
}
return &AliEmbeddingRequest{
Model: request.Model,
Input: struct {
@@ -64,7 +62,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorW
}, nil
}
fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse)
model := c.GetString("model")
if model == "" {
model = "text-embedding-v4"
}
fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse, model)
jsonResponse, err := json.Marshal(fullTextResponse)
if err != nil {
return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
@@ -75,11 +77,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorW
return nil, &fullTextResponse.Usage
}
func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbeddingResponse {
func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse, model string) *dto.OpenAIEmbeddingResponse {
openAIEmbeddingResponse := dto.OpenAIEmbeddingResponse{
Object: "list",
Data: make([]dto.OpenAIEmbeddingResponseItem, 0, len(response.Output.Embeddings)),
Model: "text-embedding-v1",
Model: model,
Usage: dto.Usage{TotalTokens: response.Usage.TotalTokens},
}
@@ -94,12 +96,11 @@ func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbe
}
func responseAli2OpenAI(response *AliResponse) *dto.OpenAITextResponse {
content, _ := json.Marshal(response.Output.Text)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Output.Text,
},
FinishReason: response.Output.FinishReason,
}

View File

@@ -109,6 +109,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
gopool.Go(func() {
defer func() {
// Add panic recovery handling
if r := recover(); r != nil {
if common2.DebugEnabled {
println("SSE ping goroutine panic recovered:", fmt.Sprintf("%v", r))
}
}
if common2.DebugEnabled {
println("SSE ping goroutine stopped.")
}
@@ -119,19 +125,32 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
}
ticker := time.NewTicker(pingInterval)
// Clean up the ticker on exit
defer ticker.Stop()
// Make sure the ticker is cleaned up in all cases
defer func() {
ticker.Stop()
if common2.DebugEnabled {
println("SSE ping ticker stopped")
}
}()
var pingMutex sync.Mutex
if common2.DebugEnabled {
println("SSE ping goroutine started")
}
// Add a timeout guard so the goroutine cannot run for too long
maxPingDuration := 120 * time.Minute // maximum ping duration
pingTimeout := time.NewTimer(maxPingDuration)
defer pingTimeout.Stop()
for {
select {
// Send ping data
case <-ticker.C:
if err := sendPingData(c, &pingMutex); err != nil {
if common2.DebugEnabled {
println("SSE ping error, stopping goroutine:", err.Error())
}
return
}
// Received the stop signal
@@ -140,6 +159,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
// Request finished
case <-c.Request.Context().Done():
return
// Timeout guard so the goroutine cannot run forever
case <-pingTimeout.C:
if common2.DebugEnabled {
println("SSE ping goroutine timeout, stopping")
}
return
}
}
})
@@ -148,19 +173,34 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.Canc
}
func sendPingData(c *gin.Context, mutex *sync.Mutex) error {
mutex.Lock()
defer mutex.Unlock()
// Add a timeout so a stuck lock cannot block forever
done := make(chan error, 1)
go func() {
mutex.Lock()
defer mutex.Unlock()
err := helper.PingData(c)
if err != nil {
common2.LogError(c, "SSE ping error: "+err.Error())
err := helper.PingData(c)
if err != nil {
common2.LogError(c, "SSE ping error: "+err.Error())
done <- err
return
}
if common2.DebugEnabled {
println("SSE ping data sent.")
}
done <- nil
}()
// Bound how long sending the ping data may take
select {
case err := <-done:
return err
case <-time.After(10 * time.Second):
return errors.New("SSE ping data send timeout")
case <-c.Request.Context().Done():
return errors.New("request context cancelled during ping")
}
if common2.DebugEnabled {
println("SSE ping data sent.")
}
return nil
}
func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error) {
@@ -175,15 +215,23 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http
client = service.GetHttpClient()
}
var stopPinger context.CancelFunc
if info.IsStream {
helper.SetEventStreamHeaders(c)
// Handle ping keep-alive for streaming requests
generalSettings := operation_setting.GetGeneralSetting()
if generalSettings.PingIntervalEnabled {
pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
stopPinger := startPingKeepAlive(c, pingInterval)
defer stopPinger()
stopPinger = startPingKeepAlive(c, pingInterval)
// Use defer to make sure the ping goroutine is stopped in all cases
defer func() {
if stopPinger != nil {
stopPinger()
if common2.DebugEnabled {
println("SSE ping goroutine stopped by defer")
}
}
}()
}
}
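
The reworked sendPingData wraps the mutex-guarded write in a helper goroutine and races it against a timeout and the request context. A minimal sketch of that pattern in isolation (function and variable names here are illustrative, not from the repo):

package main

import (
    "context"
    "errors"
    "fmt"
    "sync"
    "time"
)

// writeWithTimeout performs a mutex-guarded write in a helper goroutine and
// bails out if it does not complete in time or the request context is
// cancelled. The real code writes an SSE ping via helper.PingData.
func writeWithTimeout(ctx context.Context, mu *sync.Mutex, write func() error, timeout time.Duration) error {
    done := make(chan error, 1)
    go func() {
        mu.Lock()
        defer mu.Unlock()
        done <- write()
    }()
    select {
    case err := <-done:
        return err
    case <-time.After(timeout):
        return errors.New("write timeout")
    case <-ctx.Done():
        return errors.New("request context cancelled")
    }
}

func main() {
    var mu sync.Mutex
    err := writeWithTimeout(context.Background(), &mu, func() error {
        fmt.Println("ping sent")
        return nil
    }, 10*time.Second)
    fmt.Println("err:", err)
}

As in the original, a write that times out still completes in the background once it acquires the lock; the caller simply stops waiting for it.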

View File

@@ -53,12 +53,11 @@ func requestOpenAI2Baidu(request dto.GeneralOpenAIRequest) *BaiduChatRequest {
}
func responseBaidu2OpenAI(response *BaiduChatResponse) *dto.OpenAITextResponse {
content, _ := json.Marshal(response.Result)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Result,
},
FinishReason: "stop",
}

View File

@@ -48,9 +48,9 @@ func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *dto.Cla
prompt := ""
for _, message := range textRequest.Messages {
if message.Role == "user" {
prompt += fmt.Sprintf("\n\nHuman: %s", message.Content)
prompt += fmt.Sprintf("\n\nHuman: %s", message.StringContent())
} else if message.Role == "assistant" {
prompt += fmt.Sprintf("\n\nAssistant: %s", message.Content)
prompt += fmt.Sprintf("\n\nAssistant: %s", message.StringContent())
} else if message.Role == "system" {
if prompt == "" {
prompt = message.StringContent()
@@ -155,15 +155,13 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*dto.Cla
}
if lastMessage.Role == message.Role && lastMessage.Role != "tool" {
if lastMessage.IsStringContent() && message.IsStringContent() {
content, _ := json.Marshal(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
fmtMessage.Content = content
fmtMessage.SetStringContent(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
// delete last message
formatMessages = formatMessages[:len(formatMessages)-1]
}
}
if fmtMessage.Content == nil {
content, _ := json.Marshal("...")
fmtMessage.Content = content
fmtMessage.SetStringContent("...")
}
formatMessages = append(formatMessages, fmtMessage)
lastMessage = fmtMessage
@@ -397,12 +395,11 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse) *dto
thinkingContent := ""
if reqMode == RequestModeCompletion {
content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: strings.TrimPrefix(claudeResponse.Completion, " "),
Name: nil,
},
FinishReason: stopReasonClaude2OpenAI(claudeResponse.StopReason),

View File

@@ -195,11 +195,10 @@ func cohereHandler(c *gin.Context, resp *http.Response, modelName string, prompt
openaiResp.Model = modelName
openaiResp.Usage = usage
content, _ := json.Marshal(cohereResp.Text)
openaiResp.Choices = []dto.OpenAITextResponseChoice{
{
Index: 0,
Message: dto.Message{Content: content, Role: "assistant"},
Message: dto.Message{Content: cohereResp.Text, Role: "assistant"},
FinishReason: stopReasonCohere2OpenAI(cohereResp.FinishReason),
},
}

View File

@@ -10,7 +10,7 @@ type CozeError struct {
type CozeEnterMessage struct {
Role string `json:"role"`
Type string `json:"type,omitempty"`
Content json.RawMessage `json:"content,omitempty"`
Content any `json:"content,omitempty"`
MetaData json.RawMessage `json:"meta_data,omitempty"`
ContentType string `json:"content_type,omitempty"`
}

View File

@@ -278,12 +278,11 @@ func difyHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInf
Created: common.GetTimestamp(),
Usage: difyResponse.MetaData.Usage,
}
content, _ := json.Marshal(difyResponse.Answer)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: difyResponse.Answer,
},
FinishReason: "stop",
}

View File

@@ -1,5 +1,7 @@
package gemini
import "encoding/json"
type GeminiChatRequest struct {
Contents []GeminiChatContent `json:"contents"`
SafetySettings []GeminiChatSafetySettings `json:"safetySettings,omitempty"`
@@ -22,19 +24,38 @@ type GeminiInlineData struct {
Data string `json:"data"`
}
// UnmarshalJSON custom unmarshaler for GeminiInlineData to support snake_case and camelCase for MimeType
func (g *GeminiInlineData) UnmarshalJSON(data []byte) error {
type Alias GeminiInlineData // Use type alias to avoid recursion
var aux struct {
Alias
MimeTypeSnake string `json:"mime_type"`
}
if err := json.Unmarshal(data, &aux); err != nil {
return err
}
*g = GeminiInlineData(aux.Alias) // Copy other fields if any in future
// Prioritize snake_case if present
if aux.MimeTypeSnake != "" {
g.MimeType = aux.MimeTypeSnake
} else if aux.MimeType != "" { // Fallback to camelCase from Alias
g.MimeType = aux.MimeType
}
// g.Data would be populated by aux.Alias.Data
return nil
}
type FunctionCall struct {
FunctionName string `json:"name"`
Arguments any `json:"args"`
}
type GeminiFunctionResponseContent struct {
Name string `json:"name"`
Content any `json:"content"`
}
type FunctionResponse struct {
Name string `json:"name"`
Response GeminiFunctionResponseContent `json:"response"`
Name string `json:"name"`
Response map[string]interface{} `json:"response"`
}
type GeminiPartExecutableCode struct {
@@ -63,6 +84,33 @@ type GeminiPart struct {
CodeExecutionResult *GeminiPartCodeExecutionResult `json:"codeExecutionResult,omitempty"`
}
// UnmarshalJSON custom unmarshaler for GeminiPart to support snake_case and camelCase for InlineData
func (p *GeminiPart) UnmarshalJSON(data []byte) error {
// Alias to avoid recursion during unmarshalling
type Alias GeminiPart
var aux struct {
Alias
InlineDataSnake *GeminiInlineData `json:"inline_data,omitempty"` // snake_case variant
}
if err := json.Unmarshal(data, &aux); err != nil {
return err
}
// Assign fields from alias
*p = GeminiPart(aux.Alias)
// Prioritize snake_case for InlineData if present
if aux.InlineDataSnake != nil {
p.InlineData = aux.InlineDataSnake
} else if aux.InlineData != nil { // Fallback to camelCase from Alias
p.InlineData = aux.InlineData
}
// Other fields like Text, FunctionCall etc. are already populated via aux.Alias
return nil
}
type GeminiChatContent struct {
Role string `json:"role,omitempty"`
Parts []GeminiPart `json:"parts"`
@@ -117,10 +165,16 @@ type GeminiChatResponse struct {
}
type GeminiUsageMetadata struct {
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokenCount int `json:"promptTokenCount"`
CandidatesTokenCount int `json:"candidatesTokenCount"`
TotalTokenCount int `json:"totalTokenCount"`
ThoughtsTokenCount int `json:"thoughtsTokenCount"`
PromptTokensDetails []GeminiPromptTokensDetails `json:"promptTokensDetails"`
}
type GeminiPromptTokensDetails struct {
Modality string `json:"modality"`
TokenCount int `json:"tokenCount"`
}
// Imagen related structs
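
The two UnmarshalJSON methods added above share one pattern: unmarshal into an auxiliary struct that embeds a type alias plus a snake_case variant of the field, then prefer the snake_case value. A minimal, self-contained sketch of the idea (simplified types, not the repo's):

package main

import (
    "encoding/json"
    "fmt"
)

// Accept both snake_case and camelCase keys for the same field.
type inlineData struct {
    MimeType string `json:"mimeType"`
    Data     string `json:"data"`
}

func (g *inlineData) UnmarshalJSON(data []byte) error {
    type alias inlineData // defined type drops the method set, avoiding recursion
    var aux struct {
        alias
        MimeTypeSnake string `json:"mime_type"`
    }
    if err := json.Unmarshal(data, &aux); err != nil {
        return err
    }
    *g = inlineData(aux.alias)
    if aux.MimeTypeSnake != "" { // snake_case wins when both keys are present
        g.MimeType = aux.MimeTypeSnake
    }
    return nil
}

func main() {
    var a, b inlineData
    _ = json.Unmarshal([]byte(`{"mime_type":"image/png","data":"..."}`), &a)
    _ = json.Unmarshal([]byte(`{"mimeType":"image/png","data":"..."}`), &b)
    fmt.Println(a.MimeType, b.MimeType) // image/png image/png
}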

View File

@@ -55,6 +55,16 @@ func GeminiTextGenerationHandler(c *gin.Context, resp *http.Response, info *rela
TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
}
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
// Return the JSON response in Gemini's native format directly
jsonResponse, err := json.Marshal(geminiResponse)
if err != nil {
@@ -100,6 +110,14 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info
usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
}
// Send the GeminiChatResponse directly
@@ -118,11 +136,10 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info
}
// Compute the final usage
usage.PromptTokensDetails.TextTokens = usage.PromptTokens
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
// End the streaming response
helper.Done(c)
// Do not append a trailing [DONE] to the stream, since the Gemini API never sends one
//helper.Done(c)
return usage, nil
}
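
The modality loop added above (and repeated in the other Gemini handlers later in this diff) routes the per-modality prompt token counts into the audio and text detail fields. A small sketch of what it computes, with stand-in types:

package main

import "fmt"

// Stand-in for the Gemini usage metadata entries referenced above (assumed shape).
type promptTokensDetail struct {
    Modality   string
    TokenCount int
}

// splitPromptTokens maps the per-modality breakdown onto separate audio/text counters.
func splitPromptTokens(details []promptTokensDetail) (audio, text int) {
    for _, d := range details {
        switch d.Modality {
        case "AUDIO":
            audio = d.TokenCount
        case "TEXT":
            text = d.TokenCount
        }
    }
    return
}

func main() {
    audio, text := splitPromptTokens([]promptTokensDetail{
        {Modality: "TEXT", TokenCount: 1200},
        {Modality: "AUDIO", TokenCount: 300},
    })
    fmt.Println(audio, text) // 300 1200
}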

View File

@@ -57,25 +57,63 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
}
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// If the model name starts with gemini-2.5-pro, do not set ThinkingBudget
if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
IncludeThoughts: true,
}
} else {
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
if budgetTokens == 0 || budgetTokens > 24576 {
budgetTokens = 24576
}
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(int(budgetTokens)),
IncludeThoughts: true,
}
}
if strings.HasSuffix(info.OriginModelName, "-thinking") {
// Hard-coded list of older models that do not support ThinkingBudget
unsupportedModels := []string{
"gemini-2.5-pro-preview-05-06",
"gemini-2.5-pro-preview-03-25",
}
isUnsupported := false
for _, unsupportedModel := range unsupportedModels {
if strings.HasPrefix(info.OriginModelName, unsupportedModel) {
isUnsupported = true
break
}
}
if isUnsupported {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
IncludeThoughts: true,
}
} else {
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
// Check whether this is a newer 2.5-pro model, which supports ThinkingBudget but with a special range
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
if isNew25Pro {
// Newer 2.5-pro models accept a ThinkingBudget in the range 128-32768
if budgetTokens == 0 || budgetTokens < 128 {
budgetTokens = 128
} else if budgetTokens > 32768 {
budgetTokens = 32768
}
} else {
// Other models accept a ThinkingBudget in the range 0-24576
if budgetTokens == 0 || budgetTokens > 24576 {
budgetTokens = 24576
}
}
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(int(budgetTokens)),
IncludeThoughts: true,
}
}
} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(0),
// Check whether this is a newer 2.5-pro model (these do not support -nothinking, since the minimum budget is 128)
isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
if !isNew25Pro {
// Only models other than the newer 2.5-pro support -nothinking
geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
ThinkingBudget: common.GetPointer(0),
}
}
}
}
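
The budget logic above amounts to clamping the percentage-derived value into a per-model range. A worked sketch with illustrative numbers (60% of a 4096-token max output gives 2457 budget tokens); the helper below is an editorial sketch, not repo code:

package main

import "fmt"

// Mirrors the clamping above: newer 2.5-pro models accept a thinking budget
// of 128-32768 tokens, other models 0-24576 (zero falls back to the maximum
// or minimum as in the diff).
func clampThinkingBudget(budget float64, isNew25Pro bool) int {
    if isNew25Pro {
        switch {
        case budget == 0 || budget < 128:
            return 128
        case budget > 32768:
            return 32768
        }
        return int(budget)
    }
    if budget == 0 || budget > 24576 {
        return 24576
    }
    return int(budget)
}

func main() {
    // 60% of 4096 max output tokens = 2457.6 budget tokens.
    fmt.Println(clampThinkingBudget(0.6*4096, true))  // 2457
    fmt.Println(clampThinkingBudget(0.6*65536, true)) // 32768 (clamped)
    fmt.Println(clampThinkingBudget(0, false))        // 24576 (default)
}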
@@ -137,12 +175,6 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
// common.SysLog("tools: " + fmt.Sprintf("%+v", geminiRequest.Tools))
// json_data, _ := json.Marshal(geminiRequest.Tools)
// common.SysLog("tools_json: " + string(json_data))
} else if textRequest.Functions != nil {
//geminiRequest.Tools = []GeminiChatTool{
// {
// FunctionDeclarations: textRequest.Functions,
// },
//}
}
if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -173,17 +205,27 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
name = val
}
content := common.StrToMap(message.StringContent())
var contentMap map[string]interface{}
contentStr := message.StringContent()
// 1. Try to parse the content as a JSON object
if err := json.Unmarshal([]byte(contentStr), &contentMap); err != nil {
// 2. If that fails, try to parse it as a JSON array
var contentSlice []interface{}
if err := json.Unmarshal([]byte(contentStr), &contentSlice); err == nil {
// If it is an array, wrap it in an object
contentMap = map[string]interface{}{"result": contentSlice}
} else {
// 3. If that also fails, treat it as plain text
contentMap = map[string]interface{}{"content": contentStr}
}
}
functionResp := &FunctionResponse{
Name: name,
Response: GeminiFunctionResponseContent{
Name: name,
Content: content,
},
}
if content == nil {
functionResp.Response.Content = message.StringContent()
Name: name,
Response: contentMap,
}
*parts = append(*parts, GeminiPart{
FunctionResponse: functionResp,
})
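
The object / array / plain-text fallback above can be read as one small helper. A self-contained sketch (the helper name is hypothetical):

package main

import (
    "encoding/json"
    "fmt"
)

// toResponseMap mirrors the fallback chain above: try a JSON object first,
// then a JSON array (wrapped under "result"), then plain text under "content".
// Key names follow the diff above.
func toResponseMap(content string) map[string]interface{} {
    var m map[string]interface{}
    if err := json.Unmarshal([]byte(content), &m); err == nil {
        return m
    }
    var s []interface{}
    if err := json.Unmarshal([]byte(content), &s); err == nil {
        return map[string]interface{}{"result": s}
    }
    return map[string]interface{}{"content": content}
}

func main() {
    fmt.Println(toResponseMap(`{"ok":true}`)) // map[ok:true]
    fmt.Println(toResponseMap(`[1,2,3]`))     // map[result:[1 2 3]]
    fmt.Println(toResponseMap(`plain text`))  // map[content:plain text]
}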
@@ -280,13 +322,13 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
if part.GetInputAudio().Data == "" {
return nil, fmt.Errorf("only base64 audio is supported in gemini")
}
format, base64String, err := service.DecodeBase64FileData(part.GetInputAudio().Data)
base64String, err := service.DecodeBase64AudioData(part.GetInputAudio().Data)
if err != nil {
return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
}
parts = append(parts, GeminiPart{
InlineData: &GeminiInlineData{
MimeType: format,
MimeType: "audio/" + part.GetInputAudio().Format,
Data: base64String,
},
})
@@ -576,14 +618,13 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResp
Created: common.GetTimestamp(),
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
}
content, _ := json.Marshal("")
isToolCall := false
for _, candidate := range response.Candidates {
choice := dto.OpenAITextResponseChoice{
Index: int(candidate.Index),
Message: dto.Message{
Role: "assistant",
Content: content,
Content: "",
},
FinishReason: constant.FinishReasonStop,
}
@@ -738,6 +779,13 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycom
usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
}
err = helper.ObjectData(c, response)
if err != nil {
@@ -812,6 +860,14 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.Re
usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
if detail.Modality == "AUDIO" {
usage.PromptTokensDetails.AudioTokens = detail.TokenCount
} else if detail.Modality == "TEXT" {
usage.PromptTokensDetails.TextTokens = detail.TokenCount
}
}
fullTextResponse.Usage = usage
jsonResponse, err := json.Marshal(fullTextResponse)
if err != nil {

View File

@@ -1,13 +1,55 @@
package mistral
import (
"one-api/common"
"one-api/dto"
"regexp"
)
var mistralToolCallIdRegexp = regexp.MustCompile("^[a-zA-Z0-9]{9}$")
func requestOpenAI2Mistral(request *dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
messages := make([]dto.Message, 0, len(request.Messages))
idMap := make(map[string]string)
for _, message := range request.Messages {
// 1. tool_calls.id
toolCalls := message.ParseToolCalls()
if toolCalls != nil {
for i := range toolCalls {
if !mistralToolCallIdRegexp.MatchString(toolCalls[i].ID) {
if newId, ok := idMap[toolCalls[i].ID]; ok {
toolCalls[i].ID = newId
} else {
newId, err := common.GenerateRandomCharsKey(9)
if err == nil {
idMap[toolCalls[i].ID] = newId
toolCalls[i].ID = newId
}
}
}
}
message.SetToolCalls(toolCalls)
}
// 2. tool_call_id
if message.ToolCallId != "" {
if newId, ok := idMap[message.ToolCallId]; ok {
message.ToolCallId = newId
} else {
if !mistralToolCallIdRegexp.MatchString(message.ToolCallId) {
newId, err := common.GenerateRandomCharsKey(9)
if err == nil {
idMap[message.ToolCallId] = newId
message.ToolCallId = newId
}
}
}
}
mediaMessages := message.ParseContent()
if message.Role == "assistant" && message.ToolCalls != nil && message.Content == "" {
mediaMessages = []dto.MediaContent{}
}
for j, mediaMessage := range mediaMessages {
if mediaMessage.Type == dto.ContentTypeImageURL {
imageUrl := mediaMessage.GetImageMedia()
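
The Mistral adapter above normalizes tool call IDs to the 9-character alphanumeric form Mistral expects, keeping tool_calls.id and later tool_call_id references consistent via idMap. A minimal sketch of that remapping (randomID9 is a stand-in for common.GenerateRandomCharsKey(9), whose exact behavior is assumed):

package main

import (
    "fmt"
    "math/rand"
    "regexp"
)

var mistralIDPattern = regexp.MustCompile("^[a-zA-Z0-9]{9}$")

// randomID9 generates a 9-character alphanumeric ID (stand-in for the repo helper).
func randomID9() string {
    const chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    b := make([]byte, 9)
    for i := range b {
        b[i] = chars[rand.Intn(len(chars))]
    }
    return string(b)
}

func main() {
    idMap := map[string]string{}
    normalize := func(id string) string {
        if mistralIDPattern.MatchString(id) {
            return id
        }
        if newID, ok := idMap[id]; ok {
            return newID // keep tool_calls.id and tool_call_id consistent
        }
        newID := randomID9()
        idMap[id] = newID
        return newID
    }
    fmt.Println(normalize("call_abc123XYZ987")) // regenerated 9-char id
    fmt.Println(normalize("call_abc123XYZ987")) // same id, reused from idMap
    fmt.Println(normalize("abc123XYZ"))         // already valid, unchanged
}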

View File

@@ -45,12 +45,11 @@ func responsePaLM2OpenAI(response *PaLMChatResponse) *dto.OpenAITextResponse {
Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
}
for i, candidate := range response.Candidates {
content, _ := json.Marshal(candidate.Content)
choice := dto.OpenAITextResponseChoice{
Index: i,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: candidate.Content,
},
FinishReason: "stop",
}

View File

@@ -56,12 +56,11 @@ func responseTencent2OpenAI(response *TencentChatResponse) *dto.OpenAITextRespon
},
}
if len(response.Choices) > 0 {
content, _ := json.Marshal(response.Choices[0].Messages.Content)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Choices[0].Messages.Content,
},
FinishReason: response.Choices[0].FinishReason,
}

View File

@@ -61,12 +61,11 @@ func responseXunfei2OpenAI(response *XunfeiChatResponse) *dto.OpenAITextResponse
},
}
}
content, _ := json.Marshal(response.Payload.Choices.Text[0].Content)
choice := dto.OpenAITextResponseChoice{
Index: 0,
Message: dto.Message{
Role: "assistant",
Content: content,
Content: response.Payload.Choices.Text[0].Content,
},
FinishReason: constant.FinishReasonStop,
}

View File

@@ -108,12 +108,11 @@ func responseZhipu2OpenAI(response *ZhipuResponse) *dto.OpenAITextResponse {
Usage: response.Data.Usage,
}
for i, choice := range response.Data.Choices {
content, _ := json.Marshal(strings.Trim(choice.Content, "\""))
openaiChoice := dto.OpenAITextResponseChoice{
Index: i,
Message: dto.Message{
Role: choice.Role,
Content: content,
Content: strings.Trim(choice.Content, "\""),
},
FinishReason: "",
}

View File

@@ -3,6 +3,7 @@ package helper
import (
"bufio"
"context"
"fmt"
"io"
"net/http"
"one-api/common"
@@ -19,8 +20,8 @@ import (
)
const (
InitialScannerBufferSize = 1 << 20 // 1MB (1*1024*1024)
MaxScannerBufferSize = 10 << 20 // 10MB (10*1024*1024)
InitialScannerBufferSize = 64 << 10 // 64KB (64*1024)
MaxScannerBufferSize = 10 << 20 // 10MB (10*1024*1024)
DefaultPingInterval = 10 * time.Second
)
@@ -30,7 +31,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
return
}
defer resp.Body.Close()
// Make sure the response body is always closed
defer func() {
if resp.Body != nil {
resp.Body.Close()
}
}()
streamingTimeout := time.Duration(constant.StreamingTimeout) * time.Second
if strings.HasPrefix(info.UpstreamModelName, "o") {
@@ -39,11 +45,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
}
var (
stopChan = make(chan bool, 2)
stopChan = make(chan bool, 3) // larger buffer to avoid blocking
scanner = bufio.NewScanner(resp.Body)
ticker = time.NewTicker(streamingTimeout)
pingTicker *time.Ticker
writeMutex sync.Mutex // Mutex to protect concurrent writes
wg sync.WaitGroup // used to wait for all goroutines to exit
)
generalSettings := operation_setting.GetGeneralSetting()
@@ -57,13 +64,32 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
pingTicker = time.NewTicker(pingInterval)
}
// Improved resource cleanup: make sure every goroutine exits properly
defer func() {
// Notify all goroutines to stop
common.SafeSendBool(stopChan, true)
ticker.Stop()
if pingTicker != nil {
pingTicker.Stop()
}
// Wait for all goroutines to exit, for at most 5 seconds
done := make(chan struct{})
go func() {
wg.Wait()
close(done)
}()
select {
case <-done:
case <-time.After(5 * time.Second):
common.LogError(c, "timeout waiting for goroutines to exit")
}
close(stopChan)
}()
scanner.Buffer(make([]byte, InitialScannerBufferSize), MaxScannerBufferSize)
scanner.Split(bufio.ScanLines)
SetEventStreamHeaders(c)
@@ -73,35 +99,95 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
ctx = context.WithValue(ctx, "stop_chan", stopChan)
// Handle ping data sending
// Handle ping data sending with improved error handling
if pingEnabled && pingTicker != nil {
wg.Add(1)
gopool.Go(func() {
defer func() {
wg.Done()
if r := recover(); r != nil {
common.LogError(c, fmt.Sprintf("ping goroutine panic: %v", r))
common.SafeSendBool(stopChan, true)
}
if common.DebugEnabled {
println("ping goroutine exited")
}
}()
// Add a timeout guard so the goroutine cannot run forever
maxPingDuration := 30 * time.Minute // maximum ping duration
pingTimeout := time.NewTimer(maxPingDuration)
defer pingTimeout.Stop()
for {
select {
case <-pingTicker.C:
writeMutex.Lock() // Lock before writing
err := PingData(c)
writeMutex.Unlock() // Unlock after writing
if err != nil {
common.LogError(c, "ping data error: "+err.Error())
common.SafeSendBool(stopChan, true)
// Use a timeout so a blocked write cannot stall the goroutine
done := make(chan error, 1)
go func() {
writeMutex.Lock()
defer writeMutex.Unlock()
done <- PingData(c)
}()
select {
case err := <-done:
if err != nil {
common.LogError(c, "ping data error: "+err.Error())
return
}
if common.DebugEnabled {
println("ping data sent")
}
case <-time.After(10 * time.Second):
common.LogError(c, "ping data send timeout")
return
case <-ctx.Done():
return
case <-stopChan:
return
}
if common.DebugEnabled {
println("ping data sent")
}
case <-ctx.Done():
if common.DebugEnabled {
println("ping data goroutine stopped")
}
return
case <-stopChan:
return
case <-c.Request.Context().Done():
// Watch for the client disconnecting
return
case <-pingTimeout.C:
common.LogError(c, "ping goroutine max duration reached")
return
}
}
})
}
// Scanner goroutine with improved error handling
wg.Add(1)
common.RelayCtxGo(ctx, func() {
defer func() {
wg.Done()
if r := recover(); r != nil {
common.LogError(c, fmt.Sprintf("scanner goroutine panic: %v", r))
}
common.SafeSendBool(stopChan, true)
if common.DebugEnabled {
println("scanner goroutine exited")
}
}()
for scanner.Scan() {
// Check whether we should stop
select {
case <-stopChan:
return
case <-ctx.Done():
return
case <-c.Request.Context().Done():
return
default:
}
ticker.Reset(streamingTimeout)
data := scanner.Text()
if common.DebugEnabled {
@@ -119,11 +205,27 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
data = strings.TrimSuffix(data, "\r")
if !strings.HasPrefix(data, "[DONE]") {
info.SetFirstResponseTime()
writeMutex.Lock() // Lock before writing
success := dataHandler(data)
writeMutex.Unlock() // Unlock after writing
if !success {
break
// Use a timeout so a blocked write cannot stall the goroutine
done := make(chan bool, 1)
go func() {
writeMutex.Lock()
defer writeMutex.Unlock()
done <- dataHandler(data)
}()
select {
case success := <-done:
if !success {
return
}
case <-time.After(10 * time.Second):
common.LogError(c, "data handler timeout")
return
case <-ctx.Done():
return
case <-stopChan:
return
}
}
}
@@ -133,17 +235,18 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon
common.LogError(c, "scanner error: "+err.Error())
}
}
common.SafeSendBool(stopChan, true)
})
// Main loop: wait for completion or timeout
select {
case <-ticker.C:
// Timeout handling
common.LogError(c, "streaming timeout")
common.SafeSendBool(stopChan, true)
case <-stopChan:
// Normal completion
common.LogInfo(c, "streaming finished")
case <-c.Request.Context().Done():
// Client disconnected
common.LogInfo(c, "client disconnected")
}
}
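
The cleanup added above waits on a sync.WaitGroup but gives up after five seconds so a stuck goroutine cannot pin the request. The pattern in isolation (a sketch; the handler logs an error instead of returning a bool):

package main

import (
    "fmt"
    "sync"
    "time"
)

// waitWithTimeout waits for all worker goroutines but gives up after the
// deadline so a stuck goroutine cannot block cleanup forever.
func waitWithTimeout(wg *sync.WaitGroup, timeout time.Duration) bool {
    done := make(chan struct{})
    go func() {
        wg.Wait()
        close(done)
    }()
    select {
    case <-done:
        return true
    case <-time.After(timeout):
        return false
    }
}

func main() {
    var wg sync.WaitGroup
    wg.Add(1)
    go func() {
        defer wg.Done()
        time.Sleep(100 * time.Millisecond) // simulated worker
    }()
    fmt.Println("all goroutines exited:", waitWithTimeout(&wg, 5*time.Second))
}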

View File

@@ -352,6 +352,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
promptTokens := usage.PromptTokens
cacheTokens := usage.PromptTokensDetails.CachedTokens
imageTokens := usage.PromptTokensDetails.ImageTokens
audioTokens := usage.PromptTokensDetails.AudioTokens
completionTokens := usage.CompletionTokens
modelName := relayInfo.OriginModelName
@@ -367,6 +368,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
dPromptTokens := decimal.NewFromInt(int64(promptTokens))
dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
dImageTokens := decimal.NewFromInt(int64(imageTokens))
dAudioTokens := decimal.NewFromInt(int64(audioTokens))
dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
dCompletionRatio := decimal.NewFromFloat(completionRatio)
dCacheRatio := decimal.NewFromFloat(cacheRatio)
@@ -412,23 +414,43 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 $%s",
extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 %s",
fileSearchTool.CallCount, dFileSearchQuota.String())
}
}
var quotaCalculateDecimal decimal.Decimal
if !priceData.UsePrice {
nonCachedTokens := dPromptTokens.Sub(dCacheTokens)
cachedTokensWithRatio := dCacheTokens.Mul(dCacheRatio)
promptQuota := nonCachedTokens.Add(cachedTokensWithRatio)
if imageTokens > 0 {
nonImageTokens := dPromptTokens.Sub(dImageTokens)
imageTokensWithRatio := dImageTokens.Mul(dImageRatio)
promptQuota = nonImageTokens.Add(imageTokensWithRatio)
var audioInputQuota decimal.Decimal
var audioInputPrice float64
if !priceData.UsePrice {
baseTokens := dPromptTokens
// Subtract cached tokens
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
baseTokens = baseTokens.Sub(dCacheTokens)
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
// Subtract image tokens
var imageTokensWithRatio decimal.Decimal
if !dImageTokens.IsZero() {
baseTokens = baseTokens.Sub(dImageTokens)
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
}
// Subtract Gemini audio tokens
if !dAudioTokens.IsZero() {
audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
if audioInputPrice > 0 {
// Recompute the base tokens
baseTokens = baseTokens.Sub(dAudioTokens)
audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent += fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String())
}
}
promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio)
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
@@ -442,6 +464,8 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
// Add quota for responses tool calls
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
// Add the separately billed audio input quota
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
quota := int(quotaCalculateDecimal.Round(0).IntPart())
totalTokens := promptTokens + completionTokens
@@ -512,6 +536,11 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
other["file_search_price"] = fileSearchPrice
}
}
if !audioInputQuota.IsZero() {
other["audio_input_seperate_price"] = true
other["audio_input_token_count"] = audioTokens
other["audio_input_price"] = audioInputPrice
}
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
}
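
A worked example of the prompt-side arithmetic above, with illustrative numbers rather than repo values: 10,000 prompt tokens of which 2,000 are cached (cache ratio 0.5) and 1,000 are Gemini audio tokens billed separately at an assumed $1 per million; the cached and audio tokens are carved out of the base count, and the audio portion is added back as an independent quota term.

package main

import (
    "fmt"

    "github.com/shopspring/decimal"
)

func main() {
    // Illustrative inputs, not repo values.
    prompt := decimal.NewFromInt(10000) // total prompt tokens
    cached := decimal.NewFromInt(2000)  // cached prompt tokens
    audio := decimal.NewFromInt(1000)   // Gemini audio prompt tokens
    cacheRatio := decimal.NewFromFloat(0.5)
    audioPricePerM := decimal.NewFromFloat(1.0)  // assumed $ per 1M audio tokens
    groupRatio := decimal.NewFromInt(1)          // assumed group ratio
    quotaPerUnit := decimal.NewFromInt(500000)   // assumed quota per $1

    // Carve cached and audio tokens out of the base prompt tokens.
    base := prompt.Sub(cached).Sub(audio)
    promptQuota := base.Add(cached.Mul(cacheRatio))

    // Audio input is billed independently, per million tokens.
    audioQuota := audioPricePerM.Div(decimal.NewFromInt(1000000)).Mul(audio).Mul(groupRatio).Mul(quotaPerUnit)

    fmt.Println("prompt token-equivalents:", promptQuota.String()) // 8000
    fmt.Println("audio input quota:", audioQuota.String())         // 500
}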