Merge branch 'main' into feat-04
@@ -31,6 +31,8 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	switch info.RelayMode {
 	case constant.RelayModeEmbeddings:
 		fullRequestURL = fmt.Sprintf("%s/api/v1/services/embeddings/text-embedding/text-embedding", info.BaseUrl)
+	case constant.RelayModeRerank:
+		fullRequestURL = fmt.Sprintf("%s/api/v1/services/rerank/text-rerank/text-rerank", info.BaseUrl)
 	case constant.RelayModeImagesGenerations:
 		fullRequestURL = fmt.Sprintf("%s/api/v1/services/aigc/text2image/image-synthesis", info.BaseUrl)
 	case constant.RelayModeCompletions:
@@ -76,7 +78,7 @@ func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo
 }
 
 func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
-	return nil, errors.New("not implemented")
+	return ConvertRerankRequest(request), nil
 }
 
 func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
@@ -103,6 +105,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 		err, usage = aliImageHandler(c, resp, info)
 	case constant.RelayModeEmbeddings:
 		err, usage = aliEmbeddingHandler(c, resp)
+	case constant.RelayModeRerank:
+		err, usage = RerankHandler(c, resp, info)
 	default:
 		if info.IsStream {
 			err, usage = openai.OaiStreamHandler(c, resp, info)
@@ -8,6 +8,7 @@ var ModelList = []string{
 	"qwq-32b",
 	"qwen3-235b-a22b",
 	"text-embedding-v1",
+	"gte-rerank-v2",
 }
 
 var ChannelName = "ali"
@@ -1,5 +1,7 @@
 package ali
 
+import "one-api/dto"
+
 type AliMessage struct {
 	Content string `json:"content"`
 	Role    string `json:"role"`
@@ -97,3 +99,28 @@ type AliImageRequest struct {
 	} `json:"parameters,omitempty"`
 	ResponseFormat string `json:"response_format,omitempty"`
 }
+
+type AliRerankParameters struct {
+	TopN            *int  `json:"top_n,omitempty"`
+	ReturnDocuments *bool `json:"return_documents,omitempty"`
+}
+
+type AliRerankInput struct {
+	Query     string `json:"query"`
+	Documents []any  `json:"documents"`
+}
+
+type AliRerankRequest struct {
+	Model      string              `json:"model"`
+	Input      AliRerankInput      `json:"input"`
+	Parameters AliRerankParameters `json:"parameters,omitempty"`
+}
+
+type AliRerankResponse struct {
+	Output struct {
+		Results []dto.RerankResponseResult `json:"results"`
+	} `json:"output"`
+	Usage     AliUsage `json:"usage"`
+	RequestId string   `json:"request_id"`
+	AliError
+}
relay/channel/ali/rerank.go (new file, 83 lines)
@@ -0,0 +1,83 @@
+package ali
+
+import (
+	"encoding/json"
+	"io"
+	"net/http"
+	"one-api/dto"
+	relaycommon "one-api/relay/common"
+	"one-api/service"
+
+	"github.com/gin-gonic/gin"
+)
+
+func ConvertRerankRequest(request dto.RerankRequest) *AliRerankRequest {
+	returnDocuments := request.ReturnDocuments
+	if returnDocuments == nil {
+		t := true
+		returnDocuments = &t
+	}
+	return &AliRerankRequest{
+		Model: request.Model,
+		Input: AliRerankInput{
+			Query:     request.Query,
+			Documents: request.Documents,
+		},
+		Parameters: AliRerankParameters{
+			TopN:            &request.TopN,
+			ReturnDocuments: returnDocuments,
+		},
+	}
+}
+
+func RerankHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	var aliResponse AliRerankResponse
+	err = json.Unmarshal(responseBody, &aliResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	if aliResponse.Code != "" {
+		return &dto.OpenAIErrorWithStatusCode{
+			Error: dto.OpenAIError{
+				Message: aliResponse.Message,
+				Type:    aliResponse.Code,
+				Param:   aliResponse.RequestId,
+				Code:    aliResponse.Code,
+			},
+			StatusCode: resp.StatusCode,
+		}, nil
+	}
+
+	usage := dto.Usage{
+		PromptTokens:     aliResponse.Usage.TotalTokens,
+		CompletionTokens: 0,
+		TotalTokens:      aliResponse.Usage.TotalTokens,
+	}
+	rerankResponse := dto.RerankResponse{
+		Results: aliResponse.Output.Results,
+		Usage:   usage,
+	}
+
+	jsonResponse, err := json.Marshal(rerankResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+	c.Writer.Header().Set("Content-Type", "application/json")
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, err = c.Writer.Write(jsonResponse)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "write_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	return nil, &usage
+}
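
As a rough illustration of what ConvertRerankRequest emits (a sketch inside the ali package; the dto.RerankRequest field names and the JSON tags are taken from the hunks above, the literal values are invented), a standard rerank request maps onto DashScope's nested input/parameters shape:

	// Sketch: marshal a converted rerank request to inspect the upstream payload.
	req := dto.RerankRequest{
		Model:     "gte-rerank-v2",
		Query:     "what is a rerank model",
		Documents: []any{"doc one", "doc two"},
		TopN:      2,
	}
	body, _ := json.Marshal(ConvertRerankRequest(req))
	// {"model":"gte-rerank-v2",
	//  "input":{"query":"what is a rerank model","documents":["doc one","doc two"]},
	//  "parameters":{"top_n":2,"return_documents":true}}

Note that ReturnDocuments defaults to true when the client leaves it unset, so the relayed results can be passed back through dto.RerankResponse unchanged.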

@@ -3,7 +3,6 @@ package ali
 import (
 	"bufio"
 	"encoding/json"
-	"github.com/gin-gonic/gin"
 	"io"
 	"net/http"
 	"one-api/common"
@@ -11,6 +10,8 @@ import (
 	"one-api/relay/helper"
 	"one-api/service"
 	"strings"
+
+	"github.com/gin-gonic/gin"
 )
 
 // https://help.aliyun.com/document_detail/613695.html?spm=a2c4g.2399480.0.0.1adb778fAdzP9w#341800c0f8w0r
@@ -27,9 +28,6 @@ func requestOpenAI2Ali(request dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest
 }
 
 func embeddingRequestOpenAI2Ali(request dto.EmbeddingRequest) *AliEmbeddingRequest {
-	if request.Model == "" {
-		request.Model = "text-embedding-v1"
-	}
 	return &AliEmbeddingRequest{
 		Model: request.Model,
 		Input: struct {
@@ -64,7 +62,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode
 		}, nil
 	}
 
-	fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse)
+	model := c.GetString("model")
+	if model == "" {
+		model = "text-embedding-v4"
+	}
+	fullTextResponse := embeddingResponseAli2OpenAI(&aliResponse, model)
 	jsonResponse, err := json.Marshal(fullTextResponse)
 	if err != nil {
 		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
@@ -75,11 +77,11 @@ func aliEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode
 	return nil, &fullTextResponse.Usage
 }
 
-func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbeddingResponse {
+func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse, model string) *dto.OpenAIEmbeddingResponse {
 	openAIEmbeddingResponse := dto.OpenAIEmbeddingResponse{
 		Object: "list",
 		Data:   make([]dto.OpenAIEmbeddingResponseItem, 0, len(response.Output.Embeddings)),
-		Model:  "text-embedding-v1",
+		Model:  model,
 		Usage:  dto.Usage{TotalTokens: response.Usage.TotalTokens},
 	}
 
@@ -94,12 +96,11 @@ func embeddingResponseAli2OpenAI(response *AliEmbeddingResponse) *dto.OpenAIEmbeddingResponse
 }
 
 func responseAli2OpenAI(response *AliResponse) *dto.OpenAITextResponse {
-	content, _ := json.Marshal(response.Output.Text)
 	choice := dto.OpenAITextResponseChoice{
 		Index: 0,
 		Message: dto.Message{
 			Role:    "assistant",
-			Content: content,
+			Content: response.Output.Text,
 		},
 		FinishReason: response.Output.FinishReason,
 	}
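
The same fix recurs in the Baidu, Cohere, Dify, PaLM, Tencent, Xunfei, and Zhipu adapters below: passing a plain string through json.Marshal before storing it in Message.Content double-encodes it, so clients see a quoted, escaped string. A minimal standalone sketch of the difference (the Msg type here is a hypothetical stand-in for dto.Message):

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// Msg is a hypothetical stand-in for dto.Message with string content.
	type Msg struct {
		Content string `json:"content"`
	}

	func main() {
		text := `he said "hi"`

		// Old pattern: pre-marshal the string, then store the result as content.
		pre, _ := json.Marshal(text) // `"he said \"hi\""`
		old, _ := json.Marshal(Msg{Content: string(pre)})
		fmt.Println(string(old)) // {"content":"\"he said \\\"hi\\\"\""} — double-encoded

		// New pattern: assign the plain string and marshal exactly once.
		fixed, _ := json.Marshal(Msg{Content: text})
		fmt.Println(string(fixed)) // {"content":"he said \"hi\""}
	}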
@@ -109,6 +109,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc
 
 	gopool.Go(func() {
 		defer func() {
+			// Recover from panics so a failed ping cannot crash the process
+			if r := recover(); r != nil {
+				if common2.DebugEnabled {
+					println("SSE ping goroutine panic recovered:", fmt.Sprintf("%v", r))
+				}
+			}
 			if common2.DebugEnabled {
 				println("SSE ping goroutine stopped.")
 			}
@@ -119,19 +125,32 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc
 		}
 
 		ticker := time.NewTicker(pingInterval)
-		// Clean up the ticker on exit
-		defer ticker.Stop()
+		// Make sure the ticker is cleaned up on every exit path
+		defer func() {
+			ticker.Stop()
+			if common2.DebugEnabled {
+				println("SSE ping ticker stopped")
+			}
+		}()
 
 		var pingMutex sync.Mutex
 		if common2.DebugEnabled {
 			println("SSE ping goroutine started")
 		}
 
+		// Timeout guard so the goroutine cannot run forever
+		maxPingDuration := 120 * time.Minute // maximum ping duration
+		pingTimeout := time.NewTimer(maxPingDuration)
+		defer pingTimeout.Stop()
+
 		for {
 			select {
 			// Send ping data
 			case <-ticker.C:
 				if err := sendPingData(c, &pingMutex); err != nil {
 					if common2.DebugEnabled {
 						println("SSE ping error, stopping goroutine:", err.Error())
 					}
 					return
 				}
 			// Stop signal received
@@ -140,6 +159,12 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc
 			// Request finished
 			case <-c.Request.Context().Done():
 				return
+			// Timeout guard so the goroutine cannot run indefinitely
+			case <-pingTimeout.C:
+				if common2.DebugEnabled {
+					println("SSE ping goroutine timeout, stopping")
+				}
+				return
 			}
 		}
 	})
@@ -148,19 +173,34 @@ func startPingKeepAlive(c *gin.Context, pingInterval time.Duration) context.CancelFunc
 }
 
 func sendPingData(c *gin.Context, mutex *sync.Mutex) error {
-	mutex.Lock()
-	defer mutex.Unlock()
+	// Add a timeout so a stuck lock cannot block the send forever
+	done := make(chan error, 1)
+	go func() {
+		mutex.Lock()
+		defer mutex.Unlock()
 
-	err := helper.PingData(c)
-	if err != nil {
-		common2.LogError(c, "SSE ping error: "+err.Error())
+		err := helper.PingData(c)
+		if err != nil {
+			common2.LogError(c, "SSE ping error: "+err.Error())
+			done <- err
+			return
+		}
+
+		if common2.DebugEnabled {
+			println("SSE ping data sent.")
+		}
+		done <- nil
+	}()
+
+	// Bound how long a single ping send may take
+	select {
+	case err := <-done:
+		return err
+	case <-time.After(10 * time.Second):
+		return errors.New("SSE ping data send timeout")
+	case <-c.Request.Context().Done():
+		return errors.New("request context cancelled during ping")
 	}
-
-	if common2.DebugEnabled {
-		println("SSE ping data sent.")
-	}
-	return nil
 }

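One property of this send-with-timeout pattern worth noting: on timeout the caller returns, but the inner goroutine stays blocked until the write finishes, so the channel needs a buffer of 1 (as above) for the late send on done not to leak the goroutine. A standalone sketch of the same shape (illustrative only, not the file's actual code):

	package main

	import (
		"errors"
		"fmt"
		"time"
	)

	// sendWithTimeout runs write() in a goroutine and waits at most timeout.
	// The buffered channel lets the worker finish even after the caller gave
	// up, so the final send never blocks and the goroutine is not leaked.
	func sendWithTimeout(write func() error, timeout time.Duration) error {
		done := make(chan error, 1)
		go func() { done <- write() }()
		select {
		case err := <-done:
			return err
		case <-time.After(timeout):
			return errors.New("send timeout")
		}
	}

	func main() {
		err := sendWithTimeout(func() error {
			time.Sleep(50 * time.Millisecond) // pretend to write a ping frame
			return nil
		}, 10*time.Millisecond)
		fmt.Println(err) // send timeout
	}
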
@@ -175,15 +215,23 @@ func doRequest(c *gin.Context, req *http.Request, info *common.RelayInfo) (*http.Response, error)
 		client = service.GetHttpClient()
 	}
 
+	var stopPinger context.CancelFunc
 	if info.IsStream {
 		helper.SetEventStreamHeaders(c)
 
 		// Ping keep-alive handling for streaming requests
 		generalSettings := operation_setting.GetGeneralSetting()
 		if generalSettings.PingIntervalEnabled {
 			pingInterval := time.Duration(generalSettings.PingIntervalSeconds) * time.Second
-			stopPinger := startPingKeepAlive(c, pingInterval)
-			defer stopPinger()
+			stopPinger = startPingKeepAlive(c, pingInterval)
+			// Use defer to guarantee the ping goroutine is stopped on every exit path
+			defer func() {
+				if stopPinger != nil {
+					stopPinger()
+					if common2.DebugEnabled {
+						println("SSE ping goroutine stopped by defer")
+					}
+				}
+			}()
 		}
 	}

@@ -53,12 +53,11 @@ func requestOpenAI2Baidu(request dto.GeneralOpenAIRequest) *BaiduChatRequest {
 }
 
 func responseBaidu2OpenAI(response *BaiduChatResponse) *dto.OpenAITextResponse {
-	content, _ := json.Marshal(response.Result)
 	choice := dto.OpenAITextResponseChoice{
 		Index: 0,
 		Message: dto.Message{
 			Role:    "assistant",
-			Content: content,
+			Content: response.Result,
 		},
 		FinishReason: "stop",
 	}
@@ -48,9 +48,9 @@ func RequestOpenAI2ClaudeComplete(textRequest dto.GeneralOpenAIRequest) *dto.ClaudeRequest
 	prompt := ""
 	for _, message := range textRequest.Messages {
 		if message.Role == "user" {
-			prompt += fmt.Sprintf("\n\nHuman: %s", message.Content)
+			prompt += fmt.Sprintf("\n\nHuman: %s", message.StringContent())
 		} else if message.Role == "assistant" {
-			prompt += fmt.Sprintf("\n\nAssistant: %s", message.Content)
+			prompt += fmt.Sprintf("\n\nAssistant: %s", message.StringContent())
 		} else if message.Role == "system" {
 			if prompt == "" {
 				prompt = message.StringContent()
@@ -155,15 +155,13 @@ func RequestOpenAI2ClaudeMessage(textRequest dto.GeneralOpenAIRequest) (*dto.ClaudeRequest
 		}
 		if lastMessage.Role == message.Role && lastMessage.Role != "tool" {
 			if lastMessage.IsStringContent() && message.IsStringContent() {
-				content, _ := json.Marshal(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
-				fmtMessage.Content = content
+				fmtMessage.SetStringContent(strings.Trim(fmt.Sprintf("%s %s", lastMessage.StringContent(), message.StringContent()), "\""))
 				// delete last message
 				formatMessages = formatMessages[:len(formatMessages)-1]
 			}
 		}
 		if fmtMessage.Content == nil {
-			content, _ := json.Marshal("...")
-			fmtMessage.Content = content
+			fmtMessage.SetStringContent("...")
 		}
 		formatMessages = append(formatMessages, fmtMessage)
 		lastMessage = fmtMessage
@@ -397,12 +395,11 @@ func ResponseClaude2OpenAI(reqMode int, claudeResponse *dto.ClaudeResponse) *dto.OpenAITextResponse
 	thinkingContent := ""
 
 	if reqMode == RequestModeCompletion {
-		content, _ := json.Marshal(strings.TrimPrefix(claudeResponse.Completion, " "))
 		choice := dto.OpenAITextResponseChoice{
 			Index: 0,
 			Message: dto.Message{
 				Role:    "assistant",
-				Content: content,
+				Content: strings.TrimPrefix(claudeResponse.Completion, " "),
 				Name:    nil,
 			},
 			FinishReason: stopReasonClaude2OpenAI(claudeResponse.StopReason),
@@ -195,11 +195,10 @@ func cohereHandler(c *gin.Context, resp *http.Response, modelName string, promptTokens int
 	openaiResp.Model = modelName
 	openaiResp.Usage = usage
 
-	content, _ := json.Marshal(cohereResp.Text)
 	openaiResp.Choices = []dto.OpenAITextResponseChoice{
 		{
 			Index:        0,
-			Message:      dto.Message{Content: content, Role: "assistant"},
+			Message:      dto.Message{Content: cohereResp.Text, Role: "assistant"},
 			FinishReason: stopReasonCohere2OpenAI(cohereResp.FinishReason),
 		},
 	}
@@ -10,7 +10,7 @@ type CozeError struct {
 type CozeEnterMessage struct {
 	Role        string          `json:"role"`
 	Type        string          `json:"type,omitempty"`
-	Content     json.RawMessage `json:"content,omitempty"`
+	Content     any             `json:"content,omitempty"`
 	MetaData    json.RawMessage `json:"meta_data,omitempty"`
 	ContentType string          `json:"content_type,omitempty"`
 }
@@ -278,12 +278,11 @@ func difyHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 		Created: common.GetTimestamp(),
 		Usage:   difyResponse.MetaData.Usage,
 	}
-	content, _ := json.Marshal(difyResponse.Answer)
 	choice := dto.OpenAITextResponseChoice{
 		Index: 0,
 		Message: dto.Message{
 			Role:    "assistant",
-			Content: content,
+			Content: difyResponse.Answer,
 		},
 		FinishReason: "stop",
 	}
@@ -1,5 +1,7 @@
 package gemini
 
+import "encoding/json"
+
 type GeminiChatRequest struct {
 	Contents       []GeminiChatContent        `json:"contents"`
 	SafetySettings []GeminiChatSafetySettings `json:"safetySettings,omitempty"`
@@ -22,19 +24,38 @@ type GeminiInlineData struct {
 	Data string `json:"data"`
 }
 
+// UnmarshalJSON custom unmarshaler for GeminiInlineData to support snake_case and camelCase for MimeType
+func (g *GeminiInlineData) UnmarshalJSON(data []byte) error {
+	type Alias GeminiInlineData // Use type alias to avoid recursion
+	var aux struct {
+		Alias
+		MimeTypeSnake string `json:"mime_type"`
+	}
+
+	if err := json.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	*g = GeminiInlineData(aux.Alias) // Copy other fields if any in future
+
+	// Prioritize snake_case if present
+	if aux.MimeTypeSnake != "" {
+		g.MimeType = aux.MimeTypeSnake
+	} else if aux.MimeType != "" { // Fallback to camelCase from Alias
+		g.MimeType = aux.MimeType
+	}
+	// g.Data would be populated by aux.Alias.Data
+	return nil
+}
+
 type FunctionCall struct {
 	FunctionName string `json:"name"`
 	Arguments    any    `json:"args"`
 }
 
 type GeminiFunctionResponseContent struct {
 	Name    string `json:"name"`
 	Content any    `json:"content"`
 }
 
 type FunctionResponse struct {
-	Name     string                        `json:"name"`
-	Response GeminiFunctionResponseContent `json:"response"`
+	Name     string                 `json:"name"`
+	Response map[string]interface{} `json:"response"`
 }
 
 type GeminiPartExecutableCode struct {
@@ -63,6 +84,33 @@ type GeminiPart struct {
 	CodeExecutionResult *GeminiPartCodeExecutionResult `json:"codeExecutionResult,omitempty"`
 }
 
+// UnmarshalJSON custom unmarshaler for GeminiPart to support snake_case and camelCase for InlineData
+func (p *GeminiPart) UnmarshalJSON(data []byte) error {
+	// Alias to avoid recursion during unmarshalling
+	type Alias GeminiPart
+	var aux struct {
+		Alias
+		InlineDataSnake *GeminiInlineData `json:"inline_data,omitempty"` // snake_case variant
+	}
+
+	if err := json.Unmarshal(data, &aux); err != nil {
+		return err
+	}
+
+	// Assign fields from alias
+	*p = GeminiPart(aux.Alias)
+
+	// Prioritize snake_case for InlineData if present
+	if aux.InlineDataSnake != nil {
+		p.InlineData = aux.InlineDataSnake
+	} else if aux.InlineData != nil { // Fallback to camelCase from Alias
+		p.InlineData = aux.InlineData
+	}
+	// Other fields like Text, FunctionCall etc. are already populated via aux.Alias
+
+	return nil
+}
+
 type GeminiChatContent struct {
 	Role  string       `json:"role,omitempty"`
 	Parts []GeminiPart `json:"parts"`
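
A quick sanity check of the dual-key behavior these unmarshalers add (a sketch; it assumes only the GeminiInlineData type above and that its camelCase JSON tag is mimeType):

	// Both spellings of the MIME type key now decode into the same field.
	var a, b GeminiInlineData
	_ = json.Unmarshal([]byte(`{"mimeType":"image/png","data":"..."}`), &a)
	_ = json.Unmarshal([]byte(`{"mime_type":"image/png","data":"..."}`), &b)
	// a.MimeType == b.MimeType == "image/png"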
@@ -117,10 +165,16 @@ type GeminiChatResponse struct {
 }
 
 type GeminiUsageMetadata struct {
-	PromptTokenCount     int `json:"promptTokenCount"`
-	CandidatesTokenCount int `json:"candidatesTokenCount"`
-	TotalTokenCount      int `json:"totalTokenCount"`
-	ThoughtsTokenCount   int `json:"thoughtsTokenCount"`
+	PromptTokenCount     int                         `json:"promptTokenCount"`
+	CandidatesTokenCount int                         `json:"candidatesTokenCount"`
+	TotalTokenCount      int                         `json:"totalTokenCount"`
+	ThoughtsTokenCount   int                         `json:"thoughtsTokenCount"`
+	PromptTokensDetails  []GeminiPromptTokensDetails `json:"promptTokensDetails"`
 }
 
+type GeminiPromptTokensDetails struct {
+	Modality   string `json:"modality"`
+	TokenCount int    `json:"tokenCount"`
+}
+
 // Imagen related structs
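
The handlers below all fold these per-modality details into the relay's usage record the same way; roughly (a sketch of a hypothetical helper, assuming the dto.Usage field names used in the hunks that follow):

	// Map Gemini's per-modality prompt token details onto the relay usage struct.
	func applyPromptTokensDetails(usage *dto.Usage, meta GeminiUsageMetadata) {
		usage.CompletionTokenDetails.ReasoningTokens = meta.ThoughtsTokenCount
		for _, detail := range meta.PromptTokensDetails {
			switch detail.Modality {
			case "AUDIO":
				usage.PromptTokensDetails.AudioTokens = detail.TokenCount
			case "TEXT":
				usage.PromptTokensDetails.TextTokens = detail.TokenCount
			}
		}
	}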
@@ -55,6 +55,16 @@ func GeminiTextGenerationHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 		TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
 	}
 
+	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+
+	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+		if detail.Modality == "AUDIO" {
+			usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+		} else if detail.Modality == "TEXT" {
+			usage.PromptTokensDetails.TextTokens = detail.TokenCount
+		}
+	}
+
 	// Return the Gemini-native JSON response directly
 	jsonResponse, err := json.Marshal(geminiResponse)
 	if err != nil {
@@ -100,6 +110,14 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
 			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
 			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
+			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+				if detail.Modality == "AUDIO" {
+					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+				} else if detail.Modality == "TEXT" {
+					usage.PromptTokensDetails.TextTokens = detail.TokenCount
+				}
+			}
 		}
 
 		// Send the GeminiChatResponse directly
@@ -118,11 +136,10 @@ func GeminiTextGenerationStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 	}
 
 	// Compute the final usage
-	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
 	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
 
-	// End the streaming response
-	helper.Done(c)
+	// Drop the trailing [DONE]: the Gemini API itself never sends one
+	//helper.Done(c)
 
 	return usage, nil
 }
@@ -57,25 +57,63 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo
 	}
 
 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
-		if strings.HasSuffix(info.OriginModelName, "-thinking") {
-			// If the model name starts with gemini-2.5-pro, do not set ThinkingBudget
-			if strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") {
-				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-					IncludeThoughts: true,
-				}
-			} else {
-				budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
-				if budgetTokens == 0 || budgetTokens > 24576 {
-					budgetTokens = 24576
-				}
-				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-					ThinkingBudget:  common.GetPointer(int(budgetTokens)),
-					IncludeThoughts: true,
-				}
-			}
+		if strings.HasSuffix(info.OriginModelName, "-thinking") {
+			// Hard-coded list of legacy models that do not support ThinkingBudget
+			unsupportedModels := []string{
+				"gemini-2.5-pro-preview-05-06",
+				"gemini-2.5-pro-preview-03-25",
+			}
+
+			isUnsupported := false
+			for _, unsupportedModel := range unsupportedModels {
+				if strings.HasPrefix(info.OriginModelName, unsupportedModel) {
+					isUnsupported = true
+					break
+				}
+			}
+
+			if isUnsupported {
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					IncludeThoughts: true,
+				}
+			} else {
+				budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
+
+				// Check for the new 2.5 pro models (they support ThinkingBudget but with a special range)
+				isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
+					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
+					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
+
+				if isNew25Pro {
+					// New 2.5 pro models: ThinkingBudget range is 128-32768
+					if budgetTokens == 0 || budgetTokens < 128 {
+						budgetTokens = 128
+					} else if budgetTokens > 32768 {
+						budgetTokens = 32768
+					}
+				} else {
+					// Other models: ThinkingBudget range is 0-24576
+					if budgetTokens == 0 || budgetTokens > 24576 {
+						budgetTokens = 24576
+					}
+				}
+
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					ThinkingBudget:  common.GetPointer(int(budgetTokens)),
+					IncludeThoughts: true,
+				}
+			}
 		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
-			geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-				ThinkingBudget: common.GetPointer(0),
-			}
+			// Check for the new 2.5 pro models (-nothinking is unsupported: the minimum budget is 128)
+			isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
+				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
+				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
+
+			if !isNew25Pro {
+				// Only models other than the new 2.5 pro support -nothinking
+				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
+					ThinkingBudget: common.GetPointer(0),
+				}
+			}
 		}
 	}
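
The clamping rule is easier to see in isolation; a pure-function sketch of the two ranges (a hypothetical helper mirroring the branch above, where a budget of 0 means "use the maximum"):

	// clampThinkingBudget applies the per-family budget ranges:
	// new 2.5-pro models accept 128-32768, everything else 0-24576.
	func clampThinkingBudget(budget float64, isNew25Pro bool) int {
		if isNew25Pro {
			if budget == 0 || budget < 128 {
				return 128
			}
			if budget > 32768 {
				return 32768
			}
			return int(budget)
		}
		if budget == 0 || budget > 24576 {
			return 24576
		}
		return int(budget)
	}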
@@ -137,12 +175,6 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo
 		// common.SysLog("tools: " + fmt.Sprintf("%+v", geminiRequest.Tools))
 		// json_data, _ := json.Marshal(geminiRequest.Tools)
 		// common.SysLog("tools_json: " + string(json_data))
-	} else if textRequest.Functions != nil {
-		//geminiRequest.Tools = []GeminiChatTool{
-		//	{
-		//		FunctionDeclarations: textRequest.Functions,
-		//	},
-		//}
 	}
 
 	if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
@@ -173,17 +205,27 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo
 			} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
 				name = val
 			}
-			content := common.StrToMap(message.StringContent())
+			var contentMap map[string]interface{}
+			contentStr := message.StringContent()
+
+			// 1. Try to parse as a JSON object
+			if err := json.Unmarshal([]byte(contentStr), &contentMap); err != nil {
+				// 2. On failure, try to parse as a JSON array
+				var contentSlice []interface{}
+				if err := json.Unmarshal([]byte(contentStr), &contentSlice); err == nil {
+					// If it is an array, wrap it in an object
+					contentMap = map[string]interface{}{"result": contentSlice}
+				} else {
+					// 3. If that fails too, treat it as plain text
+					contentMap = map[string]interface{}{"content": contentStr}
+				}
+			}
+
 			functionResp := &FunctionResponse{
-				Name: name,
-				Response: GeminiFunctionResponseContent{
-					Name:    name,
-					Content: content,
-				},
-			}
-			if content == nil {
-				functionResp.Response.Content = message.StringContent()
+				Name:     name,
+				Response: contentMap,
 			}
 
 			*parts = append(*parts, GeminiPart{
 				FunctionResponse: functionResp,
 			})
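
The three-step fallback guarantees Gemini always receives a JSON object for a tool response, whatever shape the tool returned. A standalone sketch of just that conversion (illustrative; the real code inlines it as shown above):

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// toResponseMap converts arbitrary tool output into a JSON object:
	// objects pass through, arrays are wrapped under "result",
	// and anything else is wrapped under "content" as plain text.
	func toResponseMap(contentStr string) map[string]interface{} {
		var m map[string]interface{}
		if err := json.Unmarshal([]byte(contentStr), &m); err == nil {
			return m
		}
		var s []interface{}
		if err := json.Unmarshal([]byte(contentStr), &s); err == nil {
			return map[string]interface{}{"result": s}
		}
		return map[string]interface{}{"content": contentStr}
	}

	func main() {
		fmt.Println(toResponseMap(`{"ok":true}`))  // map[ok:true]
		fmt.Println(toResponseMap(`[1,2,3]`))      // map[result:[1 2 3]]
		fmt.Println(toResponseMap(`plain answer`)) // map[content:plain answer]
	}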
@@ -280,13 +322,13 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo
 			if part.GetInputAudio().Data == "" {
 				return nil, fmt.Errorf("only base64 audio is supported in gemini")
 			}
-			format, base64String, err := service.DecodeBase64FileData(part.GetInputAudio().Data)
+			base64String, err := service.DecodeBase64AudioData(part.GetInputAudio().Data)
 			if err != nil {
 				return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
 			}
 			parts = append(parts, GeminiPart{
 				InlineData: &GeminiInlineData{
-					MimeType: format,
+					MimeType: "audio/" + part.GetInputAudio().Format,
 					Data:     base64String,
 				},
 			})
@@ -576,14 +618,13 @@ func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResponse
 		Created: common.GetTimestamp(),
 		Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
 	}
-	content, _ := json.Marshal("")
 	isToolCall := false
 	for _, candidate := range response.Candidates {
 		choice := dto.OpenAITextResponseChoice{
 			Index: int(candidate.Index),
 			Message: dto.Message{
 				Role:    "assistant",
-				Content: content,
+				Content: "",
 			},
 			FinishReason: constant.FinishReasonStop,
 		}
@@ -738,6 +779,13 @@ func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
 			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
+			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+				if detail.Modality == "AUDIO" {
+					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+				} else if detail.Modality == "TEXT" {
+					usage.PromptTokensDetails.TextTokens = detail.TokenCount
+				}
+			}
 		}
 		err = helper.ObjectData(c, response)
 		if err != nil {
@@ -812,6 +860,14 @@ func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
 	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
 
+	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
+		if detail.Modality == "AUDIO" {
+			usage.PromptTokensDetails.AudioTokens = detail.TokenCount
+		} else if detail.Modality == "TEXT" {
+			usage.PromptTokensDetails.TextTokens = detail.TokenCount
+		}
+	}
+
 	fullTextResponse.Usage = usage
 	jsonResponse, err := json.Marshal(fullTextResponse)
 	if err != nil {
@@ -1,13 +1,55 @@
 package mistral
 
 import (
+	"one-api/common"
 	"one-api/dto"
+	"regexp"
 )
 
+var mistralToolCallIdRegexp = regexp.MustCompile("^[a-zA-Z0-9]{9}$")
+
 func requestOpenAI2Mistral(request *dto.GeneralOpenAIRequest) *dto.GeneralOpenAIRequest {
 	messages := make([]dto.Message, 0, len(request.Messages))
+	idMap := make(map[string]string)
 	for _, message := range request.Messages {
+		// 1. tool_calls.id
+		toolCalls := message.ParseToolCalls()
+		if toolCalls != nil {
+			for i := range toolCalls {
+				if !mistralToolCallIdRegexp.MatchString(toolCalls[i].ID) {
+					if newId, ok := idMap[toolCalls[i].ID]; ok {
+						toolCalls[i].ID = newId
+					} else {
+						newId, err := common.GenerateRandomCharsKey(9)
+						if err == nil {
+							idMap[toolCalls[i].ID] = newId
+							toolCalls[i].ID = newId
+						}
+					}
+				}
+			}
+			message.SetToolCalls(toolCalls)
+		}
+
+		// 2. tool_call_id
+		if message.ToolCallId != "" {
+			if newId, ok := idMap[message.ToolCallId]; ok {
+				message.ToolCallId = newId
+			} else {
+				if !mistralToolCallIdRegexp.MatchString(message.ToolCallId) {
+					newId, err := common.GenerateRandomCharsKey(9)
+					if err == nil {
+						idMap[message.ToolCallId] = newId
+						message.ToolCallId = newId
+					}
+				}
+			}
+		}
+
 		mediaMessages := message.ParseContent()
 		if message.Role == "assistant" && message.ToolCalls != nil && message.Content == "" {
 			mediaMessages = []dto.MediaContent{}
 		}
 		for j, mediaMessage := range mediaMessages {
 			if mediaMessage.Type == dto.ContentTypeImageURL {
 				imageUrl := mediaMessage.GetImageMedia()
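
Mistral accepts only nine-character alphanumeric tool-call IDs, so the adapter rewrites any non-conforming ID and records the mapping in idMap so the later tool_call_id on the tool-result message stays consistent with the rewritten tool_calls.id. A condensed sketch of the remapping rule (hypothetical helper; it assumes the mistralToolCallIdRegexp and common.GenerateRandomCharsKey shown above):

	// remapId maps an arbitrary tool-call ID to a Mistral-compatible one,
	// reusing earlier mappings so call and result keep matching IDs.
	func remapId(id string, idMap map[string]string) string {
		if mistralToolCallIdRegexp.MatchString(id) {
			return id // already valid, keep as-is
		}
		if newId, ok := idMap[id]; ok {
			return newId // seen before: reuse the same replacement
		}
		newId, err := common.GenerateRandomCharsKey(9)
		if err != nil {
			return id // on failure, leave the ID untouched (as the diff does)
		}
		idMap[id] = newId
		return newId
	}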
@@ -45,12 +45,11 @@ func responsePaLM2OpenAI(response *PaLMChatResponse) *dto.OpenAITextResponse {
 		Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
 	}
 	for i, candidate := range response.Candidates {
-		content, _ := json.Marshal(candidate.Content)
 		choice := dto.OpenAITextResponseChoice{
 			Index: i,
 			Message: dto.Message{
 				Role:    "assistant",
-				Content: content,
+				Content: candidate.Content,
 			},
 			FinishReason: "stop",
 		}
@@ -56,12 +56,11 @@ func responseTencent2OpenAI(response *TencentChatResponse) *dto.OpenAITextResponse
 		},
 	}
 	if len(response.Choices) > 0 {
-		content, _ := json.Marshal(response.Choices[0].Messages.Content)
 		choice := dto.OpenAITextResponseChoice{
 			Index: 0,
 			Message: dto.Message{
 				Role:    "assistant",
-				Content: content,
+				Content: response.Choices[0].Messages.Content,
 			},
 			FinishReason: response.Choices[0].FinishReason,
 		}
@@ -61,12 +61,11 @@ func responseXunfei2OpenAI(response *XunfeiChatResponse) *dto.OpenAITextResponse
 			},
 		}
 	}
-	content, _ := json.Marshal(response.Payload.Choices.Text[0].Content)
 	choice := dto.OpenAITextResponseChoice{
 		Index: 0,
 		Message: dto.Message{
 			Role:    "assistant",
-			Content: content,
+			Content: response.Payload.Choices.Text[0].Content,
 		},
 		FinishReason: constant.FinishReasonStop,
 	}
@@ -108,12 +108,11 @@ func responseZhipu2OpenAI(response *ZhipuResponse) *dto.OpenAITextResponse {
 		Usage: response.Data.Usage,
 	}
 	for i, choice := range response.Data.Choices {
-		content, _ := json.Marshal(strings.Trim(choice.Content, "\""))
 		openaiChoice := dto.OpenAITextResponseChoice{
 			Index: i,
 			Message: dto.Message{
 				Role:    choice.Role,
-				Content: content,
+				Content: strings.Trim(choice.Content, "\""),
 			},
 			FinishReason: "",
 		}
@@ -3,6 +3,7 @@ package helper
 import (
 	"bufio"
 	"context"
+	"fmt"
 	"io"
 	"net/http"
 	"one-api/common"
@@ -19,8 +20,8 @@ import (
 )
 
 const (
-	InitialScannerBufferSize = 1 << 20  // 1MB (1*1024*1024)
-	MaxScannerBufferSize     = 10 << 20 // 10MB (10*1024*1024)
+	InitialScannerBufferSize = 64 << 10 // 64KB (64*1024)
+	MaxScannerBufferSize     = 10 << 20 // 10MB (10*1024*1024)
 	DefaultPingInterval      = 10 * time.Second
 )
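
Shrinking the initial scanner buffer from 1 MB to 64 KB cuts per-stream memory without changing behavior, because bufio.Scanner only grows its buffer when a single token (here, one SSE line) exceeds the current size, up to the stated maximum. A sketch of the relevant call, as it appears later in this handler:

	// The scanner starts at InitialScannerBufferSize and grows on demand
	// up to MaxScannerBufferSize; most SSE events fit in 64KB, so the
	// common case now allocates 16x less than before.
	scanner := bufio.NewScanner(resp.Body)
	scanner.Buffer(make([]byte, InitialScannerBufferSize), MaxScannerBufferSize)
	scanner.Split(bufio.ScanLines)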
@@ -30,7 +31,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 		return
 	}
 
-	defer resp.Body.Close()
+	// Make sure the response body is always closed
+	defer func() {
+		if resp.Body != nil {
+			resp.Body.Close()
+		}
+	}()
 
 	streamingTimeout := time.Duration(constant.StreamingTimeout) * time.Second
 	if strings.HasPrefix(info.UpstreamModelName, "o") {
@@ -39,11 +45,12 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 	}
 
 	var (
-		stopChan   = make(chan bool, 2)
+		stopChan   = make(chan bool, 3) // larger buffer to avoid blocked sends
 		scanner    = bufio.NewScanner(resp.Body)
 		ticker     = time.NewTicker(streamingTimeout)
 		pingTicker *time.Ticker
 		writeMutex sync.Mutex     // Mutex to protect concurrent writes
		wg         sync.WaitGroup // used to wait for all goroutines to exit
 	)
 
 	generalSettings := operation_setting.GetGeneralSetting()
@@ -57,13 +64,32 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 		pingTicker = time.NewTicker(pingInterval)
 	}
 
+	// Improved resource cleanup: make sure every goroutine exits properly
 	defer func() {
+		// Signal all goroutines to stop
+		common.SafeSendBool(stopChan, true)
+
 		ticker.Stop()
 		if pingTicker != nil {
 			pingTicker.Stop()
 		}
+
+		// Wait for all goroutines to exit, at most 5 seconds
+		done := make(chan struct{})
+		go func() {
+			wg.Wait()
+			close(done)
+		}()
+
+		select {
+		case <-done:
+		case <-time.After(5 * time.Second):
+			common.LogError(c, "timeout waiting for goroutines to exit")
+		}
+
+		close(stopChan)
 	}()
 
 	scanner.Buffer(make([]byte, InitialScannerBufferSize), MaxScannerBufferSize)
 	scanner.Split(bufio.ScanLines)
 	SetEventStreamHeaders(c)
@@ -73,35 +99,95 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 
 	ctx = context.WithValue(ctx, "stop_chan", stopChan)
 
-	// Handle ping data sending
+	// Handle ping data sending with improved error handling
 	if pingEnabled && pingTicker != nil {
+		wg.Add(1)
 		gopool.Go(func() {
+			defer func() {
+				wg.Done()
+				if r := recover(); r != nil {
+					common.LogError(c, fmt.Sprintf("ping goroutine panic: %v", r))
+					common.SafeSendBool(stopChan, true)
+				}
+				if common.DebugEnabled {
+					println("ping goroutine exited")
+				}
+			}()
+
+			// Timeout guard so the goroutine cannot run forever
+			maxPingDuration := 30 * time.Minute // maximum ping duration
+			pingTimeout := time.NewTimer(maxPingDuration)
+			defer pingTimeout.Stop()
+
 			for {
 				select {
 				case <-pingTicker.C:
-					writeMutex.Lock() // Lock before writing
-					err := PingData(c)
-					writeMutex.Unlock() // Unlock after writing
-					if err != nil {
-						common.LogError(c, "ping data error: "+err.Error())
-						common.SafeSendBool(stopChan, true)
-					}
-					if common.DebugEnabled {
-						println("ping data sent")
-					}
+					// Use a timeout so a blocked write cannot stall the loop
+					done := make(chan error, 1)
+					go func() {
+						writeMutex.Lock()
+						defer writeMutex.Unlock()
+						done <- PingData(c)
+					}()
+
+					select {
+					case err := <-done:
+						if err != nil {
+							common.LogError(c, "ping data error: "+err.Error())
+							return
+						}
+						if common.DebugEnabled {
+							println("ping data sent")
+						}
+					case <-time.After(10 * time.Second):
+						common.LogError(c, "ping data send timeout")
+						return
+					case <-ctx.Done():
+						return
+					case <-stopChan:
+						return
+					}
 				case <-ctx.Done():
 					if common.DebugEnabled {
 						println("ping data goroutine stopped")
 					}
 					return
 				case <-stopChan:
 					return
+				case <-c.Request.Context().Done():
+					// Watch for client disconnects
+					return
+				case <-pingTimeout.C:
+					common.LogError(c, "ping goroutine max duration reached")
+					return
 				}
 			}
 		})
 	}
 
+	// Scanner goroutine with improved error handling
+	wg.Add(1)
 	common.RelayCtxGo(ctx, func() {
+		defer func() {
+			wg.Done()
+			if r := recover(); r != nil {
+				common.LogError(c, fmt.Sprintf("scanner goroutine panic: %v", r))
+			}
+			common.SafeSendBool(stopChan, true)
+			if common.DebugEnabled {
+				println("scanner goroutine exited")
+			}
+		}()
+
 		for scanner.Scan() {
+			// Check whether we should stop
+			select {
+			case <-stopChan:
+				return
+			case <-ctx.Done():
+				return
+			case <-c.Request.Context().Done():
+				return
+			default:
+			}
+
 			ticker.Reset(streamingTimeout)
 			data := scanner.Text()
 			if common.DebugEnabled {
@@ -119,11 +205,27 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 			data = strings.TrimSuffix(data, "\r")
 			if !strings.HasPrefix(data, "[DONE]") {
 				info.SetFirstResponseTime()
-				writeMutex.Lock() // Lock before writing
-				success := dataHandler(data)
-				writeMutex.Unlock() // Unlock after writing
-				if !success {
-					break
+
+				// Use a timeout so a blocked write cannot stall the loop
+				done := make(chan bool, 1)
+				go func() {
+					writeMutex.Lock()
+					defer writeMutex.Unlock()
+					done <- dataHandler(data)
+				}()
+
+				select {
+				case success := <-done:
+					if !success {
+						return
+					}
+				case <-time.After(10 * time.Second):
+					common.LogError(c, "data handler timeout")
+					return
+				case <-ctx.Done():
+					return
+				case <-stopChan:
+					return
 				}
 			}
 		}
@@ -133,17 +235,18 @@ func StreamScannerHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo
 			common.LogError(c, "scanner error: "+err.Error())
 		}
-
-		common.SafeSendBool(stopChan, true)
 	})
 
+	// Main loop: wait for completion or timeout
 	select {
 	case <-ticker.C:
+		// Timeout handling
+		common.LogError(c, "streaming timeout")
 		common.SafeSendBool(stopChan, true)
 	case <-stopChan:
+		// Normal completion
+		common.LogInfo(c, "streaming finished")
+	case <-c.Request.Context().Done():
+		// Client disconnected
+		common.LogInfo(c, "client disconnected")
 	}
 }
@@ -352,6 +352,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	promptTokens := usage.PromptTokens
 	cacheTokens := usage.PromptTokensDetails.CachedTokens
 	imageTokens := usage.PromptTokensDetails.ImageTokens
+	audioTokens := usage.PromptTokensDetails.AudioTokens
 	completionTokens := usage.CompletionTokens
 	modelName := relayInfo.OriginModelName
@@ -367,6 +368,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	dPromptTokens := decimal.NewFromInt(int64(promptTokens))
 	dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
 	dImageTokens := decimal.NewFromInt(int64(imageTokens))
+	dAudioTokens := decimal.NewFromInt(int64(audioTokens))
 	dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
 	dCompletionRatio := decimal.NewFromFloat(completionRatio)
 	dCacheRatio := decimal.NewFromFloat(cacheRatio)
@@ -412,23 +414,43 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 			dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
 				Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
 				Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
-			extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 $%s",
+			extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 %s",
 				fileSearchTool.CallCount, dFileSearchQuota.String())
 		}
 	}
 
 	var quotaCalculateDecimal decimal.Decimal
-	if !priceData.UsePrice {
-		nonCachedTokens := dPromptTokens.Sub(dCacheTokens)
-		cachedTokensWithRatio := dCacheTokens.Mul(dCacheRatio)
-
-		promptQuota := nonCachedTokens.Add(cachedTokensWithRatio)
-		if imageTokens > 0 {
-			nonImageTokens := dPromptTokens.Sub(dImageTokens)
-			imageTokensWithRatio := dImageTokens.Mul(dImageRatio)
-			promptQuota = nonImageTokens.Add(imageTokensWithRatio)
-		}
+	var audioInputQuota decimal.Decimal
+	var audioInputPrice float64
+	if !priceData.UsePrice {
+		baseTokens := dPromptTokens
+		// Subtract cached tokens
+		var cachedTokensWithRatio decimal.Decimal
+		if !dCacheTokens.IsZero() {
+			baseTokens = baseTokens.Sub(dCacheTokens)
+			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
+		}
+
+		// Subtract image tokens
+		var imageTokensWithRatio decimal.Decimal
+		if !dImageTokens.IsZero() {
+			baseTokens = baseTokens.Sub(dImageTokens)
+			imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
+		}
+
+		// Subtract Gemini audio tokens
+		if !dAudioTokens.IsZero() {
+			audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
+			if audioInputPrice > 0 {
+				// Recompute the base tokens
+				baseTokens = baseTokens.Sub(dAudioTokens)
+				audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
+				extraContent += fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String())
+			}
+		}
+		promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio)
 
 		completionQuota := dCompletionTokens.Mul(dCompletionRatio)
 
 		quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
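
Worked numerically (a sketch; the prices and ratios are invented, and it assumes the shopspring/decimal package these calls come from): with 1,000 prompt tokens of which 200 are audio, an audio price of $2 per million tokens, group ratio 1, and a quota-per-unit of 500,000, the audio part leaves the per-token prompt pool and is billed on its own:

	package main

	import (
		"fmt"

		"github.com/shopspring/decimal"
	)

	func main() {
		// Hypothetical numbers, mirroring the branch above.
		dPromptTokens := decimal.NewFromInt(1000)
		dAudioTokens := decimal.NewFromInt(200)
		audioInputPrice := 2.0 // $ per million audio tokens (invented)
		dGroupRatio := decimal.NewFromInt(1)
		dQuotaPerUnit := decimal.NewFromInt(500000)

		// Audio tokens leave the base pool and get their own quota.
		baseTokens := dPromptTokens.Sub(dAudioTokens)
		audioInputQuota := decimal.NewFromFloat(audioInputPrice).
			Div(decimal.NewFromInt(1000000)).
			Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)

		fmt.Println(baseTokens)      // 800
		fmt.Println(audioInputQuota) // 2/1e6 * 200 * 500000 = 200
	}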
@@ -442,6 +464,8 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	// Add quota for responses tool calls
 	quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
 	quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
+	// Add the separately billed audio input
+	quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
 
 	quota := int(quotaCalculateDecimal.Round(0).IntPart())
 	totalTokens := promptTokens + completionTokens
@@ -512,6 +536,11 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 			other["file_search_price"] = fileSearchPrice
 		}
 	}
+	if !audioInputQuota.IsZero() {
+		other["audio_input_seperate_price"] = true
+		other["audio_input_token_count"] = audioTokens
+		other["audio_input_price"] = audioInputPrice
+	}
 	model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
 		tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
 }