diff --git a/dto/openai_response.go b/dto/openai_response.go index c8f61b9d..790d4df8 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -195,28 +195,28 @@ type OutputTokenDetails struct { } type OpenAIResponsesResponse struct { - ID string `json:"id"` - Object string `json:"object"` - CreatedAt int `json:"created_at"` - Status string `json:"status"` - Error *OpenAIError `json:"error,omitempty"` - IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"` - Instructions string `json:"instructions"` - MaxOutputTokens int `json:"max_output_tokens"` - Model string `json:"model"` - Output []ResponsesOutput `json:"output"` - ParallelToolCalls bool `json:"parallel_tool_calls"` - PreviousResponseID string `json:"previous_response_id"` - Reasoning *Reasoning `json:"reasoning"` - Store bool `json:"store"` - Temperature float64 `json:"temperature"` - ToolChoice string `json:"tool_choice"` - Tools []interface{} `json:"tools"` - TopP float64 `json:"top_p"` - Truncation string `json:"truncation"` - Usage *Usage `json:"usage"` - User json.RawMessage `json:"user"` - Metadata json.RawMessage `json:"metadata"` + ID string `json:"id"` + Object string `json:"object"` + CreatedAt int `json:"created_at"` + Status string `json:"status"` + Error *OpenAIError `json:"error,omitempty"` + IncompleteDetails *IncompleteDetails `json:"incomplete_details,omitempty"` + Instructions string `json:"instructions"` + MaxOutputTokens int `json:"max_output_tokens"` + Model string `json:"model"` + Output []ResponsesOutput `json:"output"` + ParallelToolCalls bool `json:"parallel_tool_calls"` + PreviousResponseID string `json:"previous_response_id"` + Reasoning *Reasoning `json:"reasoning"` + Store bool `json:"store"` + Temperature float64 `json:"temperature"` + ToolChoice string `json:"tool_choice"` + Tools []ResponsesToolsCall `json:"tools"` + TopP float64 `json:"top_p"` + Truncation string `json:"truncation"` + Usage *Usage `json:"usage"` + User json.RawMessage `json:"user"` + Metadata json.RawMessage `json:"metadata"` } type IncompleteDetails struct { diff --git a/relay/channel/openai/relay_responses.go b/relay/channel/openai/relay_responses.go index 6af8c676..1d1e060e 100644 --- a/relay/channel/openai/relay_responses.go +++ b/relay/channel/openai/relay_responses.go @@ -3,7 +3,6 @@ package openai import ( "bytes" "fmt" - "github.com/gin-gonic/gin" "io" "net/http" "one-api/common" @@ -12,6 +11,8 @@ import ( "one-api/relay/helper" "one-api/service" "strings" + + "github.com/gin-gonic/gin" ) func OaiResponsesHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) { @@ -61,6 +62,10 @@ func OaiResponsesHandler(c *gin.Context, resp *http.Response, info *relaycommon. usage.PromptTokens = responsesResponse.Usage.InputTokens usage.CompletionTokens = responsesResponse.Usage.OutputTokens usage.TotalTokens = responsesResponse.Usage.TotalTokens + // 解析 Tools 用量 + for _, tool := range responsesResponse.Tools { + info.ResponsesUsageInfo.BuiltInTools[tool.Type].CallCount++ + } return nil, &usage } diff --git a/relay/relay-text.go b/relay/relay-text.go index 4fdd435d..a528ec52 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -358,6 +358,67 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, ratio := dModelRatio.Mul(dGroupRatio) + // openai web search 工具计费 + var dWebSearchQuota decimal.Decimal + if relayInfo.ResponsesUsageInfo != nil { + if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 { + // 确定模型类型 + // https://platform.openai.com/docs/pricing Web search 价格按模型类型和 search context size 收费 + // gpt-4.1, gpt-4o, or gpt-4o-search-preview 更贵,gpt-4.1-mini, gpt-4o-mini, gpt-4o-mini-search-preview 更便宜 + isHighTierModel := (strings.HasPrefix(modelName, "gpt-4.1") || strings.HasPrefix(modelName, "gpt-4o")) && + !strings.Contains(modelName, "mini") + + // 确定 search context size 对应的价格 + var priceWebSearchPerThousandCalls float64 + switch webSearchTool.SearchContextSize { + case "low": + if isHighTierModel { + priceWebSearchPerThousandCalls = 30.0 + } else { + priceWebSearchPerThousandCalls = 25.0 + } + case "medium": + if isHighTierModel { + priceWebSearchPerThousandCalls = 35.0 + } else { + priceWebSearchPerThousandCalls = 27.5 + } + case "high": + if isHighTierModel { + priceWebSearchPerThousandCalls = 50.0 + } else { + priceWebSearchPerThousandCalls = 30.0 + } + default: + // search context size 默认为 medium + if isHighTierModel { + priceWebSearchPerThousandCalls = 35.0 + } else { + priceWebSearchPerThousandCalls = 27.5 + } + } + // 计算 web search 调用的配额 (配额 = 价格 * 调用次数 / 1000) + dWebSearchQuota = decimal.NewFromFloat(priceWebSearchPerThousandCalls). + Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))). + Div(decimal.NewFromInt(1000)) + extraContent += fmt.Sprintf("Web Search 调用 %d 次,上下文大小 %s,调用花费 $%s", + webSearchTool.CallCount, webSearchTool.SearchContextSize, dWebSearchQuota.String()) + } + } + // file search tool 计费 + var dFileSearchQuota decimal.Decimal + if relayInfo.ResponsesUsageInfo != nil { + if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 { + // file search tool 调用价格 $2.50/1k calls + // 计算 file search tool 调用的配额 (配额 = 价格 * 调用次数 / 1000) + dFileSearchQuota = decimal.NewFromFloat(2.5). + Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))). + Div(decimal.NewFromInt(1000)) + extraContent += fmt.Sprintf("File Search 调用 %d 次,调用花费 $%s", + fileSearchTool.CallCount, dFileSearchQuota.String()) + } + } + var quotaCalculateDecimal decimal.Decimal if !priceData.UsePrice { nonCachedTokens := dPromptTokens.Sub(dCacheTokens) @@ -380,6 +441,9 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, } else { quotaCalculateDecimal = dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio) } + // 添加 responses tools call 调用的配额 + quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota) quota := int(quotaCalculateDecimal.Round(0).IntPart()) totalTokens := promptTokens + completionTokens @@ -430,6 +494,19 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, other["image_ratio"] = imageRatio other["image_output"] = imageTokens } + if !dWebSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil { + if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists { + other["web_search"] = true + other["web_search_call_count"] = webSearchTool.CallCount + other["web_search_context_size"] = webSearchTool.SearchContextSize + } + } + if !dFileSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil { + if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists { + other["file_search"] = true + other["file_search_call_count"] = fileSearchTool.CallCount + } + } model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel, tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other) }