Merge pull request #1509 from QuantumNous/responses-input-cache-token

fix: responses cache tokens not billed
Authored by Calcium-Ion on 2025-08-06 11:22:14 +08:00; committed by GitHub
3 changed files with 24 additions and 9 deletions
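
The core of the fix is the Go handler diff below: the old code dereferenced responsesResponse.Usage unconditionally and never read the cached-token detail, so responses that omit the usage object could crash the handler and cached input was never recorded for billing. As a minimal sketch of the DTO shapes the diff implies (field names come from the diff; JSON tags and the exact layout of the dto package are assumptions):

// Sketch of the assumed DTO shapes; field names come from the diff,
// JSON tags and exact struct layout are assumptions.
type InputTokensDetails struct {
	CachedTokens int `json:"cached_tokens"`
}

type ResponsesUsage struct {
	InputTokens        int                 `json:"input_tokens"`
	OutputTokens       int                 `json:"output_tokens"`
	TotalTokens        int                 `json:"total_tokens"`
	InputTokensDetails *InputTokensDetails `json:"input_tokens_details,omitempty"`
}

// Usage being a pointer is what makes the new nil checks necessary:
// the upstream Responses API may omit the usage object entirely.
type ResponsesResponse struct {
	Usage *ResponsesUsage `json:"usage,omitempty"`
}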

View File

@@ -37,9 +37,14 @@ func OaiResponsesHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 	// compute usage
 	usage := dto.Usage{}
-	usage.PromptTokens = responsesResponse.Usage.InputTokens
-	usage.CompletionTokens = responsesResponse.Usage.OutputTokens
-	usage.TotalTokens = responsesResponse.Usage.TotalTokens
+	if responsesResponse.Usage != nil {
+		usage.PromptTokens = responsesResponse.Usage.InputTokens
+		usage.CompletionTokens = responsesResponse.Usage.OutputTokens
+		usage.TotalTokens = responsesResponse.Usage.TotalTokens
+		if responsesResponse.Usage.InputTokensDetails != nil {
+			usage.PromptTokensDetails.CachedTokens = responsesResponse.Usage.InputTokensDetails.CachedTokens
+		}
+	}
 	// parse Tools usage
 	for _, tool := range responsesResponse.Tools {
 		info.ResponsesUsageInfo.BuiltInTools[common.Interface2String(tool["type"])].CallCount++
@@ -64,9 +69,14 @@ func OaiResponsesStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
 		sendResponsesStreamData(c, streamResponse, data)
 		switch streamResponse.Type {
 		case "response.completed":
-			usage.PromptTokens = streamResponse.Response.Usage.InputTokens
-			usage.CompletionTokens = streamResponse.Response.Usage.OutputTokens
-			usage.TotalTokens = streamResponse.Response.Usage.TotalTokens
+			if streamResponse.Response.Usage != nil {
+				usage.PromptTokens = streamResponse.Response.Usage.InputTokens
+				usage.CompletionTokens = streamResponse.Response.Usage.OutputTokens
+				usage.TotalTokens = streamResponse.Response.Usage.TotalTokens
+				if streamResponse.Response.Usage.InputTokensDetails != nil {
+					usage.PromptTokensDetails.CachedTokens = streamResponse.Response.Usage.InputTokensDetails.CachedTokens
+				}
+			}
 		case "response.output_text.delta":
 			// handle output text
 			responseTextBuilder.WriteString(streamResponse.Delta)
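
Propagating CachedTokens is what makes cache-aware billing possible downstream: without it the cached share of the prompt is invisible, so the cache ratio can never be applied. As a hedged illustration only (the project's actual quota formula is an assumption here), cached prompt tokens are typically charged at cacheRatio while the uncached remainder is charged at the full input rate:

// computeQuota is a hypothetical illustration, not the project's actual
// billing function: cached prompt tokens are charged at cacheRatio,
// the uncached remainder at the full rate.
func computeQuota(promptTokens, cachedTokens, completionTokens int,
	modelRatio, cacheRatio, completionRatio, groupRatio float64) float64 {
	uncached := float64(promptTokens - cachedTokens)
	cached := float64(cachedTokens) * cacheRatio
	output := float64(completionTokens) * completionRatio
	return (uncached + cached + output) * modelRatio * groupRatio
}

For example, 1000 prompt tokens of which 800 are cached at a cacheRatio of 0.5 would count as 600 input-equivalent tokens before the model and group ratios are applied.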

View File

@@ -1156,6 +1156,7 @@ export function renderLogContent(
   modelPrice = -1,
   groupRatio,
   user_group_ratio,
+  cacheRatio = 1.0,
   image = false,
   imageRatio = 1.0,
   webSearch = false,
@@ -1174,9 +1175,10 @@ export function renderLogContent(
 } else {
   if (image) {
     return i18next.t(
-      '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}},图片输入倍率 {{imageRatio}}{{ratioType}} {{ratio}}',
+      '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}},图片输入倍率 {{imageRatio}}{{ratioType}} {{ratio}}',
       {
         modelRatio: modelRatio,
+        cacheRatio: cacheRatio,
         completionRatio: completionRatio,
         imageRatio: imageRatio,
         ratioType: ratioLabel,
@@ -1185,9 +1187,10 @@ export function renderLogContent(
);
 } else if (webSearch) {
   return i18next.t(
-    '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}}{{ratioType}} {{ratio}}Web 搜索调用 {{webSearchCallCount}} 次',
+    '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}}{{ratioType}} {{ratio}}Web 搜索调用 {{webSearchCallCount}} 次',
     {
       modelRatio: modelRatio,
+      cacheRatio: cacheRatio,
       completionRatio: completionRatio,
       ratioType: ratioLabel,
       ratio,
@@ -1196,9 +1199,10 @@ export function renderLogContent(
);
 } else {
   return i18next.t(
-    '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}}{{ratioType}} {{ratio}}',
+    '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}}{{ratioType}} {{ratio}}',
     {
       modelRatio: modelRatio,
+      cacheRatio: cacheRatio,
       completionRatio: completionRatio,
       ratioType: ratioLabel,
       ratio,
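
Note that cacheRatio is inserted positionally (before image), so every caller of renderLogContent must be updated in the same commit; the hook change in the next file does exactly that, passing other.cache_ratio || 1.0 in the new slot.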

View File

@@ -366,6 +366,7 @@ export const useLogsData = () => {
         other.model_price,
         other.group_ratio,
         other?.user_group_ratio,
+        other.cache_ratio || 1.0,
         false,
         1.0,
         other.web_search || false,
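
The other.cache_ratio read above implies the backend serializes the ratios it applied into the log entry's other blob. A minimal sketch of that shape, assuming everything except the cache_ratio key (which the hook above confirms):

// buildOtherInfo is a hypothetical helper; only the "cache_ratio" key is
// confirmed by this diff, the remaining keys are assumptions.
func buildOtherInfo(modelRatio, completionRatio, groupRatio, cacheRatio float64) map[string]interface{} {
	return map[string]interface{}{
		"model_ratio":      modelRatio,
		"completion_ratio": completionRatio,
		"group_ratio":      groupRatio,
		"cache_ratio":      cacheRatio, // lets the UI render the cache-ratio segment
	}
}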