feat: refactor token estimation logic
- Introduced new OpenAI text models in `common/model.go`. - Added `IsOpenAITextModel` function to check for OpenAI text models. - Refactored token estimation methods across various channels to use estimated prompt tokens instead of direct prompt token counts. - Updated related functions and structures to accommodate the new token estimation approach, enhancing overall token management.
This commit is contained in:
@@ -5,7 +5,6 @@ import (
|
||||
"net/http"
|
||||
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
"github.com/QuantumNous/new-api/constant"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/logger"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
@@ -70,12 +69,7 @@ func NativeGeminiEmbeddingHandler(c *gin.Context, resp *http.Response, info *rel
|
||||
println(string(responseBody))
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: info.PromptTokens,
|
||||
TotalTokens: info.PromptTokens,
|
||||
}
|
||||
|
||||
common.SetContextKey(c, constant.ContextKeyLocalCountTokens, true)
|
||||
usage := service.ResponseText2Usage(c, "", info.UpstreamModelName, info.GetEstimatePromptTokens())
|
||||
|
||||
if info.IsGeminiBatchEmbedding {
|
||||
var geminiResponse dto.GeminiBatchEmbeddingResponse
|
||||
|
||||
Reference in New Issue
Block a user