🔧 fix(token_counter): refactor token encoder initialization and retrieval logic

This commit is contained in:
CaIon
2025-06-10 18:51:26 +08:00
parent d1f493bf17
commit 6c4f64c397

View File

@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"github.com/tiktoken-go/tokenizer"
+	"github.com/tiktoken-go/tokenizer/codec"
 	"image"
 	"log"
 	"math"
@@ -21,20 +22,16 @@ var defaultTokenEncoder tokenizer.Codec
 
 func InitTokenEncoders() {
 	common.SysLog("initializing token encoders")
-	cl100TokenEncoder, err := tokenizer.Get(tokenizer.Cl100kBase)
-	if err != nil {
-		common.FatalLog(fmt.Sprintf("failed to get gpt-3.5-turbo token encoder: %s", err.Error()))
-	}
-	defaultTokenEncoder = cl100TokenEncoder
+	defaultTokenEncoder = codec.NewCl100kBase()
 	common.SysLog("token encoders initialized")
 }
 
 func getTokenEncoder(model string) tokenizer.Codec {
-	codec, err := tokenizer.ForModel(tokenizer.Model(model))
+	modelCodec, err := tokenizer.ForModel(tokenizer.Model(model))
 	if err != nil {
 		return defaultTokenEncoder
 	}
-	return codec
+	return modelCodec
 }
 
 func getTokenNum(tokenEncoder tokenizer.Codec, text string) int {