diff --git a/dto/openai_request.go b/dto/openai_request.go index 652d8cce..28903ed7 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -114,6 +114,7 @@ type MediaContent struct { ImageUrl any `json:"image_url,omitempty"` InputAudio any `json:"input_audio,omitempty"` File any `json:"file,omitempty"` + VideoUrl any `json:"video_url,omitempty"` } func (m *MediaContent) GetImageMedia() *MessageImageUrl { @@ -158,11 +159,16 @@ type MessageFile struct { FileId string `json:"file_id,omitempty"` } +type MessageVideoUrl struct { + Url string `json:"url"` +} + const ( ContentTypeText = "text" ContentTypeImageURL = "image_url" ContentTypeInputAudio = "input_audio" ContentTypeFile = "file" + ContentTypeVideoUrl = "video_url" // Alibaba Cloud Bailian video recognition ) func (m *Message) GetPrefix() bool { @@ -346,6 +352,15 @@ func (m *Message) ParseContent() []MediaContent { } } } + case ContentTypeVideoUrl: + if videoUrl, ok := contentItem["video_url"].(string); ok { + contentList = append(contentList, MediaContent{ + Type: ContentTypeVideoUrl, + VideoUrl: &MessageVideoUrl{ + Url: videoUrl, + }, + }) + } } } } diff --git a/main.go b/main.go index 4bdc97bd..95c6820d 100644 --- a/main.go +++ b/main.go @@ -80,6 +80,8 @@ func main() { // Initialize options model.InitOptionMap() + service.InitTokenEncoders() + if common.RedisEnabled { // for compatibility with old versions common.MemoryCacheEnabled = true @@ -133,8 +135,6 @@ func main() { common.SysLog("pprof enabled") } - service.InitTokenEncoders() - // Initialize HTTP server server := gin.New() server.Use(gin.CustomRecovery(func(c *gin.Context, err any) { diff --git a/service/token_counter.go b/service/token_counter.go index f3c3b6b0..21b882af 100644 --- a/service/token_counter.go +++ b/service/token_counter.go @@ -400,6 +400,8 @@ func CountTokenMessages(info *relaycommon.RelayInfo, messages []dto.Message, mod tokenNum += 100 } else if m.Type == dto.ContentTypeFile { tokenNum += 5000 + } else if m.Type == dto.ContentTypeVideoUrl { + tokenNum 
+= 5000 } else { tokenNum += getTokenNum(tokenEncoder, m.Text) }