refactor: Introduce pre-consume quota and unify relay handlers

This commit introduces a major architectural refactoring to improve quota management, centralize logging, and streamline the relay handling logic.

Key changes:
- **Pre-consume Quota:** Implements a new mechanism to check and reserve user quota *before* making the request to the upstream provider. This ensures more accurate quota deduction and prevents users from exceeding their limits due to concurrent requests.

- **Unified Relay Handlers:** Refactors the relay logic to use generic handlers (e.g., `ChatHandler`, `ImageHandler`) instead of provider-specific implementations. This significantly reduces code duplication and simplifies adding new channels.

- **Centralized Logger:** A new dedicated `logger` package is introduced, and all system logging calls are migrated to use it, moving this responsibility out of the `common` package.

- **Code Reorganization:** DTOs are generalized (e.g., `dalle.go` -> `openai_image.go`) and utility code is moved to more appropriate packages (e.g., `common/http.go` -> `service/http.go`) for better code structure.
This commit is contained in:
CaIon
2025-08-14 20:05:06 +08:00
parent 17bab355e4
commit e2037ad756
113 changed files with 3095 additions and 2518 deletions

View File

@@ -2,8 +2,12 @@ package dto
import (
"encoding/json"
"fmt"
"one-api/common"
"one-api/types"
"strings"
"github.com/gin-gonic/gin"
)
type ResponseFormat struct {
@@ -67,6 +71,116 @@ type GeneralOpenAIRequest struct {
Extra map[string]json.RawMessage `json:"-"`
}
func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
var tokenCountMeta types.TokenCountMeta
var texts = make([]string, 0)
var fileMeta = make([]*types.FileMeta, 0)
if r.Prompt != nil {
switch v := r.Prompt.(type) {
case string:
texts = append(texts, v)
case []any:
for _, item := range v {
if str, ok := item.(string); ok {
texts = append(texts, str)
}
}
default:
texts = append(texts, fmt.Sprintf("%v", r.Prompt))
}
}
if r.Input != nil {
inputs := r.ParseInput()
texts = append(texts, inputs...)
}
if r.MaxCompletionTokens > r.MaxTokens {
tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
} else {
tokenCountMeta.MaxTokens = int(r.MaxTokens)
}
for _, message := range r.Messages {
tokenCountMeta.MessagesCount++
texts = append(texts, message.Role)
if message.Content != nil {
if message.Name != nil {
tokenCountMeta.NameCount++
texts = append(texts, *message.Name)
}
arrayContent := message.ParseContent()
for _, m := range arrayContent {
if m.Type == ContentTypeImageURL {
imageUrl := m.GetImageMedia()
if imageUrl != nil {
meta := &types.FileMeta{
FileType: types.FileTypeImage,
}
meta.Data = imageUrl.Url
meta.Detail = imageUrl.Detail
fileMeta = append(fileMeta, meta)
}
} else if m.Type == ContentTypeInputAudio {
inputAudio := m.GetInputAudio()
if inputAudio != nil {
meta := &types.FileMeta{
FileType: types.FileTypeAudio,
}
meta.Data = inputAudio.Data
fileMeta = append(fileMeta, meta)
}
} else if m.Type == ContentTypeFile {
file := m.GetFile()
if file != nil {
meta := &types.FileMeta{
FileType: types.FileTypeFile,
}
meta.Data = file.FileData
fileMeta = append(fileMeta, meta)
}
} else if m.Type == ContentTypeVideoUrl {
videoUrl := m.GetVideoUrl()
if videoUrl != nil {
meta := &types.FileMeta{
FileType: types.FileTypeVideo,
}
meta.Data = videoUrl.Url
fileMeta = append(fileMeta, meta)
}
} else {
texts = append(texts, m.Text)
}
}
}
}
if r.Tools != nil {
openaiTools := r.Tools
for _, tool := range openaiTools {
tokenCountMeta.ToolsCount++
texts = append(texts, tool.Function.Name)
if tool.Function.Description != "" {
texts = append(texts, tool.Function.Description)
}
if tool.Function.Parameters != nil {
texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
}
}
//toolTokens := CountTokenInput(countStr, request.Model)
//tkm += 8
//tkm += toolTokens
}
tokenCountMeta.CombineText = strings.Join(texts, "\n")
tokenCountMeta.Files = fileMeta
return &tokenCountMeta
}
func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
return r.Stream
}
func (r *GeneralOpenAIRequest) ToMap() map[string]any {
result := make(map[string]any)
data, _ := common.Marshal(r)
@@ -202,10 +316,25 @@ func (m *MediaContent) GetFile() *MessageFile {
return nil
}
func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
if m.VideoUrl != nil {
if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
return m.VideoUrl.(*MessageVideoUrl)
}
if itemMap, ok := m.VideoUrl.(map[string]any); ok {
out := &MessageVideoUrl{
Url: common.Interface2String(itemMap["url"]),
}
return out
}
}
return nil
}
type MessageImageUrl struct {
Url string `json:"url"`
Detail string `json:"detail"`
MimeType string
Url string `json:"url"`
Detail string `json:"detail"`
//MimeType string
}
func (m *MessageImageUrl) IsRemoteImage() bool {
@@ -233,6 +362,7 @@ const (
ContentTypeInputAudio = "input_audio"
ContentTypeFile = "file"
ContentTypeVideoUrl = "video_url" // 阿里百炼视频识别
//ContentTypeAudioUrl = "audio_url"
)
func (m *Message) GetPrefix() bool {
@@ -623,7 +753,7 @@ type WebSearchOptions struct {
// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
Model string `json:"model"`
Input json.RawMessage `json:"input,omitempty"`
Input any `json:"input,omitempty"`
Include json.RawMessage `json:"include,omitempty"`
Instructions json.RawMessage `json:"instructions,omitempty"`
MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
@@ -645,28 +775,145 @@ type OpenAIResponsesRequest struct {
Prompt json.RawMessage `json:"prompt,omitempty"`
}
func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
var fileMeta = make([]*types.FileMeta, 0)
var texts = make([]string, 0)
if r.Input != nil {
inputs := r.ParseInput()
for _, input := range inputs {
if input.Type == "input_image" {
fileMeta = append(fileMeta, &types.FileMeta{
FileType: types.FileTypeImage,
Data: input.ImageUrl,
Detail: input.Detail,
})
} else if input.Type == "input_file" {
fileMeta = append(fileMeta, &types.FileMeta{
FileType: types.FileTypeFile,
Data: input.FileUrl,
})
} else {
texts = append(texts, input.Text)
}
}
}
if len(r.Instructions) > 0 {
texts = append(texts, string(r.Instructions))
}
if len(r.Metadata) > 0 {
texts = append(texts, string(r.Metadata))
}
if len(r.Text) > 0 {
texts = append(texts, string(r.Text))
}
if len(r.ToolChoice) > 0 {
texts = append(texts, string(r.ToolChoice))
}
if len(r.Prompt) > 0 {
texts = append(texts, string(r.Prompt))
}
if len(r.Tools) > 0 {
toolStr, _ := common.Marshal(r.Tools)
texts = append(texts, string(toolStr))
}
return &types.TokenCountMeta{
CombineText: strings.Join(texts, "\n"),
Files: fileMeta,
MaxTokens: int(r.MaxOutputTokens),
}
}
func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
return r.Stream
}
type Reasoning struct {
Effort string `json:"effort,omitempty"`
Summary string `json:"summary,omitempty"`
}
//type ResponsesToolsCall struct {
// Type string `json:"type"`
// // Web Search
// UserLocation json.RawMessage `json:"user_location,omitempty"`
// SearchContextSize string `json:"search_context_size,omitempty"`
// // File Search
// VectorStoreIds []string `json:"vector_store_ids,omitempty"`
// MaxNumResults uint `json:"max_num_results,omitempty"`
// Filters json.RawMessage `json:"filters,omitempty"`
// // Computer Use
// DisplayWidth uint `json:"display_width,omitempty"`
// DisplayHeight uint `json:"display_height,omitempty"`
// Environment string `json:"environment,omitempty"`
// // Function
// Name string `json:"name,omitempty"`
// Description string `json:"description,omitempty"`
// Parameters json.RawMessage `json:"parameters,omitempty"`
// Function json.RawMessage `json:"function,omitempty"`
// Container json.RawMessage `json:"container,omitempty"`
//}
type MediaInput struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
FileUrl string `json:"file_url,omitempty"`
ImageUrl string `json:"image_url,omitempty"`
Detail string `json:"detail,omitempty"` // 仅 input_image 有效
}
// ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
// Reference implementation mirrors Message.ParseContent:
// - input can be a string, treated as an input_text item
// - input can be an array of objects with a `type` field
// supported types: input_text, input_image, input_file
func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
if r.Input == nil {
return nil
}
var inputs []MediaInput
// Try string first
if str, ok := r.Input.(string); ok {
inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
return inputs
}
// Try array of parts
if array, ok := r.Input.([]any); ok {
for _, itemAny := range array {
// Already parsed MediaInput
if media, ok := itemAny.(MediaInput); ok {
inputs = append(inputs, media)
continue
}
// Generic map
item, ok := itemAny.(map[string]any)
if !ok {
continue
}
typeVal, ok := item["type"].(string)
if !ok {
continue
}
switch typeVal {
case "input_text":
text, _ := item["text"].(string)
inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
case "input_image":
// image_url may be string or object with url field
var imageUrl string
switch v := item["image_url"].(type) {
case string:
imageUrl = v
case map[string]any:
if url, ok := v["url"].(string); ok {
imageUrl = url
}
}
inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
case "input_file":
// file_url may be string or object with url field
var fileUrl string
switch v := item["file_url"].(type) {
case string:
fileUrl = v
case map[string]any:
if url, ok := v["url"].(string); ok {
fileUrl = url
}
}
inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
}
}
}
return inputs
}